1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
| import math
import matplotlib.pyplot as plt
import numpy
# --- Curve appearance ---
LINE_WIDTH = 3  # Passed as the line width when the density curve is plotted
NUMBER_OF_SAMPLES = 1000  # Sample points along the x-axis; more gives a smoother curve

# --- Distribution parameters ---
MEAN = 0  # Keep at 0: the rest of the script assumes a zero mean
VAR = 2
SD = math.sqrt(VAR)

# --- Plotted x-range, counted in standard deviations ---
FROM_X, TO_X = -4, 4
def pdf(x, mean, var):
    """Return the probability density of the Normal distribution at x.

    Args:
        x: Point at which to evaluate the density. May be a scalar or a
            numpy array, in which case the density is computed element-wise.
        mean: Mean of the distribution.
        var: Variance of the distribution (a scalar).

    Returns:
        The density value(s) of N(mean, var) at x.

    Raises:
        ValueError: If var is negative (raised by math.sqrt).
        ZeroDivisionError: If var is 0.
    """
    sd = math.sqrt(var)
    normalization = 1 / (sd * math.sqrt(2 * math.pi))
    # Use exp() instead of `math.e ** ...`: it is the more accurate form and
    # numpy.exp works for scalars and arrays alike.
    return normalization * numpy.exp(-((x - mean) ** 2) / (2 * var))
# Highlight the x- and y-axis
plt.axvline(color=(0.75, 0.75, 0.75), linewidth=1)
plt.axhline(color=(0.75, 0.75, 0.75), linewidth=1)

# Take NUMBER_OF_SAMPLES numbers evenly from [FROM_X * SD, TO_X * SD]
xs = numpy.linspace(FROM_X * SD, TO_X * SD, NUMBER_OF_SAMPLES)
ys = [pdf(x, MEAN, VAR) for x in xs]
plt.plot(xs, ys, linewidth=LINE_WIDTH, color="Blue")

# Fill the area under the curve where x lies within 3, 2 and 1 standard
# deviations of the mean (which is assumed to be 0). The widest band is
# filled first and the narrower bands are filled after it.
for n_sigma, band_color in ((3, "LightSteelBlue"), (2, "CornflowerBlue"), (1, "RoyalBlue")):
    plt.fill_between(xs, 0, ys, where=numpy.abs(xs) < n_sigma * SD, color=band_color)

# Label both halves of the band within one standard deviation.
# The factors used here and below are basically arbitrary and only serve to
# position the text.
x_text = SD - SD / 2
y_text = pdf(x_text, MEAN, VAR) / 2
label = "%.1f%%" % (68 / 2.0,)
for side in (1, -1):
    plt.text(side * x_text, y_text, label, fontsize=14, ha="center")

# Label both halves of the band between one and two standard deviations
x_text = 2 * SD - SD / 2
y_text = pdf(x_text, MEAN, VAR) / 4
label = "%.1f%%" % ((95 - 68) / 2.0,)
for side in (1, -1):
    plt.text(side * x_text, y_text, label, fontsize=14, ha="center")

# The height of the area between the second and third standard deviation is
# too small to hold a label, so we use an arrow pointing to that area instead.
x_point = 3 * SD - SD / 2
y_point = pdf(x_point, MEAN, VAR) * 1.25
x_text = x_point  # The label sits directly above the point it refers to
y_text = pdf(x_text, MEAN, VAR) * 3
text = "%.2f%%" % ((99.7 - 95) / 2.0,)
plt.annotate(text, (x_point, y_point), (x_text, y_text),
             ha="left", fontsize=14, arrowprops=dict(arrowstyle='-'))
plt.annotate(text, (-x_point, y_point), (-x_text, y_text),
             ha="right", fontsize=14, arrowprops=dict(arrowstyle='-'))

# Configure the x-axis size. The limits are in data units, so the sigma counts
# are scaled by SD to match the sampled xs and the tick locations below.
plt.xlim((FROM_X * SD, TO_X * SD))
# Configure the y-axis size to capture the height of the curve
plt.ylim((-0.025, pdf(MEAN, MEAN, VAR) + 0.05))

# Configure the x-axis labels: one tick per standard deviation
sigma_steps = range(FROM_X, TO_X + 1)
xticks_locs = [SD * i for i in sigma_steps]
# Show the x values in numbers of standard deviations counted from the mean;
# step 0 is the mean itself. Raw strings keep the LaTeX backslashes literal.
xticks_labels = [r"$%i\sigma$" % i if i != 0 else r"$\mu$ " for i in sigma_steps]
plt.xticks(xticks_locs, xticks_labels)

plt.title(r"The normal distribution $\mathcal{N}(%i, %i)$" % (MEAN, VAR))
plt.grid()  # Toggle the axes grid
plt.gcf().set_size_inches(10, 6)
plt.savefig('002-three-sigma-rule.png', dpi=100)
plt.show()  # Show the actual plot
|