# DS_D27_LU2_V1.0

Demo - Margin of Error and Confidence Interval 

In this demo, you will learn how to calculate the margin of error and the confidence interval for a sample mean.

In [5]:
import math
from scipy import stats
import numpy as np
import pandas as pd

In [1]:


# Step 1: Load the Boston Housing dataset.
# header=1 takes the column names from the second line of the file
# (presumably the first line is a metadata row -- TODO confirm against the CSV).
#
# Step 2: Rename the 'MEDV' column to 'target'.
# .rename() is chained so a new frame is produced in one expression instead of
# mutating the loaded frame in place.
df = (
    pd.read_csv('boston_house_prices.csv', header=1)
    .rename(columns={'MEDV': 'target'})
)

# 'target' now holds the same values as the original 'MEDV' column.

In [7]:
# Preview the first rows to confirm the load and the 'MEDV' -> 'target' rename.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [12]:
# (rows, columns) of the full dataset, i.e. the population we sample from.
df.shape

(506, 14)

In [2]:
# Step 3: Draw a reproducible random sample of rows from the full dataset.
sample_size = 200  # number of rows to draw
sample = df.sample(
    n=sample_size,
    random_state=1,  # fixed seed so the same rows are selected on every run
)

In [9]:
# Preview the sampled rows; the index keeps the row labels from df.
sample.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
307,0.04932,33.0,2.18,0,0.472,6.849,70.3,3.1827,7,222,18.4,396.9,7.53,28.2
343,0.02543,55.0,3.78,0,0.484,6.696,56.4,5.7321,5,370,17.6,396.9,7.18,23.9
47,0.22927,0.0,6.91,0,0.448,6.03,85.5,5.6894,3,233,17.9,392.74,18.8,16.6
67,0.05789,12.5,6.07,0,0.409,5.878,21.4,6.498,4,345,18.9,396.21,8.1,22.0
362,3.67822,0.0,18.1,0,0.77,5.362,96.2,2.1036,24,666,20.2,380.79,10.19,20.8


In [11]:
# Sanity check: the row count should equal sample_size.
sample.shape

(200, 14)

### Calculate Z-critical, Margin of Error & Confidence Interval

In [4]:
# Compute the z-critical value, the margin of error and the confidence
# interval for the mean of `target`, using the normal (z) approximation.

# Seed NumPy's global random generator so that any later np.random calls are
# repeatable. Nothing in this cell draws random numbers (the sample was already
# fixed via random_state when it was drawn), so the seed does not influence the
# values computed below.
np.random.seed(1)

# Two-sided confidence level for the interval.
confidence_level = 0.95

sample_mean = sample.target.mean()

# Step4: Get the z-critical value.
# The z-critical value is the number of standard deviations you have to go
# from the mean of the normal distribution to capture the proportion of the
# data associated with the desired confidence level. For a two-sided interval
# the remaining probability is split evenly between both tails, so the
# quantile to look up is (1 + confidence_level) / 2 (0.975 for 95%).
z_critical = stats.norm.ppf(q=(1 + confidence_level) / 2)

# Step5: Get the standard deviation (pandas uses the sample estimate, ddof=1).
sample_stdev = sample.target.std()

# Number of observations actually used for the mean and standard deviation.
# Derived from the data rather than from `sample_size` so the two cannot drift
# apart if the sampling cell is edited without being re-run.
n_observations = sample.target.count()

# Step6: Calculate margin of error = z * (standard error of the mean).
margin_of_error = z_critical * (sample_stdev / math.sqrt(n_observations))

# Step7: Calculate confidence interval as (lower limit, upper limit).
confidence_interval = (sample_mean - margin_of_error,
                       sample_mean + margin_of_error)

print("Z-critical value:", z_critical)
print("Margin of Error:", margin_of_error)
print("Confidence Interval:", confidence_interval)

Z-critical value: 1.959963984540054
Margin of Error: 1.3275186301421031
Confidence Interval: (21.8579813698579, 24.513018630142103)


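In this interval, 21.8579813698579 is the lower limit and 24.513018630142103 is the upper limit. This means that we are 95% confident that the true population mean of `target` falls between these two values; that is, if we repeated this sampling procedure many times, about 95% of the intervals constructed this way would contain the true mean.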