# 問2 Pythonを使って母平均の差の区間推定（対応のあるデータ）

In [1]:
import numpy as np
from scipy import stats

In [2]:
# Two score samples of equal length, treated as paired observations.
l1 = [8.0, 5.4, 4.8, 5.8, 6.4, 5.8]
l2 = [7.0, 5.0, 3.6, 7.0, 7.0, 5.4]

In [3]:
# Element-wise differences of the paired scores (left unrounded).
l3 = [first - second for first, second in zip(l1, l2)]

In [4]:
print(l3)  # raw differences; the output exposes binary floating-point round-off

[1.0, 0.40000000000000036, 1.1999999999999997, -1.2000000000000002, -0.5999999999999996, 0.39999999999999947]


In [5]:
# Paired differences rounded to 2 decimals to strip floating-point noise.
data4 = [round(first - second, 2) for first, second in zip(l1, l2)]
data4

[1.0, 0.4, 1.2, -1.2, -0.6, 0.4]

## 1.点数の差の平均値

In [6]:
# Sample mean of the paired differences.
barx = np.mean(data4)
barx

0.19999999999999996

## 2.点数の差の不偏分散 

In [13]:
# Unbiased sample variance of the differences (ddof=1 divides by n - 1).
s2 = np.var(data4, ddof=1)
s2

0.8639999999999999

## 3.区間推定

In [8]:
ci = 0.95  # confidence level of the interval estimate

In [9]:
dof = len(data4) - 1  # degrees of freedom (number of differences minus 1)

In [14]:
se = np.sqrt(s2 / len(data4))  # standard error (standard deviation of the statistic)

In [11]:
# Two-sided t interval at level ci, centred on barx with scale se.
lower, upper = stats.t.interval(ci, df=dof, loc=barx, scale=se)

In [12]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-0.78
上限=1.18


# 問4 Pythonを使って母平均の差の区間推定（対応のないデータ）

In [15]:
# Score samples for group 1 (6 values) and group 2 (5 values); unpaired data.
l1 = [8.0, 5.4, 4.8, 5.8, 6.4, 5.6]
l2 = [9.0, 7.0, 3.6, 6.4, 6.0]

## 1.1組と2組の点数の平均値

In [16]:
# Sample mean of each group's scores, computed in one pass over both lists.
barx1, barx2 = (np.mean(scores) for scores in (l1, l2))
barx1, barx2

(6.0, 6.4)

## 2.1組と2組の点数の不偏分散

In [18]:
# Unbiased sample variance (ddof=1) of each group's scores.
s1_2, s2_2 = (np.var(scores, ddof=1) for scores in (l1, l2))
s1_2, s2_2

(1.232, 3.78)

## 3.1組と2組の点数の不偏分散の加重平均

In [19]:
# Degrees of freedom of group 1 and group 2 (sample size minus 1 each).
dof1, dof2 = len(l1) - 1, len(l2) - 1

In [25]:
# Pooled variance: the two unbiased variances averaged with degrees-of-freedom weights.
s2 = (dof1 * s1_2 + dof2 * s2_2) / (dof1 + dof2)
s2

2.3644444444444446

## 4.区間推定

In [22]:
ci = 0.95  # confidence level of the interval estimate

In [23]:
barx = barx1 - barx2  # point estimate: group 1 mean minus group 2 mean

In [24]:
dof = dof1 + dof2  # degrees of freedom of group 1 + degrees of freedom of group 2

In [26]:
se = np.sqrt(s2 * (1 / len(l1) + 1 / len(l2)))  # standard error (standard deviation of the statistic), using the pooled variance s2

In [27]:
# Two-sided t interval at level ci, centred on barx with scale se.
lower, upper = stats.t.interval(ci, df=dof, loc=barx, scale=se)

In [28]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-2.51
上限=1.71


#### 補足　母分散が等しいかどうかわからない場合

In [29]:
# Sample sizes of group 1 and group 2.
n1, n2 = len(l1), len(l2)

In [30]:
# Approximate degrees of freedom (Welch-Satterthwaite formula) for the case
# where the two population variances are not assumed equal.
phi = (s1_2/n1 + s2_2/n2)**2 / ((s1_2/n1)**2 / dof1 + (s2_2/n2)**2 / dof2)
phi  # degrees of freedom

6.1074810742350545

In [31]:
# Linear interpolation of the upper 0.025 point for 6.1 degrees of freedom
t6 = 2.447  # t(6, 0.025)
t7 = 2.365  # t(7, 0.025)
t = (1 - 0.1) * t6 + 0.1 * t7  # weights 0.9 / 0.1 place the result 0.1 of the way from t6 to t7
t

2.4388

In [32]:
se = np.sqrt(s1_2 / len(l1) + s2_2 / len(l2))  # standard error (standard deviation of the statistic), variances not pooled

In [33]:
# Two-sided t interval at level ci, using the approximate degrees of freedom phi.
lower, upper = stats.t.interval(ci, df=phi, loc=barx, scale=se)

In [34]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-2.79
上限=1.99


なお、2つの標本の大きさが近いとき、または不偏分散の値が近いときは、母分散が等しいと仮定した場合と比べて結果は大きくは変わらない。

### 例 (標本数が近いとき)

In [44]:
# l1 contains one extreme value (1000); the sample sizes are 6 and 5.
l1 = [8.0, 5.4, 4.8, 5.8, 6.4, 1000]
l2 = [9.0, 7.0, 3.6, 6.4, 6.0]

## 1.1組と2組の点数の平均値

In [56]:
# Sample mean of each group's scores, computed in one pass over both lists.
barx1, barx2 = (np.mean(scores) for scores in (l1, l2))
barx1, barx2

(171.73333333333335, 6.4)

## 2.1組と2組の点数の不偏分散

In [57]:
# Unbiased sample variance (ddof=1) of each group's scores.
s1, s2 = (np.var(scores, ddof=1) for scores in (l1, l2))
s1, s2

(164647.35466666665, 3.78)

## 3.1組と2組の点数の不偏分散の加重平均

In [58]:
# Degrees of freedom of group 1 and group 2 (sample size minus 1 each).
dof1, dof2 = len(l1) - 1, len(l2) - 1

In [59]:
# Pooled variance: the two unbiased variances averaged with degrees-of-freedom weights.
s = (dof1 * s1 + dof2 * s2) / (dof1 + dof2)
s

91472.43259259258

## 4.区間推定

In [43]:
ci = 0.95  # confidence level of the interval estimate

In [61]:
barx = barx1 - barx2  # point estimate: group 1 mean minus group 2 mean

In [62]:
dof = dof1 + dof2  # degrees of freedom of group 1 + degrees of freedom of group 2

In [63]:
se = np.sqrt(s * (1 / len(l1) + 1 / len(l2)))  # standard error (standard deviation of the statistic), using the pooled variance s

In [64]:
# Two-sided t interval at level ci, centred on barx with scale se.
lower, upper = stats.t.interval(ci, df=dof, loc=barx, scale=se)

In [65]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-248.96
上限=579.62


In [66]:
# Refresh the sample sizes from the current l1/l2. Without this, n1/n2 would
# silently carry over from whichever data set last assigned them, and the
# degrees of freedom would be wrong whenever the sizes differ.
n1, n2 = len(l1), len(l2)
# Approximate degrees of freedom (Welch-Satterthwaite formula) for the case
# where the two population variances are not assumed equal.
phi = (s1/n1 + s2/n2)**2 / ((s1/n1)**2 / dof1 + (s2/n2)**2 / dof2)
phi

5.000275496948261

In [67]:
se = np.sqrt(s1 / len(l1) + s2 / len(l2))  # standard error (standard deviation of the statistic), variances not pooled

In [68]:
# Two-sided t interval at level ci, using the approximate degrees of freedom phi.
lower, upper = stats.t.interval(ci, df=phi, loc=barx, scale=se)

In [69]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-260.49
上限=591.16


### 例 (不偏分散の値が近いとき)

In [69]:
# Group 1 keeps 6 scores; group 2 has 25: five float scores followed by
# runs of the integers 6 (x8), 7 (x6) and 8 (x6).
l1 = [8.0, 5.4, 4.8, 5.8, 6.4, 5.6]
l2 = [9.0, 7.0, 3.6, 6.4, 6.0] + [6] * 8 + [7] * 6 + [8] * 6

## 1.1組と2組の点数の平均値

In [70]:
# Sample mean of each group's scores, computed in one pass over both lists.
barx1, barx2 = (np.mean(scores) for scores in (l1, l2))
barx1, barx2

(6.0, 6.8)

## 2.1組と2組の点数の不偏分散

In [71]:
# Unbiased sample variance (ddof=1) of each group's scores.
s1, s2 = (np.var(scores, ddof=1) for scores in (l1, l2))
s1, s2

(1.232, 1.2466666666666666)

## 3.1組と2組の点数の不偏分散の加重平均

In [72]:
# Degrees of freedom of group 1 and group 2 (sample size minus 1 each).
dof1, dof2 = len(l1) - 1, len(l2) - 1

In [73]:
# Sample sizes of group 1 and group 2.
n1, n2 = len(l1), len(l2)

In [74]:
# Pooled variance: the two unbiased variances averaged with degrees-of-freedom weights.
s = (dof1 * s1 + dof2 * s2) / (dof1 + dof2)
s

1.2441379310344827

## 4.区間推定

In [75]:
ci = 0.95  # confidence level of the interval estimate

In [76]:
barx = barx1 - barx2  # point estimate: group 1 mean minus group 2 mean

In [77]:
dof = dof1 + dof2  # degrees of freedom of group 1 + degrees of freedom of group 2

In [78]:
se = np.sqrt(s * (1 / len(l1) + 1 / len(l2)))  # standard error (standard deviation of the statistic), using the pooled variance s

In [79]:
# Two-sided t interval at level ci, centred on barx with scale se.
lower, upper = stats.t.interval(ci, df=dof, loc=barx, scale=se)

In [80]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-1.84
上限=0.24


In [81]:
# Approximate degrees of freedom (Welch-Satterthwaite formula) for the case
# where the two population variances are not assumed equal.
phi = (s1/n1 + s2/n2)**2 / ((s1/n1)**2 / dof1 + (s2/n2)**2 / dof2)
phi

7.629719853836785

In [82]:
se = np.sqrt(s1 / len(l1) + s2 / len(l2))  # standard error (standard deviation of the statistic), variances not pooled

In [83]:
# Two-sided t interval at level ci, using the approximate degrees of freedom phi.
lower, upper = stats.t.interval(ci, df=phi, loc=barx, scale=se)

In [84]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=-1.97
上限=0.37


### 例 (標本数も不偏分散の値も違うとき)

In [99]:
# Group 1: five ordinary scores followed by fourteen copies of 1000 (19 values);
# group 2 has 5 values.
l1 = [8.0, 5.4, 4.8, 5.8, 6.4] + [1000] * 14
l2 = [9.0, 7.0, 3.6, 6.4, 6.0]

## 1.1組と2組の点数の平均値

In [100]:
# Sample mean of each group's scores, computed in one pass over both lists.
barx1, barx2 = (np.mean(scores) for scores in (l1, l2))
barx1, barx2

(738.4421052631578, 6.4)

## 2.1組と2組の点数の不偏分散

In [101]:
# Unbiased sample variance (ddof=1) of each group's scores.
s1, s2 = (np.var(scores, ddof=1) for scores in (l1, l2))
s1, s2

(202197.37146198828, 3.78)

## 3.1組と2組の点数の不偏分散の加重平均

In [102]:
# Degrees of freedom of group 1 and group 2 (sample size minus 1 each).
dof1, dof2 = len(l1) - 1, len(l2) - 1

In [103]:
# Sample sizes of group 1 and group 2.
n1, n2 = len(l1), len(l2)

In [104]:
# Pooled variance: the two unbiased variances averaged with degrees-of-freedom weights.
s = (dof1 * s1 + dof2 * s2) / (dof1 + dof2)
s

165434.9002870813

## 4.区間推定

In [105]:
ci = 0.95  # confidence level of the interval estimate

In [106]:
barx = barx1 - barx2  # point estimate: group 1 mean minus group 2 mean

In [107]:
dof = dof1 + dof2  # degrees of freedom of group 1 + degrees of freedom of group 2

In [108]:
se = np.sqrt(s * (1 / len(l1) + 1 / len(l2)))  # standard error (standard deviation of the statistic), using the pooled variance s

In [109]:
# Two-sided t interval at level ci, centred on barx with scale se.
lower, upper = stats.t.interval(ci, df=dof, loc=barx, scale=se)

In [110]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=308.07
上限=1156.02


In [111]:
# Approximate degrees of freedom (Welch-Satterthwaite formula) for the case
# where the two population variances are not assumed equal.
phi = (s1/n1 + s2/n2)**2 / ((s1/n1)**2 / dof1 + (s2/n2)**2 / dof2)
phi

18.002557103975207

In [112]:
se = np.sqrt(s1 / len(l1) + s2 / len(l2))  # standard error (standard deviation of the statistic), variances not pooled

In [113]:
# Two-sided t interval at level ci, using the approximate degrees of freedom phi.
lower, upper = stats.t.interval(ci, df=phi, loc=barx, scale=se)

In [114]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=515.31
上限=948.78


# 問6 Pythonを使って母分散の比の区間推定

In [35]:
# Samples for device A (5 values) and device B (6 values).
lA = [10, 9, 8, 9, 10]
lB = [10, 10, 9, 9, 9, 10]

## 1.装置Aと装置Bの不偏分散

In [36]:
# Unbiased sample variance (ddof=1) of each device's sample.
sA2, sB2 = (np.var(sample, ddof=1) for sample in (lA, lB))
sA2, sB2

(0.7, 0.3)

## 2.区間推定

In [37]:
ci = 0.95  # confidence level of the interval estimate

In [38]:
# Degrees of freedom of device A and device B (sample size minus 1 each).
dofA, dofB = len(lA) - 1, len(lB) - 1

In [39]:
# Endpoints of the central interval of the F distribution at level ci, with
# dofA numerator and dofB denominator degrees of freedom.
F_0975, F_0025 = stats.f.interval(ci, dfn=dofA, dfd=dofB)

In [40]:
F_0975, F_0025  # show the two endpoints returned by stats.f.interval

(0.10678660008335832, 7.387885751267751)

In [41]:
# Bounds for the variance ratio sA2 / sB2: dividing by the larger F endpoint
# (F_0025) gives the lower bound, dividing by the smaller one (F_0975) the upper.
lower = 1 / F_0025 * sA2 / sB2
upper = 1 / F_0975 * sA2 / sB2

In [42]:
# Print the lower and upper bounds, each formatted to two decimal places.
print(f'下限={lower:.2f}\n上限={upper:.2f}')

下限=0.32
上限=21.85
