# PyMCによる項目反応理論の実装

## 疑似データの生成
参考サイト: [試験の数理 その１（問題設定とデータの生成）](https://qiita.com/takuyakubo/items/43d56725952e67032b49)

In [1]:
import pandas as pd
import numpy as np
from functools import partial

In [2]:
def get_3PLM_prob(a, b, c, x):
    """Return the correct-response probability of the 3-parameter logistic model.

    Evaluates ``c + (1 - c) / (1 + exp(-a * (x - b)))``. The exponential is
    computed with NumPy, so ``x`` may be a scalar or a NumPy array.

    Args:
        a: Slope of the logistic curve (item discrimination).
        b: Location of the curve along ``x`` (item difficulty).
        c: Lower asymptote of the curve (guessing parameter).
        x: Ability value(s) at which the curve is evaluated.

    Returns:
        The probability value(s) for ``x``.
    """
    exponent = -a * (x - b)
    denominator = 1 + np.exp(exponent)
    return c + (1 - c) / denominator

In [3]:
# Ranges from which the item parameters are drawn (lower bound, upper bound).
a_min, a_max = 0.3, 1
b_min, b_max = -2, 2
c_min, c_max = 0, 0.4

# Size of the simulated data set: number of items and number of examinees.
num_items, num_users = 20, 2000

In [4]:
# Draw the item parameters: one row per item, columns ordered (a, b, c).
item_params = np.transpose([
    np.random.uniform(a_min, a_max, num_items),
    np.random.uniform(b_min, b_max, num_items),
    np.random.uniform(c_min, c_max, num_items),
])

# Label the columns and save the table as a tab-separated file.
item_df = pd.DataFrame(item_params, columns=['a', 'b', 'c'])
item_df.to_csv('dataset/item_params.tsv', sep='\t', index=False)

item_df.head()

Unnamed: 0,a,b,c
0,0.530947,-1.126938,0.262671
1,0.778774,0.440435,0.016508
2,0.85157,1.997521,0.078272
3,0.496084,-1.853463,0.310627
4,0.592267,-0.531258,0.134943


In [5]:
# Draw one ability value per examinee from a standard normal distribution.
user_params = np.random.normal(loc=0.0, scale=1.0, size=num_users)

# Store the abilities in a single 'theta' column and save as tab-separated text.
user_df = pd.DataFrame({'theta': user_params})
user_df.to_csv('dataset/participant_params.tsv', sep='\t', index=False)

user_df.head()

Unnamed: 0,theta
0,-0.846466
1,-0.21525
2,0.367516
3,0.283451
4,2.386955


In [6]:
# Build the item-response matrix; each entry is 1 (correct) or 0 (incorrect).
# Row i, column j records how examinee j responded to item i.
# A response counts as correct when the 3PLM probability exceeds a fresh
# uniform draw from [0, 1). The boolean matrix is converted with
# ``astype(int)`` rather than ``np.vectorize(int)``: the latter calls ``int``
# once per element in Python and raises ValueError on a size-0 input.
ir_matrix = np.array(
    [
        get_3PLM_prob(a, b, c, user_params) > np.random.uniform(0, 1, num_users)
        for a, b, c in item_params
    ]
).astype(int)

# Convert to a data frame with one row per examinee and one column per item,
# then expose the row index as an explicit 'participant' column.
response_df = pd.DataFrame(ir_matrix.T)
response_df.columns = [f'Q{qid}' for qid in range(1, num_items+1)]
response_df = response_df.reset_index().rename(columns={'index': 'participant'})
response_df.to_csv('dataset/item_response.tsv', sep='\t', index=False)

response_df.head()

Unnamed: 0,participant,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,...,Q11,Q12,Q13,Q14,Q15,Q16,Q17,Q18,Q19,Q20
0,0,0,0,0,1,1,0,0,1,0,...,0,0,1,1,0,0,0,1,1,0
1,1,1,1,0,1,1,1,0,0,1,...,1,1,1,1,0,1,0,0,1,0
2,2,1,0,0,1,1,0,1,1,1,...,0,1,1,0,0,0,0,1,0,1
3,3,1,1,0,1,1,1,1,0,0,...,1,1,1,1,1,1,0,1,1,1
4,4,1,1,0,1,0,1,1,0,1,...,1,1,1,1,1,1,1,1,0,1
