In [1]:
import pandas as pd
import numpy as np

# Panel dimensions: N cross-sectional units, each observed in T time periods
N = 100
T = 10

# Long-format skeleton with one row per (id, time) pair:
# every id fills T consecutive rows, while time cycles 0..T-1 inside each id
data = pd.DataFrame({
    'id': np.arange(N).repeat(T),
    'time': np.tile(np.arange(T), N),
})

# Seed NumPy's global RNG before the first draw. All draws below consume the
# same stream, so their order (x1, x2, x3, individual, time, error) matters.
np.random.seed(42)

# Independent variables: one normal draw per row
data['x1'] = np.random.normal(loc=100, scale=10, size=N*T)  # IV1
data['x2'] = np.random.normal(loc=50, scale=5, size=N*T)    # IV2
data['x3'] = np.random.normal(loc=30, scale=3, size=N*T)    # IV3

# Quadratic term in x3
data['x3_squared'] = data['x3'] ** 2

# Individual fixed effects: one draw per id, spread to that id's rows by
# indexing the effects array with the id column
individual_effects = np.random.normal(loc=0, scale=1, size=N)
data['individual_effect'] = individual_effects[data['id'].to_numpy()]

# Time fixed effects: one draw per period, spread to rows the same way
time_effects = np.random.normal(loc=0, scale=1, size=T)
data['time_effect'] = time_effects[data['time'].to_numpy()]

# Coefficients on x1, x2, x3 and x3_squared used to build the DV
beta1, beta2, beta3, beta4 = 0.5, 0.3, -0.2, 0.01

# Idiosyncratic noise: one draw per row
error_term = np.random.normal(loc=0, scale=0.5, size=N*T)

# Dependent variable (DV). Terms are added strictly left to right so the
# floating-point result does not depend on how the sum is grouped.
linear_part = beta1 * data['x1'] + beta2 * data['x2'] + beta3 * data['x3']
data['y'] = (linear_part
             + beta4 * data['x3_squared']
             + data['individual_effect']
             + data['time_effect']
             + error_term)

# Sanity check: show the first five rows
print(data.head(5))


   id  time          x1         x2         x3  x3_squared  individual_effect  \
0   0     0  104.967142  56.996777  27.974465  782.570702          -1.907808   
1   0     1   98.617357  54.623168  29.566444  874.174610          -1.907808   
2   0     2  106.476885  50.298152  27.622740  763.015778          -1.907808   
3   0     3  115.230299  46.765316  29.076115  845.420487          -1.907808   
4   0     4   97.658466  53.491117  24.319156  591.421349          -1.907808   

   time_effect          y  
0    -0.706893  68.617826  
1     0.855556  67.330265  
2     1.649481  70.024741  
3     1.070611  72.841790  
4    -0.729602  63.484030  
