In [4]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.sandbox.regression.gmm import IV2SLS

# Load the dataset
file_path = '/Users/luwei/Desktop/MMAI/predictive modelling/midterm_partone.csv'
data = pd.read_csv(file_path)

# Dependent variable (Y): Assuming 'Stock Change' is the dependent variable
Y = data['Stock Change']

# Independent variables (X): Trying different combinations
X = data[['Operating Profit', 'Debt Asset Ratio', 'Interaction Effect']]
X = sm.add_constant(X)  # Adding a constant

# Instrumental variables (Z): Selecting different variables as instruments
Z = data[['Inventory Turnover', 'Current Ratio']]
Z = sm.add_constant(Z)  # Adding a constant

# Set up the GMM model
gmm_model = IV2SLS(Y, X, Z)

# Fit the model
gmm_results = gmm_model.fit()

# Print the results
gmm_results.summary()






0,1,2,3
Dep. Variable:,Stock Change,R-squared:,-0.439
Model:,IV2SLS,Adj. R-squared:,-0.441
Method:,Two Stage,F-statistic:,0.7299
,Least Squares,Prob (F-statistic):,0.534
Date:,"Sat, 11 Nov 2023",,
Time:,17:59:44,,
No. Observations:,1696,,
Df Residuals:,1692,,
Df Model:,3,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0017,5.44e+05,3.21e-09,1.000,-1.07e+06,1.07e+06
Operating Profit,-0.4634,4.43e+06,-1.05e-07,1.000,-8.69e+06,8.69e+06
Debt Asset Ratio,0.7160,3.29e+05,2.18e-06,1.000,-6.44e+05,6.44e+05
Interaction Effect,-0.0006,1.14e+04,-5.07e-08,1.000,-2.25e+04,2.25e+04

0,1,2,3
Omnibus:,278.615,Durbin-Watson:,1.91
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3586.101
Skew:,0.332,Prob(JB):,0.0
Kurtosis:,10.093,Cond. No.,159.0


Part 2

In [11]:
import pandas as pd

csv_file_path = '/Users/luwei/Desktop/MMAI/predictive modelling/midterm_parttwo.csv'  # Replace with your file path
data = pd.read_csv(csv_file_path)


data_encoded = pd.get_dummies(data, drop_first=True)


In [12]:
from sklearn.model_selection import train_test_split

X = data_encoded.drop('Credit Rating_Positive', axis=1)
y = data_encoded['Credit Rating_Positive']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)


In [13]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)


In [14]:
from sklearn.metrics import confusion_matrix, classification_report
y_pred = log_reg.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)


Confusion Matrix:
 [[   0  577]
 [   0 3464]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       577
           1       0.86      1.00      0.92      3464

    accuracy                           0.86      4041
   macro avg       0.43      0.50      0.46      4041
weighted avg       0.73      0.86      0.79      4041



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
import numpy as np
y_pred_prob = log_reg.predict_proba(X_test)[:, 1]
threshold_15_percent = np.percentile(y_pred_prob, 85)
y_pred_threshold = (y_pred_prob >= threshold_15_percent).astype(int)
conf_matrix_threshold = confusion_matrix(y_test, y_pred_threshold)
report_threshold = classification_report(y_test, y_pred_threshold)

print("Threshold:", threshold_15_percent)
print("Updated Confusion Matrix:\n", conf_matrix_threshold)
print("\nUpdated Classification Report:\n", report_threshold)


Threshold: 0.8875163810763488
Updated Confusion Matrix:
 [[ 495   82]
 [2936  528]]

Updated Classification Report:
               precision    recall  f1-score   support

           0       0.14      0.86      0.25       577
           1       0.87      0.15      0.26      3464

    accuracy                           0.25      4041
   macro avg       0.50      0.51      0.25      4041
weighted avg       0.76      0.25      0.26      4041

