In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, classification_report

# Create a synthetic dataset.
# A seeded Generator (instead of the unseeded global np.random state) makes the
# features, and therefore everything derived from them, identical on every
# Restart & Run All.
SEED = 42
N_SAMPLES = 100
TARGET_THRESHOLD = 7  # Target is 1 when Feature1 + Feature2 exceeds this value

rng = np.random.default_rng(SEED)
data = pd.DataFrame({
    'Feature1': rng.random(N_SAMPLES) * 10,  # uniform on [0, 10)
    'Feature2': rng.random(N_SAMPLES) * 5,   # uniform on [0, 5)
    'Feature3': rng.random(N_SAMPLES) * 20,  # uniform on [0, 20); does not enter the target
})

# Generate a binary target variable based on Feature1 and Feature2
data['Target'] = (data['Feature1'] + data['Feature2'] > TARGET_THRESHOLD).astype(int)

# Define predictor variables and target variable
X = data[['Feature1', 'Feature2', 'Feature3']]
y = data['Target']

# Split the dataset into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Calculate accuracy and print classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:\n", report)

# Predict value for unknown inputs
unknown_inputs = np.array([[5, 2, 10]])  # Example unknown inputs: Feature1, Feature2, Feature3
# The model was fitted on a DataFrame, so it remembers the column names.
# Passing a bare ndarray to predict() triggers scikit-learn's
# "X does not have valid feature names" warning and leaves the column order
# implicit; wrapping the inputs with the training columns makes it explicit.
unknown_inputs_df = pd.DataFrame(unknown_inputs, columns=X_train.columns)
predicted_value = model.predict(unknown_inputs_df)
print(f"Predicted class for unknown inputs {unknown_inputs}: {predicted_value[0]}")

# Linear Discriminant Analysis (LDA) baseline via scikit-learn's
# LinearDiscriminantAnalysis, trained and scored on the same train/test split
# as the logistic regression above.
lda = LDA().fit(X_train, y_train)  # fit() returns the fitted estimator itself
y_pred_lda = lda.predict(X_test)
lda_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_lda)
print(f"LDA Accuracy: {lda_accuracy:.4f}")

# LDA using matrix multiplication
# Convert to DataFrame / Series so rows can be selected with a boolean mask on
# the class labels.
X_train_df = pd.DataFrame(X_train, columns=['Feature1', 'Feature2', 'Feature3'])
y_train_df = pd.Series(y_train)

class_labels = np.unique(y_train_df)  # sorted unique labels, computed once
n_features = X_train_df.shape[1]

# Per-class mean vectors: one pandas Series per label, in sorted-label order.
mean_vectors = [X_train_df[y_train_df == cl].mean(axis=0) for cl in class_labels]

# Within-class scatter:  S_W = sum_c sum_{x in c} (x - m_c)(x - m_c)^T.
# With the rows of class c centred into a matrix D_c, the inner sum equals
# D_c^T D_c, so a single matrix product per class replaces the per-row loop of
# outer products.
S_W = np.zeros((n_features, n_features))
for cl, mv in zip(class_labels, mean_vectors):
    centered = X_train_df[y_train_df == cl].to_numpy() - mv.to_numpy()
    S_W += centered.T @ centered

# Between-class scatter:  S_B = sum_c n_c (m_c - m)(m_c - m)^T,
# where m is the overall mean, kept as a column vector of shape (n_features, 1).
mean_overall = X_train_df.mean(axis=0).to_numpy().reshape(-1, 1)
S_B = np.zeros((n_features, n_features))
for cl, mv in zip(class_labels, mean_vectors):
    n = int((y_train_df == cl).sum())      # number of training rows in this class
    mv_arr = mv.to_numpy().reshape(-1, 1)  # class mean as a column vector
    deviation = mv_arr - mean_overall
    S_B += n * (deviation @ deviation.T)


# Solve the generalized eigenproblem  S_B v = lambda * S_W v.
#
# inv(S_W) @ S_B is not symmetric, so np.linalg.eig on it reports the
# (mathematically real) near-zero eigenvalues as complex-conjugate round-off
# noise. Whitening with the Cholesky factor S_W = C C^T turns the problem into
# the symmetric one
#     (C^-1 S_B C^-T) u = lambda * u,    v = C^-T u,
# which np.linalg.eigh solves with real eigenvalues and eigenvectors, and
# without forming an explicit inverse.
chol_W = np.linalg.cholesky(S_W)  # raises LinAlgError if S_W is not positive definite
half_whitened = np.linalg.solve(chol_W, S_B)             # C^-1 S_B
whitened_S_B = np.linalg.solve(chol_W, half_whitened.T)  # C^-1 S_B C^-T (S_B is symmetric)
whitened_S_B = (whitened_S_B + whitened_S_B.T) / 2       # remove round-off asymmetry

whitened_vals, whitened_vecs = np.linalg.eigh(whitened_S_B)

# eigh returns eigenvalues in ascending order; put the most discriminative
# direction (largest eigenvalue) first.
order = np.argsort(whitened_vals)[::-1]
eig_vals = whitened_vals[order]

# Map the eigenvectors back to the original feature space and rescale each
# column to unit length, the same normalisation np.linalg.eig uses.
eig_vecs = np.linalg.solve(chol_W.T, whitened_vecs[:, order])
eig_vecs /= np.linalg.norm(eig_vecs, axis=0)

print("Eigenvalues:", eig_vals)
print("Eigenvectors:\n", eig_vecs)

Accuracy: 0.9500
Classification Report:
               precision    recall  f1-score   support

           0       0.86      1.00      0.92         6
           1       1.00      0.93      0.96        14

    accuracy                           0.95        20
   macro avg       0.93      0.96      0.94        20
weighted avg       0.96      0.95      0.95        20

Predicted class for unknown inputs [[ 5  2 10]]: 1
LDA Accuracy: 0.9000
Eigenvalues: [2.49960978e+00+0.00000000e+00j 7.49203090e-18+3.61341887e-18j
 7.49203090e-18-3.61341887e-18j]
Eigenvectors:
 [[ 0.91465634+0.j          0.04491144+0.00853296j  0.04491144-0.00853296j]
 [ 0.40314087+0.j         -0.9978982 +0.j         -0.9978982 -0.j        ]
 [ 0.02968549+0.j          0.0283089 -0.03616547j  0.0283089 +0.03616547j]]


