In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.exceptions import ConvergenceWarning
import warnings

# Function to load npz file
def load_npz_file(file_path):
    try:
        data = np.load(file_path)
        print("Arrays in the file:", list(data.keys()))
        for array_name in data:
            print(f"{array_name}:")
            print(data[array_name])
        return data
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Replace with your npz file path
npz_file_path = r"C:\Users\An\pneumoniamnist.npz"

# Load npz file
loaded_data = load_npz_file(npz_file_path)

# Extract arrays from loaded data
train_images, val_images, test_images = loaded_data['train_images'], loaded_data['val_images'], loaded_data['test_images']
train_labels, val_labels, test_labels = loaded_data['train_labels'], loaded_data['val_labels'], loaded_data['test_labels']

# Flatten images
def flatten_images(images):
    return images.reshape(images.shape[0], -1)

X_train, X_val, X_test = flatten_images(train_images), flatten_images(val_images), flatten_images(test_images)
y_train, y_val, y_test = train_labels.flatten(), val_labels.flatten(), test_labels.flatten()

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Suppress the convergence warning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Initialize logistic regression model
model = LogisticRegression(C=0.25, penalty='l1', solver='liblinear', max_iter=500)  # L1 regularization
model.fit(X_train_scaled, y_train)

# Make predictions and evaluate accuracy on the training set
train_predictions = model.predict(X_train_scaled)
accuracy_train = accuracy_score(y_train, train_predictions)
print("Training Accuracy:", accuracy_train)

# Make predictions and evaluate accuracy on the validation set
val_predictions = model.predict(X_val_scaled)
accuracy_val = accuracy_score(y_val, val_predictions)
print("Validation Accuracy:", accuracy_val)

# Make predictions and evaluate accuracy on the test set
test_predictions = model.predict(X_test_scaled)
accuracy_test = accuracy_score(y_test, test_predictions)
print("Test Accuracy:", accuracy_test)

# Print confusion matrix for the test set
conf_matrix_test = confusion_matrix(y_test, test_predictions)
print("Confusion Matrix (Test Set):")
print(conf_matrix_test)

Arrays in the file: ['train_images', 'val_images', 'test_images', 'train_labels', 'val_labels', 'test_labels']
train_images:
[[[ 92 108 117 ...   5   8   6]
  [129 138 141 ...   4   2   3]
  [141 146 148 ...   0   0   0]
  ...
  [168 180 192 ... 119 115 126]
  [173 184 195 ... 153 139 132]
  [173 185 198 ... 168 154 139]]

 [[115 118 117 ... 123 115 104]
  [130 137 140 ... 146 135 121]
  [134 151 167 ... 152 138 123]
  ...
  [176 180 187 ... 169 151 142]
  [181 185 192 ... 173 163 158]
  [184 189 195 ... 183 176 169]]

 [[149 146 147 ... 128 141 151]
  [171 153 147 ... 149 159 166]
  [172 166 167 ... 162 170 172]
  ...
  [ 95 133 174 ... 159 118 150]
  [139 166 196 ... 185 147 150]
  [181 184 191 ... 188 169 157]]

 ...

 [[ 86  97 103 ...  53  44  44]
  [ 88 102 116 ...  57  46  46]
  [ 96 106 125 ...  74  60  58]
  ...
  [ 43  58  81 ... 147 139 132]
  [ 90 106 127 ... 148 138 130]
  [132 142 153 ... 145 136 129]]

 [[ 14   0   0 ...  96  95  99]
  [ 66  56  65 ... 119 115 118]
  [ 9

Training Accuracy: 0.973874256584537
Validation Accuracy: 0.9599236641221374
Test Accuracy: 0.8573717948717948
Confusion Matrix (Test Set):
[[151  83]
 [  6 384]]
