In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Import and read the walmart_cleaned.csv file
application_df = pd.read_csv("walmart_cleaned.csv")

# Drop irrelevant columns (Store and Date)
application_df.drop(columns=['Date'], inplace=True)

# Convert 'Holiday_Flag' to a binary target variable (1 if it's a holiday, 0 if it's not)
application_df['Is_Holiday'] = application_df['Holiday_Flag']

# Drop the 'Holiday_Flag' column since we have extracted the target variable
application_df.drop(columns=['Holiday_Flag'], inplace=True)

# Group data by 'Store' column
grouped_data = application_df.groupby('Store')

# Create lists to store results for all stores
all_accuracies = []
all_conf_matrices = []
all_classification_reps = []

# Loop through all stores
for store, store_df in grouped_data:
    print(f"Processing Store {store}...")

    # Separate the features and the target variable
    X = store_df.drop(columns=['Is_Holiday'])
    y = store_df['Is_Holiday']

    # Split the data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create and train the logistic regression model
    logistic_model = LogisticRegression()
    logistic_model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = logistic_model.predict(X_test)

    # Evaluate the model's performance
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    # Store results for this store
    all_accuracies.append(accuracy)
    all_conf_matrices.append(conf_matrix)
    all_classification_reps.append(classification_rep)

    # Print the results for this store
    print("Accuracy:", accuracy)
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(classification_rep)
    print("-" * 50)

# Print overall average accuracy for all stores
print("Overall Average Accuracy:", sum(all_accuracies) / len(all_accuracies))


Processing Store 1...
Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 2...
Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 3...


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 10...
Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 11...
Accuracy: 0.96551724

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 19...
Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 20...
Accuracy: 0.96551724

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 28...
Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 29...
Accuracy: 0.96551724

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 38...
Accuracy: 0.9655172413793104
Confusion Matrix:
[[28  0]
 [ 1  0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       0.00      0.00      0.00         1

    accuracy                           0.97        29
   macro avg       0.48      0.50      0.49        29
weighted avg       0.93      0.97      0.95        29

--------------------------------------------------
Processing Store 39...
Accuracy: 0.96551724

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [None]:
# The accuracy for Store 45 is approximately 96.55%.
#The classification report shows that the model performs well for class 0 (non-holiday) with high precision, recall, and F1-score. However, it performs poorly for class 1 (holiday) with a precision, recall, and F1-score of 0. This suggests that the model is not able to correctly identify any instances of class 1 in the test set for Store 45.
