# Library requirements

In [None]:
!pip install transformers datasets
!pip install opendatasets

In [None]:
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from numpy import loadtxt
import matplotlib.pyplot as plt
from xgboost import plot_importance, plot_tree
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from transformers import AutoModelForImageClassification, AutoFeatureExtractor
from PIL import Image
import opendatasets as od
from PIL import Image, ImageFilter, ImageChops, ImageEnhance, ImageOps
from google.colab import files

# XGBoost with text features

In [None]:
# Load the label table and the precomputed text feature table from disk
y = pd.read_csv("feature_arrays/labels.csv")
X_text_features = pd.read_csv("feature_arrays/text_features.csv")

In [None]:
# Remove the leftover CSV index column from both tables.
# Rebinding with `errors='ignore'` (instead of `inplace=True`) keeps this cell safe to re-run:
# a second execution is a no-op rather than a KeyError on the already-removed column.
X_text_features = X_text_features.drop(columns=['Unnamed: 0'], errors='ignore')
y = y.drop(columns=['Unnamed: 0'], errors='ignore')
display(X_text_features)
display(y)

In [None]:
# Stratified split of the text features: 70% training, 30% test (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X_text_features,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Keep the test labels under their own name; later cells overwrite `y_test`
y_actual = y_test

# Report the shape of every split
for split_label, split_frame in (
    ("X_train: ", X_train),
    ("X_test: ", X_test),
    ("y_train: ", y_train),
    ("y_test: ", y_test),
):
    print(split_label, split_frame.shape)

# Show the splits themselves
display(X_train, y_train, X_test, y_test)

In [None]:
# Train and test a model that sees only the text features

# fit an XGBoost classifier with default settings on the training split
model = XGBClassifier()
model.fit(X_train, y_train)

# predict the test split and round every prediction
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

# kept under its own name because later cells reuse `predictions`
y_for_texts = predictions

# accuracy on the test split, printed as a percentage
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", (accuracy * 100.0))

In [None]:
# Start the results file afresh (mode 'w' truncates it) with the text-only accuracy.
# Note: despite the .csv extension the file holds free text, not comma-separated values.
text = 'This is the accuracy score for just the text features'
with open('evaluation_results.csv','w') as f:
    f.writelines([text, "\n"+str(accuracy)+"\n", "\n"])

In [None]:
# Confusion matrix of the current model on the current test split
ConfusionMatrixDisplay.from_estimator(estimator=model, X=X_test, y=y_test)
plt.show()

In [None]:
# Horizontal bar chart of the model's feature importances, ordered by importance
importances = model.feature_importances_
sorted_idx = importances.argsort()
plt.figure(figsize=(6,8))
plt.barh(X_train.columns[sorted_idx], importances[sorted_idx])
plt.xlabel("Feature Importance")

In [None]:
# XGBoost's own feature-importance plot for the current model
plot_importance(booster=model)
plt.show()

# XGBoost with image features

In [None]:
# Load the precomputed image features and discard the leftover CSV index column
X_image_features = pd.read_csv("feature_arrays/image_features.csv").drop(columns=['Unnamed: 0'])
X_image_features

In [None]:
# Stratified split of the image features: 70% training, 30% test (same seed as the text split)
X_train, X_test, y_train, y_test = train_test_split(
    X_image_features,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Report the shape of every split
for split_label, split_frame in (
    ("X_train: ", X_train),
    ("X_test: ", X_test),
    ("y_train: ", y_train),
    ("y_test: ", y_test),
):
    print(split_label, split_frame.shape)

# Show the splits themselves
display(X_train, y_train, X_test, y_test)

In [None]:
# Train and test a model that sees only the image features

# fit an XGBoost classifier with default settings on the training split
model = XGBClassifier()
model.fit(X_train, y_train)

# predict the test split and round every prediction
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))
# kept under its own name because later cells reuse `predictions`
y_for_images = predictions

# accuracy on the test split, printed as a percentage
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", (accuracy * 100.0))

In [None]:
# Append the image-only accuracy to the results file
text = 'This is the accuracy score for just the image features'
with open('evaluation_results.csv','a+') as f:
    f.writelines([text, "\n"+str(accuracy)+"\n", "\n"])

In [None]:
# Confusion matrix of the current model on the current test split
ConfusionMatrixDisplay.from_estimator(estimator=model, X=X_test, y=y_test)
plt.show()

In [None]:
# Horizontal bar chart of the model's feature importances, ordered by importance
importances = model.feature_importances_
sorted_idx = importances.argsort()
plt.figure(figsize=(6,8))
plt.barh(X_train.columns[sorted_idx], importances[sorted_idx])
plt.xlabel("Feature Importance")

In [None]:
# XGBoost's own feature-importance plot for the current model
plot_importance(booster=model)
plt.show()

# XGBoost with both features

In [None]:
# Load the combined (text + image) features and discard the leftover CSV index column
X_all_features = pd.read_csv("feature_arrays/all_features.csv").drop(columns=['Unnamed: 0'])
X_all_features

In [None]:
# Stratified split of the combined features: 70% training, 30% test (same seed as before)
X_train, X_test, y_train, y_test = train_test_split(
    X_all_features,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Report the shape of every split
for split_label, split_frame in (
    ("X_train: ", X_train),
    ("X_test: ", X_test),
    ("y_train: ", y_train),
    ("y_test: ", y_test),
):
    print(split_label, split_frame.shape)

# Show the splits themselves
display(X_train, y_train, X_test, y_test)

In [None]:
# Train and test a model that sees both the text and the image features

# fit an XGBoost classifier with default settings on the training split
model = XGBClassifier()
model.fit(X_train, y_train)

# predict the test split and round every prediction
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))
# kept under its own name for the cross-model comparison later in the notebook
y_for_both = predictions

# accuracy on the test split, printed as a percentage
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", (accuracy * 100.0))

In [None]:
# Append the combined-features accuracy to the results file
text = 'This is the accuracy score for both text + image features'
with open('evaluation_results.csv','a+') as f:
    f.writelines([text, "\n"+str(accuracy)+"\n", "\n"])

In [None]:
# Confusion matrix of the combined model; saved to disk before it is shown
ConfusionMatrixDisplay.from_estimator(estimator=model, X=X_test, y=y_test)
plt.savefig('confusionmatrix.png', dpi=1000)
plt.show()

In [None]:
# Horizontal bar chart of the combined model's feature importances, saved as a PNG
importances = model.feature_importances_
sorted_idx = importances.argsort()
plt.figure(figsize=(6,8))
plt.barh(X_train.columns[sorted_idx], importances[sorted_idx])
plt.xlabel("Feature Importance")
plt.savefig("feature_imp_sorted.png", bbox_inches="tight", dpi=1000)

In [None]:
# Names of all features whose importance in the current model exceeds 0.060
top_feature_names = np.array([
    feature_name
    for feature_name, importance in zip(X_train.columns, model.feature_importances_)
    if importance > 0.060
])

print(top_feature_names)

In [None]:
# XGBoost's own feature-importance plot for the combined model, saved as a PNG
plot_importance(booster=model)
plt.savefig('featureimp_xgboost.png')
plt.show()

In [None]:
# Draw a decision tree of the combined-features model on a high-resolution canvas and save it
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,8), dpi=600)
plot_tree(model, ax=ax)
plt.savefig('decision_tree.png')
plt.show()

# Extracting correct samples

In [None]:
# Ground-truth labels of the current test split as a plain Python list
truth_list = y_test['labels'].to_list()

In [None]:
# Positions (within y_pred / the test split) where the prediction equals the truth
correct_samples = [pos for pos in range(len(y_pred)) if truth_list[pos] == y_pred[pos]]

# keep three of them (the 5th to 7th) for the explanation
correct_samples = correct_samples[4:7]

# label of each kept sample - 0 (men) or 1 (women)
for pos in correct_samples:
    print(truth_list[pos])

print(correct_samples)

In [None]:
# Look up the image id of every selected sample in id.csv.
# NOTE(review): `correct_samples` holds positions within the test split (it is built by
# iterating over `y_pred`), while the lookup below addresses rows of id.csv by position -
# confirm id.csv is stored in that same order.

ids = pd.read_csv("feature_arrays/id.csv").drop(columns=['Unnamed: 0'])

# `.iloc[row, 0]` reads the first column directly; the former `.iloc[row][0]` relied on
# integer keys falling back to positions on a label-indexed Series, which pandas deprecates.
correct_ids = [ids.iloc[pos, 0] for pos in correct_samples]

print(correct_ids)

In [None]:
# Read the text table "X_for_text_explanation" and collect the text of the selected samples.
# NOTE(review): `correct_samples` holds positions within the test split, while the lookup
# below addresses rows of the CSV by position - confirm the CSV is stored in that same order.

X_for_text_explanation = pd.read_csv("feature_arrays/X_for_text_explanation.csv").drop(columns=['Unnamed: 0'])

# column 1 holds the text; `.iloc[row, 1]` avoids the deprecated positional `Series[1]` lookup
all_sents = [X_for_text_explanation.iloc[each_value, 1] for each_value in correct_samples]

all_sents

In [None]:
# Download the Kaggle "fashion-product-images-small" dataset via opendatasets.
# Original author's note: use your API key for this step.
# NOTE(review): presumably the download lands in ./fashion-product-images-small, the folder
# later cells read images from - confirm.

od.download("https://www.kaggle.com/datasets/paramaggarwal/fashion-product-images-small")

In [None]:
# Re-save the image of every selected (correct) sample as "<id>.png" in the working directory
temp_idl = correct_ids

for image_id in tqdm(temp_idl):
    image_name = "fashion-product-images-small/images/"+str(image_id)+".jpg"
    image = Image.open(image_name)
    image.save(str(image_id)+".png")
    print(image)


In [None]:
# Feature values of the selected correctly classified samples.
# `correct_samples` holds positions within the test split, so the rows are read from `X_test`
# (the split the predictions were made on), not from the unsplit `X_all_features`, whose
# row order differs from the test split.
for pos in correct_samples:
    print(X_test.iloc[pos])

# Extracting incorrect samples

In [None]:
# Positions (within y_pred / the test split) where the prediction differs from the truth
incorrect_samples = [pos for pos in range(len(y_pred)) if truth_list[pos] != y_pred[pos]]

# keep two of them (the 42nd and the 47th) for the explanation
incorrect_samples = [incorrect_samples[41], incorrect_samples[46]]

# label of each kept sample - 0 (men) or 1 (women)
for pos in incorrect_samples:
    print(truth_list[pos])

incorrect_samples

In [None]:
# Look up the image id of every selected misclassified sample in the `ids` table.
# NOTE(review): `incorrect_samples` holds positions within the test split, while the lookup
# below addresses rows of `ids` by position - confirm `ids` is stored in that same order.

# `.iloc[row, 0]` reads the first column directly; the former `.iloc[row][0]` relied on
# integer keys falling back to positions on a label-indexed Series, which pandas deprecates.
incorrect_ids = [ids.iloc[pos, 0] for pos in incorrect_samples]

incorrect_ids

In [None]:
# Collect the text of the selected misclassified samples from "X_for_text_explanation".
# NOTE(review): `incorrect_samples` holds positions within the test split, while the lookup
# below addresses rows of the table by position - confirm the table is stored in that same order.

# column 1 holds the text; `.iloc[row, 1]` avoids the deprecated positional `Series[1]` lookup
all_sents = [X_for_text_explanation.iloc[each_value, 1] for each_value in incorrect_samples]

all_sents

In [None]:
# Re-save the image of every selected (misclassified) sample as "<id>.png" in the working directory
temp_idl = incorrect_ids

for image_id in tqdm(temp_idl):
    image_name = "fashion-product-images-small/images/"+str(image_id)+".jpg"
    image = Image.open(image_name)
    image.save(str(image_id)+".png")
    print(image)


In [None]:
# Feature values of the selected misclassified samples.
# `incorrect_samples` holds positions within the test split, so the rows are read from `X_test`
# (the split the predictions were made on), not from the unsplit `X_all_features`, whose
# row order differs from the test split.
for pos in incorrect_samples:
    print(X_test.iloc[pos])

# Counter-factual example type 1

In [None]:
# checking some samples where one modality alone (text or image) predicted the correct label
# while the other modality alone predicted the wrong label,
# and combining them caused the overall model to predict the wrong label

In [None]:
# Test positions where the text-only and image-only models disagree with each other
all_mismatch1 = [pos for pos in range(len(y_for_texts)) if y_for_texts[pos] != y_for_images[pos]]

# Test positions where the combined model is wrong.
# `.iloc[pos, 0]` reads the label directly instead of the deprecated positional `Series[0]` lookup.
all_mismatch2 = [pos for pos in range(len(y_for_both)) if y_for_both[pos] != y_actual.iloc[pos, 0]]

# unused in this cell; kept in case other cells rely on the name
l1 = []

# Positions present in both lists, sorted so the report order is reproducible
list1 = sorted(set(all_mismatch1) & set(all_mismatch2))

for n in list1:
    print("Mismatch sample index: ", n)
    print("Actual label of the input at that index: ", y_actual.iloc[n, 0])
    print("Predicted label with both modalities: ", y_for_both[n])
    print("Predicted label with just text: ", y_for_texts[n])
    print("Predicted label with just image: ", y_for_images[n])
    print("\n")

In [None]:
# Look up the image id of every mismatch sample in id.csv.
# NOTE(review): `list1` holds positions within the test split, while the lookup below
# addresses rows of id.csv by position - confirm id.csv is stored in that same order.

ids = pd.read_csv("feature_arrays/id.csv").drop(columns=['Unnamed: 0'])

# `.iloc[row, 0]` reads the first column directly; the former `.iloc[row][0]` relied on
# integer keys falling back to positions on a label-indexed Series, which pandas deprecates.
mismatch_ids = [ids.iloc[pos, 0] for pos in list1]

mismatch_ids

In [None]:
# Read the text table "X_for_text_explanation" and collect the text of the mismatch samples.
# NOTE(review): `list1` holds positions within the test split, while the lookup below
# addresses rows of the CSV by position - confirm the CSV is stored in that same order.

X_for_text_explanation = pd.read_csv("feature_arrays/X_for_text_explanation.csv").drop(columns=['Unnamed: 0'])

# column 1 holds the text; `.iloc[row, 1]` avoids the deprecated positional `Series[1]` lookup
all_sents = [X_for_text_explanation.iloc[each_value, 1] for each_value in list1]

all_sents

In [None]:
# Feature values of the mismatch samples.
# `list1` holds positions within the test split, so the rows are read from `X_test`
# (the split the predictions were made on), not from the unsplit `X_all_features`, whose
# row order differs from the test split.
for pos in list1:
    print(X_test.iloc[pos])

# Counter-factual example type 2

In [None]:
# take one of the rows, remove some text from the input text and then give it to the model
# see if there are any changes

In [None]:
# Ground-truth labels of the current test split as a plain Python list
truth_list = y_test['labels'].to_list()

In [None]:
# Positions (within y_pred / the test split) where the prediction equals the truth
correct_samples = [pos for pos in range(len(y_pred)) if truth_list[pos] == y_pred[pos]]

# keep exactly one of them (the 5th) for the counter-factual experiment
correct_samples = correct_samples[4:5]

# print and remember the label of the kept sample - 0 (men) or 1 (women)
y_test_one = []
for pos in correct_samples:
    print(truth_list[pos])
    y_test_one.append(truth_list[pos])

print(correct_samples)


# Look up the image id of the kept sample in id.csv.
# NOTE(review): `correct_samples` holds positions within the test split, while the lookup
# below addresses rows of id.csv by position - confirm id.csv is stored in that same order.

ids = pd.read_csv("feature_arrays/id.csv").drop(columns=['Unnamed: 0'])

# `.iloc[row, 0]` reads the first column directly; the former `.iloc[row][0]` relied on
# integer keys falling back to positions on a label-indexed Series, which pandas deprecates.
correct_ids = [ids.iloc[pos, 0] for pos in correct_samples]

print(correct_ids)

In [None]:
# Read the text table "X_for_text_explanation" and collect the text of the selected sample.
# NOTE(review): `correct_samples` holds positions within the test split, while the lookup
# below addresses rows of the CSV by position - confirm the CSV is stored in that same order.

X_for_text_explanation = pd.read_csv("feature_arrays/X_for_text_explanation.csv").drop(columns=['Unnamed: 0'])

# column 1 holds the text; `.iloc[row, 1]` avoids the deprecated positional `Series[1]` lookup
all_sents = [X_for_text_explanation.iloc[each_value, 1] for each_value in correct_samples]

all_sents

In [None]:
# Counter-factual text: the first selected sentence with every 'T-shirt Topwear' removed.
# If the phrase does not occur, the sentence is left unchanged.
changed_sent = ''.join(all_sents[0].split('T-shirt Topwear'))
changed_sent

In [None]:
# Empty frame carrying the names of the first eight training columns (the text features)
columns = list(X_train.columns[0:8])
X_text = pd.DataFrame(columns=columns)
X_text.head()

In [None]:
# Compute the text features of the changed sentence with the pretrained text classifier

tokenizer_text_features = AutoTokenizer.from_pretrained("Showroom/clothing_subcategory_classifier")
model_text_features = AutoModelForSequenceClassification.from_pretrained("Showroom/clothing_subcategory_classifier")

inputs = tokenizer_text_features(changed_sent, return_tensors="pt")
# inference only: no gradients are needed
with torch.no_grad():
    outputs = model_text_features(**inputs)
logits = outputs.logits
probs = logits.softmax(dim=1)  # label probabilities

# original author's note: asking for more than 8 raises an index error, so this model yields 8 features
topk = torch.topk(logits, 8).indices

# one record mapping each top label name to its probability
temp_dictionary = {
    model_text_features.config.id2label[label_id.item()]: probs[0][label_id.item()].item()
    for label_id in topk[0]
}
temp_list_of_dictionaries = [temp_dictionary]

# `DataFrame.append` was removed in pandas 2.0, so the row frame is built directly and aligned
# to the columns of the `X_text` template. This also makes the cell idempotent: re-running it
# rebuilds the single row instead of appending another one.
text_feature_rows = pd.DataFrame(temp_list_of_dictionaries)
unexpected_labels = set(text_feature_rows.columns) - set(X_text.columns)
if unexpected_labels:
    # fail here with a clear message instead of handing the model columns it was not trained on
    raise ValueError(f"labels without a matching feature column: {sorted(unexpected_labels)}")

X_text = text_feature_rows.reindex(columns=X_text.columns)
X_text

In [None]:
# The image is left untouched in this experiment, so its features are reused as-is
# from "X_all_features" (image columns only).

# Original author's note: the sample used is id 1615, but its index in the table is 4.
# NOTE(review): row 4 is hard-coded rather than derived from `correct_samples` - confirm
# it really is the sample whose text and label are used in this experiment.

# row 4 only; columns from 8 onwards, because the first 8 features are text
X_image = X_all_features.iloc[[4], 8:]
X_image

In [None]:
# Put the text and image features side by side in one row; the image part is first
# given the index of the text part so that the two line up
image_part = X_image.set_index(X_text.index)
X_together = pd.concat([X_text, image_part], axis=1)

X_together

In [None]:
# Predict the single changed sample with the current model
y_pred_one = model.predict(X_together)
predictions_one = list(map(round, y_pred_one))

# accuracy against the label remembered for that sample, as a percentage
accuracy_one = accuracy_score(y_test_one, predictions_one)
print("Accuracy:", (accuracy_one * 100.0))

# Counter-factual example type 3

In [None]:
# try removing something from an image

In [None]:
# NOTE(review): the value in `correct_samples` is later used to build an image file name,
# and `correct_ids` is not read in the cells that follow - the two names look swapped; confirm.
correct_samples = [1758]
correct_ids = [6]

# Text features of row 5 of the combined table (first 8 columns), repeated four times
X_text = X_all_features.iloc[5:6, :8]
X_text_new = pd.DataFrame(np.repeat(X_text.values, 4, axis=0), columns=X_text.columns)
X_text_new

In [None]:
# Build four altered versions of every selected image (inverted, mirrored,
# inverted + mirrored, contour-filtered), save each as a PNG and show it inline.
changed_image_list = []

for image_id in tqdm(correct_samples):

    image_name = "fashion-product-images-small/images/"+str(image_id)+".jpg"
    image = Image.open(image_name)

    invert_image = ImageChops.invert(image)

    # (output file, altered image) - the order defines the row order of the image features
    variants = [
        ("inverted_image.png", invert_image),                    # inverting image
        ("mirrored_image.png", ImageOps.mirror(image)),          # mirroring
        ("iv_image.png", ImageOps.mirror(invert_image)),         # inverting and mirroring
        ("cont_image.png", image.filter(ImageFilter.CONTOUR)),   # contour
    ]

    for file_name, variant in variants:
        changed_image_list.append(variant)
        variant.save(file_name)
        # `Image.show()` launches an external viewer and does not render in the notebook;
        # `display` renders the image inline
        display(variant)


In [None]:
# Empty frame carrying the names of the training columns from position 8 onwards (the image features)
columns = list(X_train.columns[8:])
X_image = pd.DataFrame(columns=columns)
X_image.head()

In [None]:
# Compute the image features of every altered image with the pretrained image classifier

extractor_image_features = AutoFeatureExtractor.from_pretrained("aalonso-developer/vit-base-clothing-leafs-example-full-simple_highres")
model_image_features = AutoModelForImageClassification.from_pretrained("aalonso-developer/vit-base-clothing-leafs-example-full-simple_highres")

temp_list_of_dictionaries = []

for changed_image in changed_image_list:

    inputs = extractor_image_features(changed_image, return_tensors="pt")

    # inference only: no gradients are needed
    with torch.no_grad():
        logits = model_image_features(**inputs).logits

    probs = logits.softmax(dim=1)  # label probabilities
    topk = torch.topk(logits, 3).indices

    # one record per image, mapping each of its top-3 label names to its probability
    temp_dictionary = {
        model_image_features.config.id2label[label_id.item()]: probs[0][label_id.item()].item()
        for label_id in topk[0]
    }
    temp_list_of_dictionaries.append(temp_dictionary)

# `DataFrame.append` was removed in pandas 2.0, so the row frame is built directly and aligned
# to the columns of the `X_image` template; labels outside an image's top 3 stay NaN.
# This also makes the cell idempotent: re-running it rebuilds the rows instead of appending more.
image_feature_rows = pd.DataFrame(temp_list_of_dictionaries)
unexpected_labels = set(image_feature_rows.columns) - set(X_image.columns)
if unexpected_labels:
    # fail here with a clear message instead of handing the model columns it was not trained on
    raise ValueError(f"labels without a matching feature column: {sorted(unexpected_labels)}")

X_image = image_feature_rows.reindex(columns=X_image.columns)
X_image

In [None]:
# Missing image features become 0
X_image = X_image.fillna(value=0)


In [None]:
# Put the repeated text features and the altered-image features side by side
X_together = pd.concat(objs=[X_text_new, X_image], axis="columns")
X_together

In [None]:
# Predict the altered samples with the current model
y_pred_change_images = model.predict(X_together)
predictions_change_images = list(map(round, y_pred_change_images))

# expected label, hard-coded to 0 for each of the four rows
y_test_change_images = [0,0,0,0]
for pos, predicted in enumerate(predictions_change_images):
    print(y_test_change_images[pos], predicted)

# accuracy against the expected labels, as a percentage
accuracy_one = accuracy_score(y_test_change_images, predictions_change_images)
print("Accuracy:", (accuracy_one * 100.0))

# Counter-factual example type 4

In [None]:
# modify both the image and the text, then predict with the already-trained combined model

In [None]:
# Counter-factual text: the product description with 'Jacket Topwear' and 'Sports' removed
# (the surrounding spaces are left in place)
correct_text = (
    'Lotto  White Collared Jacket Topwear Fall Sports'
    .replace('Jacket Topwear', '')
    .replace('Sports', '')
)
correct_text

In [None]:
# Empty frame carrying the names of the first eight training columns (the text features)
columns = list(X_train.columns[0:8])
X_text = pd.DataFrame(columns=columns)
X_text.head()

In [None]:
# Compute the text features of the changed description with the pretrained text classifier

tokenizer_text_features = AutoTokenizer.from_pretrained("Showroom/clothing_subcategory_classifier")
model_text_features = AutoModelForSequenceClassification.from_pretrained("Showroom/clothing_subcategory_classifier")

inputs = tokenizer_text_features(correct_text, return_tensors="pt")
# inference only: no gradients are needed
with torch.no_grad():
    outputs = model_text_features(**inputs)
logits = outputs.logits
probs = logits.softmax(dim=1)  # label probabilities

# original author's note: asking for more than 8 raises an index error, so this model yields 8 features
topk = torch.topk(logits, 8).indices

# one record mapping each top label name to its probability
temp_dictionary = {
    model_text_features.config.id2label[label_id.item()]: probs[0][label_id.item()].item()
    for label_id in topk[0]
}
temp_list_of_dictionaries = [temp_dictionary]

# `DataFrame.append` was removed in pandas 2.0, so the row frame is built directly and aligned
# to the columns of the `X_text` template. This also makes the cell idempotent: re-running it
# rebuilds the single row instead of appending another one.
text_feature_rows = pd.DataFrame(temp_list_of_dictionaries)
unexpected_labels = set(text_feature_rows.columns) - set(X_text.columns)
if unexpected_labels:
    # fail here with a clear message instead of handing the model columns it was not trained on
    raise ValueError(f"labels without a matching feature column: {sorted(unexpected_labels)}")

X_text = text_feature_rows.reindex(columns=X_text.columns)
X_text

In [None]:
# Repeat the text feature rows four times, keeping the column names
X_text_new = pd.DataFrame(np.repeat(X_text.values, 4, axis=0), columns=X_text.columns)
X_text_new

In [None]:
# Put the changed-text features and the current `X_image` features side by side
X_together = pd.concat(objs=[X_text_new, X_image], axis="columns")
X_together

In [None]:
# Predict the samples with changed text and changed images using the current model
y_pred_change_images = model.predict(X_together)
predictions_change_images = list(map(round, y_pred_change_images))

# expected label, hard-coded to 0 for each of the four rows
y_test_change_images = [0,0,0,0]
for pos, predicted in enumerate(predictions_change_images):
    print(y_test_change_images[pos], predicted)

# accuracy against the expected labels, as a percentage
accuracy_one = accuracy_score(y_test_change_images, predictions_change_images)
print("Accuracy:", (accuracy_one * 100.0))

# Counter-factual example type 5

In [None]:
# remove the top features from the X_all_features and then train model again

In [None]:
# Combined feature table without the columns listed in `top_feature_names`
X_all_features_remove = X_all_features.drop(columns=top_feature_names)
X_all_features_remove

In [None]:
# Stratified split of the reduced feature table: 70% training, 30% test (same seed as before).
# Note: this rebinds X_train / X_test / y_train / y_test used by earlier sections.
X_train, X_test, y_train, y_test = train_test_split(
    X_all_features_remove,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)


In [None]:
# Fit a separate XGBoost classifier on the reduced feature set
model_remove = XGBClassifier()
model_remove.fit(X_train, y_train)

# predict the test split and round every prediction
y_pred_remove = model_remove.predict(X_test)
predictions_remove = list(map(round, y_pred_remove))

# accuracy on the test split, printed as a percentage
accuracy = accuracy_score(y_test, predictions_remove)
print("Accuracy:", (accuracy * 100.0))

# original author's observation: no significant decrease in accuracy

# Download all result files

In [None]:
# Send every figure saved by this notebook to the browser (Colab download helper)
for result_file in (
    "confusionmatrix.png",
    "feature_imp_sorted.png",
    "featureimp_xgboost.png",
    "decision_tree.png",
):
    files.download(result_file)