In [None]:
# Notebook-level parameters consumed by the pipeline below.
test_size = 0.2     # Fraction of the samples held out as the test set
n_estimators = 100  # Number of trees in the random forest classifier

In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import torch
from transformers import BertTokenizer, BertModel
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

###############################################
# 1. Text prior: obtain hidden state via BERT from the first row of Excel text
###############################################
# Change to your Excel file path
text_data_path = '../Sample Data Texts.xlsx'
df_text = pd.read_excel(text_data_path)
if df_text.empty:
    raise ValueError(f"No rows found in {text_data_path!r}")

# Get the first row's "List of Store Names" text
first_text = df_text['List of Store Names'].iloc[0]
if pd.isna(first_text):
    raise ValueError("First row of 'List of Store Names' is empty")
# Excel cells can be parsed as numbers; the tokenizer only accepts str input.
first_text = str(first_text)
print("First row text:", first_text)

# Use pre-trained BERT model (e.g., bert-base-chinese if text is Chinese)
tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
bert_model = BertModel.from_pretrained("bert-base-chinese")
# eval() switches off dropout so the embedding is deterministic; it does not
# freeze parameters. Gradient tracking is disabled by torch.no_grad() below.
bert_model.eval()
with torch.no_grad():
    inputs = tokenizer(first_text, return_tensors="pt", truncation=True, padding=True)
    outputs = bert_model(**inputs)
    # Take the [CLS] token's hidden state, shape (1, 768), and drop the
    # batch dimension to get a (768,) vector.
    text_hidden_state = outputs.last_hidden_state[:, 0, :].squeeze(0)
print("Text hidden state shape:", text_hidden_state.shape)

###############################################
# 2. Image data loading and preprocessing
###############################################
# Change to your image dataset folder path; assume each subfolder name is a class label
img_data_dir = '../raw'

image_features = []  # One flattened 32x32 grayscale image (1024 values) per sample
labels = []          # Class label (subfolder name) for each sample, same order

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order stable across runs and machines.
for class_name in sorted(os.listdir(img_data_dir)):
    class_path = os.path.join(img_data_dir, class_name)
    if not os.path.isdir(class_path):
        continue
    for filename in sorted(os.listdir(class_path)):
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            continue
        file_path = os.path.join(class_path, filename)
        try:
            # The context manager releases the file handle even when
            # decoding fails part-way through.
            with Image.open(file_path) as raw_img:
                gray_img = raw_img.convert('L')       # Convert to grayscale
                small_img = gray_img.resize((32, 32))  # Resize to 32x32
                img_array = np.array(small_img).flatten()  # 1024-dimensional vector
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading {file_path}:", e)
            continue
        image_features.append(img_array)
        labels.append(class_name)

if not image_features:
    # Fail with a clear message instead of an IndexError on shape[1] below.
    raise RuntimeError(f"No readable images found under {img_data_dir!r}")

# Convert to NumPy array and normalize to [0, 1]
image_features = np.array(image_features, dtype='float32') / 255.0
labels = np.array(labels)
print("Number of image samples:", image_features.shape[0])
print("Original image feature dimension:", image_features.shape[1])
print("Classes:", np.unique(labels))

###############################################
# 3. Split into training and test sets
###############################################
# Stratify on the labels so both splits keep the class proportions, and fix
# the seed so the split (and the metrics computed from it) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    image_features, labels, test_size=test_size, stratify=labels, random_state=42
)
print("Number of training samples:", X_train.shape[0], "Number of test samples:", X_test.shape[0])

###############################################
# 4. Image feature dimensionality reduction (using PCA)
###############################################
# Reduce image features to 128 dimensions. PCA cannot produce more components
# than min(n_samples, n_features), so clamp the target for small datasets.
n_pca_components = min(128, X_train.shape[0], X_train.shape[1])
# random_state makes the projection reproducible when the randomized SVD
# solver is selected.
pca = PCA(n_components=n_pca_components, random_state=42)
# Fit on the training split only; the test split is projected with the same
# components so no test information leaks into the transform.
X_train_reduced = pca.fit_transform(X_train)
X_test_reduced  = pca.transform(X_test)
print("Reduced feature dimension for training set:", X_train_reduced.shape[1])

###############################################
# 5. Multimodal feature construction: concatenate image features with text prior
###############################################
# Text prior is a fixed 768-dimensional vector obtained from BERT
def fuse_features(X, text_vector):
    """Append the same text vector to every row of an image feature matrix.

    Args:
        X: 2-D NumPy array of per-sample features, shape (n_samples, n_features).
        text_vector: 1-D text embedding shared by all samples. May be a
            torch tensor (on any device, with or without gradient tracking)
            or anything ``np.asarray`` accepts.

    Returns:
        NumPy array of shape (n_samples, n_features + len(text_vector)) whose
        leading columns are ``X`` and whose trailing columns repeat
        ``text_vector`` on every row.
    """
    if isinstance(text_vector, torch.Tensor):
        # detach() first: Tensor.numpy() raises on tensors that require grad.
        text_array = text_vector.detach().cpu().numpy()
    else:
        text_array = np.asarray(text_vector)
    n_samples = X.shape[0]
    # Repeat the text vector once per sample: (n_samples, text_dim)
    text_expanded = np.tile(text_array, (n_samples, 1))
    return np.concatenate([X, text_expanded], axis=1)

# Attach the shared text prior to the reduced image features of each split.
fused_train, fused_test = (
    fuse_features(reduced_split, text_hidden_state)
    for reduced_split in (X_train_reduced, X_test_reduced)
)
print("Fused feature dimension:", fused_train.shape[1])

###############################################
# 6. Train Random Forest model
###############################################
# Fixed seed so the trained forest is reproducible for a given training set.
rf_model = RandomForestClassifier(n_estimators=n_estimators, random_state=42)
rf_model.fit(fused_train, y_train)
y_pred = rf_model.predict(fused_test)

acc = accuracy_score(y_test, y_pred)
print("Test set accuracy:", acc)
print("Classification report:")
# Pass labels= together with target_names=: without it the report raises when
# a rare class is missing from both y_test and y_pred, because the number of
# names no longer matches the number of classes found.
class_names = np.unique(labels)
print(classification_report(y_test, y_pred, labels=class_names, target_names=class_names))

