In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sentence_transformers import SentenceTransformer
from sklearn.metrics import classification_report

In [2]:
# Load the dataset
df = pd.read_excel('Artificial_Data.xlsx')

# Separate structured features, text data, and target variable
X_structured = df.drop(columns=['RATING_TYPE', 'Rating', 'string_values'])
X_text = df['string_values']
y = df['Rating']

# Encode target labels as integers; label_encoder.classes_ keeps the original names
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Scale structured data.
# The scaler is fitted on the training rows only, so that test-set statistics
# do not leak into preprocessing. A non-stratified train_test_split chooses rows
# from the sample count, test_size and random_state alone, so splitting the row
# positions here with the same settings as the later split of the combined
# features selects the same training rows.
# NOTE: keep test_size / random_state below in sync with that later call.
train_rows = train_test_split(np.arange(len(df)), test_size=0.2, random_state=42)[0]
scaler = StandardScaler()
scaler.fit(X_structured.iloc[train_rows])
# Transform every row (train and test) with the train-fitted statistics so the
# resulting array still lines up row-for-row with the text data and the labels.
X_structured_scaled = scaler.transform(X_structured)


In [3]:
# Convert text data to embeddings using Sentence Transformers
text_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# encode() is documented to take a string or a list of strings, so hand it a
# plain Python list instead of the pandas Series: a list does not depend on the
# DataFrame's index labels. Missing cells become empty strings and any
# non-string values are cast to str so every entry can be tokenized.
sentences = X_text.fillna('').astype(str).tolist()
X_text_embeddings = text_model.encode(sentences)

print(f"Text Embeddings Shape: {X_text_embeddings.shape}")


Text Embeddings Shape: (100, 384)


In [4]:
# Build the final feature matrix: the scaled structured columns come first,
# followed by the text-embedding columns, joined side by side (column axis).
feature_parts = [X_structured_scaled, X_text_embeddings]
X_combined = np.concatenate(feature_parts, axis=1)
print(f"Combined Feature Shape: {X_combined.shape}")


Combined Feature Shape: (100, 535)


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_combined,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Random Forest serves as the example classifier on the combined features.
# fit() returns the fitted estimator, so construction and training are chained.
combined_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out rows: fraction of test samples whose predicted class is correct.
y_pred_combined = combined_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_combined)

print(f"Combined Model Accuracy: {accuracy:.4f}")

Combined Model Accuracy: 0.1500


In [8]:
# Predict using the combined model
y_pred_combined = combined_model.predict(X_test)

# Generate the classification report.
# `labels` is passed explicitly so each encoded class id is paired with its
# name from the encoder. Without it, the report derives the label set from
# y_test and the predictions, and fails on a size mismatch with `target_names`
# whenever some class is absent from both (likely with a small test set).
# `zero_division=0` reports 0.00 for classes that were never predicted or have
# no test samples, without emitting an undefined-metric warning per metric.
class_ids = label_encoder.transform(label_encoder.classes_)
report = classification_report(
    y_test,
    y_pred_combined,
    labels=class_ids,
    target_names=label_encoder.classes_,
    zero_division=0,
)
print("Classification Report:\n", report)


Classification Report:
               precision    recall  f1-score   support

           A       0.00      0.00      0.00         3
          A+       0.33      0.33      0.33         3
          AA       0.00      0.00      0.00         2
         AA+       0.00      0.00      0.00         4
         AAA       0.00      0.00      0.00         1
          BB       0.00      0.00      0.00         1
         BBB       0.50      0.50      0.50         4
        BBB+       0.00      0.00      0.00         2

    accuracy                           0.15        20
   macro avg       0.10      0.10      0.10        20
weighted avg       0.15      0.15      0.15        20



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
