# **Classification of app reviews for requirements engineering using deep learning models**

## **Data Loading and Preprocessing**

In [1]:
import datetime
import os
import time

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from datasets import Dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import MaxAbsScaler
from sklearn.svm import LinearSVC


In [2]:
# Read the balanced review dataset from its Excel workbook.
file_path = 'dataset/balanced/gpt_balanced_8000.xlsx'
dataframe = pd.read_excel(file_path)

# Cleaning step: missing review texts become empty strings so that every row
# carries a string in the 'review' column.
dataframe = dataframe.assign(review=dataframe['review'].fillna(''))

# Wrap the frame in a Hugging Face Dataset and pull out the model inputs
# ('review') and the targets ('new_label').
dataset = Dataset.from_pandas(dataframe)
texts, labels = dataset['review'], dataset['new_label']


In [3]:
# Hold out 20% of the reviews for testing. The split is stratified on the
# labels so both partitions keep the class proportions of the full dataset.
# The seed is fixed so that the partition -- and every metric computed from
# it -- is identical after a kernel restart; without it each run shuffles
# differently and the reported scores are not reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

In [4]:
# Feature extraction and scaling (TF-IDF followed by MaxAbsScaler).
# Both transformers are fitted on the training texts only and then applied
# unchanged to the test texts.

# --- Training features ---
# Unigram + bigram TF-IDF, limited to 5000 features.
tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 2), max_features=5000)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)

# Scale each feature by its maximum absolute value seen in the training set.
scaler = MaxAbsScaler().fit(X_train_tfidf)
X_train_scaled = scaler.transform(X_train_tfidf)

# --- Test features (reuse the fitted vocabulary and scaling factors) ---
X_test_tfidf = tfidf_vectorizer.transform(X_test)
X_test_scaled = scaler.transform(X_test_tfidf)

## **Hyperparameter Tuning**

In [None]:
# Search space for the linear SVM: 4 x 3 x 3 x 2 = 72 combinations, each
# evaluated with 5-fold cross-validation.
# For a quick smoke run, shrink the grid to a single point, e.g.
# C=[0.1], tol=[1e-5], max_iter=[5000].
param_grid = {
    'C': [0.01, 0.1, 0.5, 1],
    'tol': [1e-5, 1e-4, 1e-3],
    'max_iter': [5000, 10000, 20000],
    'class_weight': [None, 'balanced'],
}

# Base estimator: primal formulation (dual=False) with a fixed seed.
svm_estimator = LinearSVC(dual=False, random_state=42)

# Rank candidates by macro-averaged F1 and use every available core.
grid_search = GridSearchCV(
    estimator=svm_estimator,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

print(f"Best Parameters: {grid_search.best_params_}")


## **Model Training**

In [None]:
# Take the winning estimator from the grid search. GridSearchCV's refit option
# is not overridden in this notebook, so (by the library default) this
# estimator is expected to arrive already fitted; the explicit fit below is
# repeated so the training time of the selected configuration can be measured.
best_model = grid_search.best_estimator_

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations. time.time() reads the wall clock, which can jump (NTP sync,
# manual changes) and distort or even negate the measured interval.
start_time = time.perf_counter()
best_model.fit(X_train_scaled, y_train)
end_time = time.perf_counter()

# Elapsed training time in seconds.
training_time = end_time - start_time

## **Model Evaluation**

In [None]:
# Predict labels for the scaled test-set features with the tuned classifier.
y_pred = best_model.predict(X_test_scaled)

In [None]:
# Evaluation metrics on the held-out test set.
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Display names for the four classes.
# NOTE(review): assumes the sorted label values map to these names in this
# order -- confirm against the 'new_label' encoding.
class_names = ['bug report', 'feature request', 'rating', 'user experience']

# Per-class precision/recall/F1 as a dict, then as a table rounded to 4 places.
report_dict = classification_report(
    y_test, y_pred, target_names=class_names, output_dict=True
)
report_df = pd.DataFrame(report_dict).T.round(4)

print(f"\nClassification Report:\n {report_df}")

macro_f1 = report_dict["macro avg"]["f1-score"]
print(f"\nMacro F1 Score: {macro_f1:.4f}")
print(f"Training Time: {training_time:.2f} seconds")

# Optional confusion-matrix heatmap (disabled; uncomment to render):
# plt.figure(figsize=(8, 6))
# sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
#             xticklabels=class_names, yticklabels=class_names)
# plt.title('Confusion Matrix')
# plt.xlabel('Predicted Labels')
# plt.ylabel('True Labels')
# plt.show()


In [None]:
# Save the classification report
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
report_path = os.path.join('models/svm', f"classification_report_{timestamp}.csv")
report_df.to_csv(report_path, float_format='%.4f')
print(f"Classification report saved to {report_path}")

In [None]:
# Save the model
# os.makedirs('models/svm', exist_ok=True)
# model_filename = f'models/svm/best_model_{timestamp}.pkl'

# joblib.dump(best_model, model_filename)
# print(f"Model saved to {model_filename}")