In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import cupy as cp  # Import CuPy for GPU arrays

# Train a linear SVM that predicts a wine's country from the TF-IDF features of
# its review text, then print a per-country classification report.
from sklearn.svm import SVC  # scikit-learn's CPU SVC, as cuML is not available on Windows

# Load data.
# Rows without a description cannot be vectorized (TfidfVectorizer rejects NaN
# documents) and a missing country is not a usable class label, so drop both
# up front instead of failing later in the pipeline.
data = pd.read_csv('wine_reviews_processed.csv')
data = data.dropna(subset=['description', 'country'])
X = data['description']
y = data['country']

# Encode country names as integer class ids; `label_encoder.classes_[i]` is the
# country name for id `i`.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Vectorize the review text into at most 50 TF-IDF features (dense, on the CPU).
# The matrix is intentionally kept as a NumPy array: the model below runs on the
# CPU, so copying the features to the GPU with CuPy and immediately back again
# would only cost transfer time and GPU memory without changing any result.
vectorizer = TfidfVectorizer(max_features=50, stop_words='english')
X = vectorizer.fit_transform(X).toarray()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the CPU-based linear SVM classifier.
# NOTE(review): SVC training time grows faster than linearly with the number of
# rows; if this is too slow on the full dataset, consider LinearSVC.
svm_model = SVC(kernel='linear', C=1)
svm_model.fit(X_train, y_train)

# Predict and evaluate.
# Rare countries may be absent from both the held-out labels and the
# predictions. Passing every entry of `classes_` as `target_names` without
# `labels` then raises a ValueError (name count != label count), so restrict
# the report to the class ids that actually occur and look up their names.
y_pred = svm_model.predict(X_test)
report_labels = sorted(set(y_test) | set(y_pred))
print(classification_report(
    y_test,
    y_pred,
    labels=report_labels,
    target_names=label_encoder.classes_[report_labels],
))


: 