In [24]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

In [25]:
# Read the training and test splits from their CSV files, in that order.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("data/train.csv", "data/test.csv")
)

In [26]:
def replace_null_with_text(df, column_name, replacement_text):
    """Fill the missing entries of one column with a fixed placeholder value.

    The frame is modified in place and the very same object is returned, so
    the call works both for its side effect and as the right-hand side of an
    assignment.

    Args:
        df: DataFrame to patch.
        column_name: Label of the column whose null cells are overwritten.
        replacement_text: Value written into every null cell of that column.

    Returns:
        The ``df`` object that was passed in.
    """
    is_missing = df[column_name].isna()
    df.loc[is_missing, column_name] = replacement_text
    return df

# Test rows with a missing TEXT receive a placeholder sentence instead of
# being removed, so the number of rows in test_df is left unchanged.
test_df = replace_null_with_text(
    test_df,
    column_name='TEXT',
    replacement_text="2001 a space odyssey is one of the greatest movies of all time",
)



In [27]:
# Discard every training row that has a missing value in any column.
# The test frame is not filtered here.
train_df = train_df.dropna(axis=0, how='any')

In [28]:
# Show (n_rows, n_columns) of the test frame after the null-filling step.
test_df.shape

(17580, 2)

In [29]:
# Integer code for each of the three class names stored in the LABEL column.
label_map = {
    'Not a (movie/TV show) review': 0,
    'Positive (movie/TV show) review': 1,
    'Negative (movie/TV show) review': 2,
}

# Build a new frame with .assign instead of writing a column into the frame
# returned by dropna(): the in-place write raises SettingWithCopyWarning on
# pandas versions without copy-on-write, and .assign keeps the cell re-runnable.
# Any LABEL value that is not a key of label_map becomes NaN in 'label'.
train_df = train_df.assign(label=train_df['LABEL'].map(label_map))

# NOTE(review): the classifier further down is fit on the string 'LABEL'
# column, so this encoded 'label' column is currently not used by the model.

In [41]:
# TF-IDF features built from 1-grams up to 3-grams.
vectorizer = TfidfVectorizer(
    ngram_range=(1, 3),
)


In [42]:
# Learn the vocabulary and IDF weights from the training text only ...
X_train = vectorizer.fit_transform(raw_documents=train_df['TEXT'])
# ... then project the test text onto that same fitted vocabulary.
X_test = vectorizer.transform(raw_documents=test_df['TEXT'])

In [43]:
# Linear SVM on the TF-IDF matrix. The dual coordinate-descent solver shuffles
# the data with a pseudo-random generator, so the seed is fixed to make a
# Restart & Run All reproduce the same model and the same submission.
clf = LinearSVC(random_state=42)
# The target is the original string 'LABEL' column, so predictions come back
# as the class-name strings rather than integer codes.
clf.fit(X_train, train_df['LABEL'])



In [44]:
# Predict a class for every row of the test feature matrix.
y_pred = clf.predict(X=X_test)
# Store the predictions on the test frame as a new 'LABEL' column.
test_df['LABEL'] = y_pred

In [45]:
# Two-column submission table: the test IDs next to their predicted labels.
submission_df = pd.DataFrame(
    data={
        'ID': test_df['ID'],
        'LABEL': y_pred,
    },
)

In [46]:
# Show (n_rows, n_columns) of the submission table before it is written out.
submission_df.shape

(17580, 2)

In [47]:
# Write the submission table to disk; the DataFrame index is not exported.
submission_df.to_csv(
    path_or_buf="submission.csv",
    index=False,
)