In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load datasets.
# Both CSVs must provide a 'text' column (model input) and a 'sentiment'
# column (label); those are the only two columns read below.
train_df = pd.read_csv('twitter_training.csv')
val_df = pd.read_csv('twitter_validation.csv')

# Clean text data.
# Only rows missing the text or its label are unusable, so restrict dropna
# to those two columns: a gap in any unrelated column should not cost a
# training example or shrink the evaluation set. The drop is done in place
# so val_df keeps exactly the rows that get predictions further down.
REQUIRED_COLUMNS = ['text', 'sentiment']
train_df.dropna(subset=REQUIRED_COLUMNS, inplace=True)
val_df.dropna(subset=REQUIRED_COLUMNS, inplace=True)
X_train, y_train = train_df['text'], train_df['sentiment']
X_val, y_val = val_df['text'], val_df['sentiment']

# Vectorize text.
# Bag-of-words counts with English stop words removed and the vocabulary
# capped at 3000 terms. The vocabulary is fitted on the training text only;
# the validation text is merely projected onto it, so nothing from the
# validation set leaks into the features.
vectorizer = CountVectorizer(stop_words='english', max_features=3000)
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec = vectorizer.transform(X_val)

# Train model.
# max_iter bounds the number of solver iterations spent fitting.
model = LogisticRegression(max_iter=200)
model.fit(X_train_vec, y_train)

# Predict on the validation set and report accuracy rounded to 2 decimals.
y_pred = model.predict(X_val_vec)
acc = accuracy_score(y_val, y_pred)
print('Accuracy:', round(acc, 2))

# Save predictions.
# y_pred has one entry per remaining row of val_df (X_val was taken after the
# in-place drop), so the new column lines up row-for-row. The index is not
# written to the output file.
val_df['predicted_sentiment'] = y_pred
val_df.to_csv('twitter_sentiment_results.csv', index=False)
print('Results saved to twitter_sentiment_results.csv')