# In [2]:
import os
import pickle

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

# Load the data set (replace with your own loading code if needed).
# The 'text' and 'label' columns are the only ones used below.
df = pd.read_csv('fake_or_real_news.csv')

# Encode the string labels as integers. The fitted encoder is kept in a
# variable so the integer -> label mapping (label_encoder.classes_) is not lost.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['label'])

# Split the RAW texts into train and test parts before extracting features.
# Fitting TF-IDF on the full corpus would leak test-set vocabulary and IDF
# statistics into training. The split depends only on the number of rows and
# random_state, so the same rows end up in each part as when splitting the
# vectorised matrix.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42
)

# Convert texts to TF-IDF features: fit on the training texts only, then
# apply the already-fitted vectorizer to the test texts.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)

# Hyper-parameter grid for logistic regression. 'liblinear' is used because
# the grid contains both the 'l1' and the 'l2' penalty.
param_grid_lr = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear'],
}

# Grid search with 5-fold stratified cross-validation, scored by accuracy.
grid_search_lr = GridSearchCV(
    LogisticRegression(max_iter=1000),
    param_grid_lr,
    cv=StratifiedKFold(5),
    scoring='accuracy',
)

# Train the model (runs the whole grid search on the training part).
grid_search_lr.fit(X_train_tfidf, y_train)

# Save the best model and the fitted vectorizer. The output directory is
# created first so a fresh checkout does not fail with FileNotFoundError.
model_path = 'src/model_lr.pkl'
vectorizer_path = 'src/vectorizer.pkl'
os.makedirs(os.path.dirname(model_path), exist_ok=True)

with open(model_path, 'wb') as model_file:
    pickle.dump(grid_search_lr.best_estimator_, model_file)

with open(vectorizer_path, 'wb') as vec_file:
    pickle.dump(vectorizer, vec_file)
