In [3]:
pip install newsapi

Collecting newsapi
  Downloading newsapi-0.1.1-py2.py3-none-any.whl.metadata (255 bytes)
Downloading newsapi-0.1.1-py2.py3-none-any.whl (4.1 kB)
Installing collected packages: newsapi
Successfully installed newsapi-0.1.1
Note: you may need to restart the kernel to use updated packages.


In [7]:
pip install newsapi-python


Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Downloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: newsapi-python
Successfully installed newsapi-python-0.2.7
Note: you may need to restart the kernel to use updated packages.


In [1]:
pip uninstall -y newsapi


Found existing installation: newsapi 0.1.1
Uninstalling newsapi-0.1.1:
  Successfully uninstalled newsapi-0.1.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install newsapi-python


Note: you may need to restart the kernel to use updated packages.


In [5]:
pip show newsapi-python


Name: newsapi-python
Version: 0.2.7
Summary: An unofficial Python client for the News API
Home-page: https://github.com/mattlisiv/newsapi-python
Author: Matt Lisivick
Author-email: lisivickmatt@gmail.com
License: MIT
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: requests
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [15]:
!pip show newsapi-python


Name: newsapi-python
Version: 0.2.7
Summary: An unofficial Python client for the News API
Home-page: https://github.com/mattlisiv/newsapi-python
Author: Matt Lisivick
Author-email: lisivickmatt@gmail.com
License: MIT
Location: /opt/anaconda3/lib/python3.12/site-packages
Requires: requests
Required-by: 


In [11]:
!pip install newsapi-python




In [None]:
import time
import pickle
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from newsapi import NewsApiClient
import re

API_KEY = 'bcae1bfec35f4e78a3cdc7accdbffd86'

# Initialize NewsAPI client
newsapi = NewsApiClient(api_key=API_KEY)

# Load existing model and vectorizer
with open("sentiment_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("vectorizer.pkl", "rb") as vec_file:
    vectorizer = pickle.load(vec_file)

def fetch_real_time_news():
    """Fetch latest news using NewsAPI."""
    articles = newsapi.get_everything(q='stock market', language='en', sort_by='publishedAt')
    news_data = []
    for article in articles['articles']:
        news_data.append({
            'publishedAt': article['publishedAt'],
            'title': article['title'],
            'description': article['description'],
            'content': article['content']
        })
    return pd.DataFrame(news_data)

def preprocess_text(df):
    """Clean and preprocess text data."""
    def clean_text(text):
        text = re.sub(r'http\S+', '', text)  # Remove URLs
        text = re.sub(r'[^\w\s]', '', text)  # Remove special characters
        text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespaces
        return text

    df['text'] = df[['title', 'description', 'content']].fillna('').agg(' '.join, axis=1)
    df['clean_text'] = df['text'].apply(clean_text)
    return df[['publishedAt', 'clean_text']]

def retrain_model(new_data):
    """Retrain the model with new data."""
    global model, vectorizer

    # Load existing labeled data
    labeled_data = pd.read_csv("labeled_news_data.csv")
    labeled_data = pd.concat([labeled_data, new_data], ignore_index=True)

    # Split data into training and testing sets
    X = labeled_data['clean_text']
    y = labeled_data['sentiment']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Retrain the model
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)
    model.fit(X_train_vec, y_train)

    # Save the updated model and vectorizer
    with open("sentiment_model.pkl", "wb") as model_file:
        pickle.dump(model, model_file)
    with open("vectorizer.pkl", "wb") as vec_file:
        pickle.dump(vectorizer, vec_file)

    print("Model retrained and updated successfully!")

def automate_training(interval=3600):
    """Automate the entire training pipeline."""
    while True:
        print("Fetching new news data...")
        real_time_data = fetch_real_time_news()
        if not real_time_data.empty:
            real_time_data = preprocess_text(real_time_data)
            real_time_data['sentiment'] = model.predict(vectorizer.transform(real_time_data['clean_text']))

            # Append labeled data to the dataset
            real_time_data.to_csv("labeled_news_data.csv", mode='a', header=False, index=False)

            # Retrain the model with the new data
            retrain_model(real_time_data)

        print(f"Waiting for the next iteration... ({interval} seconds)")
        time.sleep(interval)

# Start the automation
automate_training(interval=3600)  # Runs every hour


Fetching new news data...
Model retrained and updated successfully!
Waiting for the next iteration... (3600 seconds)
