In [1]:
pip install transformers torch

Note: you may need to restart the kernel to use updated packages.


In [2]:
import yfinance as yf
import pandas as pd
from datetime import datetime
import json

# --- Tickers list ---
tickers = ["AAPL", "MSFT", "AMZN", "GOOGL", "TSLA",
           "MC.PA", "TTE.PA", "SAN.PA", "AIR.PA", "SU.PA"]

# --- Fetch & clean ---
# Maps each ticker to its cleaned hourly frame; tickers whose download comes back empty are skipped.
all_data = {}
for t in tickers:
    raw = yf.Ticker(t).history(period="1wk", interval="1h")
    if raw.empty:
        # Without this guard a single empty download would raise in the cleaning
        # steps below and abort the export for every remaining ticker.
        print(f"⚠️ No data returned for {t}, skipping")
        continue

    df = (
        raw[["Close", "Volume"]]
        .tz_convert("UTC")  # put every market on the same clock
        .interpolate(method="time", limit_direction="both")  # fill gaps, weighted by elapsed time
        # Rows that are still NaN (e.g. a column with no values at all) cannot be
        # exported: int(NaN) raises and a NaN close is not valid JSON.
        .dropna(subset=["Close", "Volume"])
        # Name the index explicitly instead of relying on it being called 'Datetime'.
        .rename_axis("datetime")
        .reset_index()
    )

    # Split the timestamp into separate date and time strings
    df["date"] = df["datetime"].dt.strftime("%Y-%m-%d")
    df["time"] = df["datetime"].dt.strftime("%H:%M:%S")

    all_data[t] = df

# --- Build the JSON export structure ---
# One entry per ticker, each holding a list of per-bar records.
# Columns are iterated directly (no iterrows), which avoids building a Series per row.
export = [
    {
        "ticker": ticker,
        "data": [
            {
                "date": date_str,
                "time": time_str,
                "close": round(float(close), 2),
                "volume": int(volume),
            }
            for date_str, time_str, close, volume in zip(
                frame["date"], frame["time"], frame["Close"], frame["Volume"]
            )
        ],
    }
    for ticker, frame in all_data.items()
]

# --- Save JSON ---
with open("stocks_1w_hourly_clean.json", "w", encoding="utf-8") as f:
    json.dump(export, f, indent=2)

print("✅ Data formatted and exported to stocks_1w_hourly_clean.json")


✅ Data formatted and exported to stocks_1w_hourly_clean.json


In [3]:
import yfinance as yf
import pandas as pd

# The ten symbols to download (edit this list to track other stocks)
tickers = [
    "AAPL", "MSFT", "AMZN", "GOOGL", "TSLA",
    "MC.PA", "TTE.PA", "SAN.PA", "AIR.PA", "SU.PA",
]

# Maps each symbol to the DataFrame returned for it
all_data = {}

for symbol in tickers:
    # One month of history at one-hour intervals
    history = yf.Ticker(symbol).history(period="1mo", interval="1h")
    all_data[symbol] = history

    # Preview: a header line, then the first rows followed by a blank line
    print(f"Data for {symbol}:")
    print(history.head(), "\n")

# Side-by-side view of all symbols; the dict keys become the outer column level
combined_data = pd.concat(all_data, axis=1)
print(combined_data.head())

Data for AAPL:
                                 Open        High         Low       Close  \
Datetime                                                                    
2025-09-16 09:30:00-04:00  237.000000  241.220001  236.323502  239.529999   
2025-09-16 10:30:00-04:00  239.529999  239.850006  238.244995  239.592896   
2025-09-16 11:30:00-04:00  239.580093  239.580093  238.464996  239.195007   
2025-09-16 12:30:00-04:00  239.199997  239.399994  238.899994  239.085602   
2025-09-16 13:30:00-04:00  239.100006  239.110001  238.130005  238.929993   

                             Volume  Dividends  Stock Splits  
Datetime                                                      
2025-09-16 09:30:00-04:00  14419277        0.0           0.0  
2025-09-16 10:30:00-04:00   5338278        0.0           0.0  
2025-09-16 11:30:00-04:00   3100557        0.0           0.0  
2025-09-16 12:30:00-04:00   2224680        0.0           0.0  
2025-09-16 13:30:00-04:00   2717104        0.0           0.0   

Da

In [4]:
import requests
import pandas as pd
from datetime import datetime, timedelta

import os
from getpass import getpass

# NewsAPI key: read from the NEWSAPI_KEY environment variable, prompting as a
# fallback, so the secret is never stored in the notebook.
# NOTE: any key that was previously committed in this cell should be treated as
# leaked and rotated.
API_KEY = os.environ.get("NEWSAPI_KEY") or getpass("NewsAPI key: ")

NEWSAPI_URL = "https://newsapi.org/v2/everything"
REQUEST_TIMEOUT_S = 30  # fail instead of hanging the kernel on a stalled connection
ARTICLE_COLUMNS = ["Company", "Title", "Summary", "URL", "PublishedAt"]

# List of 10 companies/tickers
companies = ["AAPL", "MSFT", "AMZN", "GOOGL", "TSLA",
           "MC.PA", "TTE.PA", "SAN.PA", "AIR.PA", "SU.PA"]

# Date range: last 30 days up to today
end_date = datetime.today()
start_date = end_date - timedelta(days=30)
from_date = start_date.strftime("%Y-%m-%d")
to_date = end_date.strftime("%Y-%m-%d")

# One dict per article, across all companies
all_articles = []

# Loop through each company
for company in companies:
    # Passing the query as `params` lets requests URL-encode every value.
    params = {
        "q": company,
        "from": from_date,
        "to": to_date,
        "sortBy": "publishedAt",
        "pageSize": 100,
        "language": "en",  # filter English articles
    }

    try:
        response = requests.get(
            NEWSAPI_URL,
            params=params,
            # Header authentication keeps the key out of the request URL.
            headers={"X-Api-Key": API_KEY},
            timeout=REQUEST_TIMEOUT_S,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Stay best-effort: report the failure and move on to the next company.
        print(f"⚠️ Request failed for {company}: {exc}")
        continue

    # Surface API-level errors (bad key, rate limit, ...) instead of silently
    # ending up with zero articles.
    if data.get("status") != "ok":
        print(f"⚠️ NewsAPI error for {company}: {data.get('message', response.status_code)}")

    # Missing or null "articles" is treated as no results
    articles = data.get("articles") or []
    for article in articles:
        all_articles.append({
            "Company": company,
            "Title": article.get("title"),
            "Summary": article.get("description"),
            "URL": article.get("url"),
            "PublishedAt": article.get("publishedAt")
        })

# Convert to DataFrame; explicit columns keep the schema stable even when
# nothing was fetched, so later cells can still access df["Summary"].
df = pd.DataFrame(all_articles, columns=ARTICLE_COLUMNS)

# Display the table
print(df.head(20))  # show first 20 rows
print(f"Total articles fetched: {len(df)}")

   Company                                              Title  \
0     AAPL   Sell Your Winners Before It’s Too Late [Podcast]   
1     AAPL  Belkin、MagSafe/Qi2 25Wワイヤレス充電に対応したiPhone Air/1...   
2     AAPL  Stock market today: Dow, S&P 500, Nasdaq slip ...   
3     AAPL  Stock market today: Dow, S&P 500, Nasdaq climb...   
4     AAPL  Parallels、macOS 26 TahoeやWindows 11 25H2をサポートし...   
5     AAPL          Apple M5チップを搭載した「MacBook Pro」が近く発表されるもよう。   
6     AAPL  Sustained iPhone demand points to 2026 upside ...   
7     AAPL  Dear Salesforce Stock Fans, Mark Your Calendar...   
8     AAPL  Stocks Turn Mixed as Walmart-OpenAI Deal Pushe...   
9     AAPL  Here's How Much $100 Invested In Apple 15 Year...   
10    AAPL   Stocks Fall as US-China Trade Conflict Escalates   
11    AAPL                              rustybt added to PyPI   
12    AAPL  Friday’s selloff broke something in the stock ...   
13    AAPL  Global markets tumble as Beijing imposes new b...   
14    AAPL  Global market

In [5]:
# Normalise the Summary column: missing values become "" and every entry is a str
summary_text = df["Summary"].fillna("").astype(str)
df["Summary"] = summary_text

# Keep only the articles whose summary has non-whitespace content, renumbering rows from 0
has_summary = summary_text.str.strip().ne("")
df = df.loc[has_summary].reset_index(drop=True)

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Pretrained FinBERT checkpoint for financial sentiment
MODEL_NAME = "yiyanghkust/finbert-tone"

# Load the tokenizer first, then the classification model, both from the same checkpoint
tokenizer, model = (
    loader.from_pretrained(MODEL_NAME)
    for loader in (AutoTokenizer, AutoModelForSequenceClassification)
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` with the notebook-level FinBERT ``model``.

    Parameters
    ----------
    text : str
        Text to score. Input longer than 512 tokens is truncated.

    Returns
    -------
    tuple[str, float]
        The lower-cased label of the highest-probability class and the softmax
        probability of that class.
    """
    # Tokenize the text into PyTorch tensors
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: no gradients are needed, so skip autograd bookkeeping
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax turns the logits into class probabilities
    probs = F.softmax(logits, dim=1)

    # Index and probability of the most likely class
    sentiment_idx = torch.argmax(probs, dim=1).item()
    sentiment_score = probs[0][sentiment_idx].item()

    # Read the class name from the checkpoint's own config rather than a
    # hand-written list: per the finbert-tone model card index 0 is neutral and
    # index 1 is positive, so a ["positive", "neutral", "negative"] list swaps them.
    sentiment_label = str(model.config.id2label[sentiment_idx]).lower()

    return sentiment_label, sentiment_score


In [8]:
# Score every summary; each result is a (label, score) pair
sentiments = list(map(analyze_sentiment, df["Summary"]))

# Unpack the pairs into two new columns
df["Sentiment"] = [label for label, _ in sentiments]
df["SentimentScore"] = [score for _, score in sentiments]


# Preview the first 20 rows with the new columns
print(df.head(20))


   Company                                              Title  \
0     AAPL   Sell Your Winners Before It’s Too Late [Podcast]   
1     AAPL  Belkin、MagSafe/Qi2 25Wワイヤレス充電に対応したiPhone Air/1...   
2     AAPL  Stock market today: Dow, S&P 500, Nasdaq slip ...   
3     AAPL  Stock market today: Dow, S&P 500, Nasdaq climb...   
4     AAPL  Parallels、macOS 26 TahoeやWindows 11 25H2をサポートし...   
5     AAPL          Apple M5チップを搭載した「MacBook Pro」が近く発表されるもよう。   
6     AAPL  Sustained iPhone demand points to 2026 upside ...   
7     AAPL  Dear Salesforce Stock Fans, Mark Your Calendar...   
8     AAPL  Stocks Turn Mixed as Walmart-OpenAI Deal Pushe...   
9     AAPL  Here's How Much $100 Invested In Apple 15 Year...   
10    AAPL   Stocks Fall as US-China Trade Conflict Escalates   
11    AAPL                              rustybt added to PyPI   
12    AAPL  Friday’s selloff broke something in the stock ...   
13    AAPL  Global markets tumble as Beijing imposes new b...   
14    AAPL  Global market