In [122]:
import requests
import pandas as pd
from dotenv import load_dotenv
import json
import os, re
import ast

In [123]:
load_dotenv()

BEARER_TOKEN = os.getenv("BEARER_TOKEN")
QUERY = "TSLA OR Tesla lang:en -is:retweet"  # requête (mot-clé, langue, exclusion des RT)
MAX_RESULTS = 50  # entre 10 et 100 (limite de l’API gratuite)
N_TWEETS = 200    # total de tweets que tu veux récupérer (l’API renvoie 100 max par page)

In [124]:
def search_tweets(query, bearer_token, n_tweets=N_TWEETS):
    headers = {"Authorization": f"Bearer {bearer_token}"}
    url = "https://api.x.com/2/tweets/search/recent"

    params = {
        "query": query,
        "max_results": MAX_RESULTS,
        "tweet.fields": "id,text,created_at,public_metrics,lang",
    }

    tweets = []
    next_token = None

    while len(tweets) < n_tweets:
        if next_token:
            params["next_token"] = next_token

        response = requests.get(url, headers=headers, params=params)
        if response.status_code != 200:
            print("Erreur:", response.status_code, response.text)
            break

        data = response.json()
        tweets.extend(data.get("data", []))

        next_token = data.get("meta", {}).get("next_token")
        if not next_token:
            break

    return tweets[:n_tweets]


tweets = search_tweets(QUERY, BEARER_TOKEN, N_TWEETS)
df = pd.DataFrame(tweets)
print(df.head())

filename = "tweets_TSLA.csv"

if not os.path.exists(filename):
    df.to_csv(filename, index=False)
    print(f"✅ Fichier créé : {filename}")
else:
    print(f"⚠️ Le fichier {filename} existe déjà, aucune écriture effectuée.")

Erreur: 429 {"title":"Too Many Requests","detail":"Too Many Requests","type":"about:blank","status":429}
Empty DataFrame
Columns: []
Index: []
⚠️ Le fichier tweets_TSLA.csv existe déjà, aucune écriture effectuée.


In [None]:
df = pd.read_csv("tweets_TSLA.csv")
print(df.columns)
print(df.shape)

Index(['id', 'edit_history_tweet_ids', 'text', 'lang', 'public_metrics',
       'created_at'],
      dtype='object')
(100, 6)


In [None]:
# Formattage de la date
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)
df["created_at_local"] = df["created_at"].dt.tz_convert("Europe/Paris")
df["date"] = df["created_at_local"].dt.date
df["hour"] = df["created_at_local"].dt.time
df["weekday"] = df["created_at_local"].dt.day_name()

# si regroupement en heure entière
# df["hour_int"] = df["created_at_local"].dt.hour

df = df.drop(["id", "edit_history_tweet_ids", "created_at", "created_at_local"], axis=1)
print(df["date"].nunique())

df.head(3)


1


Unnamed: 0,text,lang,public_metrics,date,hour,weekday
0,I just earned +10.00 BSD in the simulation mar...,en,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",2025-10-16,11:30:38,Thursday
1,Tesla - Hang Tough (HQ) https://t.co/eiYNM0B72...,en,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",2025-10-16,11:30:37,Thursday
2,"@Tesla The Lightning Network enables fast, low...",en,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",2025-10-16,11:30:34,Thursday


In [None]:
pd.set_option('display.max_colwidth', None)
print(df["text"][:5])

0    I just earned +10.00 BSD in the simulation mar...
1    Tesla - Hang Tough (HQ) https://t.co/eiYNM0B72...
2    @Tesla The Lightning Network enables fast, low...
3    @DaFunkyBeatsX @SawyerMerritt @Tesla I guess i...
4    RT @venuguntupli7: Rare Earth, Battery and Rob...
Name: text, dtype: object


In [127]:
print(json.dumps(df["public_metrics"][0], indent=2))
df["metrics"] = df["public_metrics"].apply(ast.literal_eval)

"{'retweet_count': 0, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'bookmark_count': 0, 'impression_count': 0}"


In [128]:
def clean_tweet(text):
    text = re.sub(r"http\S+", "", text)          # liens
    text = re.sub(r"@\w+", "", text)             # mentions
    text = re.sub(r"#", "", text)                # hashtag symbol
    text = re.sub(r"[^\w\s$]", "", text)         # ponctuation sauf $
    text = re.sub(r"\s+", " ", text).strip()     # espaces multiples
    return text

df["clean_text"] = df["text"].astype(str).apply(clean_tweet)
df = df[df["clean_text"].str.len() > 5]  # supprime tweets vides ou trop courts

df[["text", "clean_text"]].head(5)


Unnamed: 0,text,clean_text
0,I just earned +10.00 BSD in the simulation mar...,I just earned 1000 BSD in the simulation marke...
1,Tesla - Hang Tough (HQ) https://t.co/eiYNM0B72...,Tesla Hang Tough HQ via
2,"@Tesla The Lightning Network enables fast, low...",The Lightning Network enables fast lowcost Bit...
3,@DaFunkyBeatsX @SawyerMerritt @Tesla I guess i...,I guess it will do it sustainably
4,"RT @venuguntupli7: Rare Earth, Battery and Rob...",RT Rare Earth Battery and Robotics is the next...


In [129]:
def extract_metrics(metrics_str):
    try:
        metrics = ast.literal_eval(metrics_str)
        return metrics.get("like_count", 0), metrics.get("retweet_count", 0)
    except:
        return 0, 0

df[["likes", "retweets"]] = df["public_metrics"].apply(lambda x: pd.Series(extract_metrics(str(x))))


In [130]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

df["sentiment_score"] = df["clean_text"].apply(lambda x: analyzer.polarity_scores(x)["compound"])
df["sentiment_label"] = df["sentiment_score"].apply(lambda x: "positive" if x > 0.05 else ("negative" if x < -0.05 else "neutral"))

In [131]:
from transformers import pipeline
sentiment_model = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

df["sentiment"] = df["clean_text"].apply(lambda x: sentiment_model(x[:512])[0]["label"])

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [132]:
# Regroupement par jour
sentiment_daily = (
    df.groupby("date")["sentiment_score"]
    .mean()
    .reset_index()
)

print(sentiment_daily.head())


         date  sentiment_score
0  2025-10-16          0.17053


In [133]:
sentiment_daily = (
    df.groupby(["date", "sentiment"])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

print(sentiment_daily.head())


sentiment        date  negative  neutral  positive
0          2025-10-16        10       29        61


In [1]:
import snscrape.modules.twitter as sntwitter
import pandas as pd

query = "TSLA since:2025-10-10 until:2025-10-16"
tweets = []

for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
    if i > 100:  # limiter pour éviter les blocages
        break
    tweets.append([tweet.date, tweet.user.username, tweet.content])

df = pd.DataFrame(tweets, columns=["date", "user", "text"])
print(df.head())


AttributeError: 'FileFinder' object has no attribute 'find_module'