<a href="https://colab.research.google.com/github/wiktorialasek/Thesis/blob/etap1/elon_tweet_stock_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so that files stored there (e.g. the tweets CSV
# under /content/drive/MyDrive/) can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Elon Musk Tweet Stock Impact Analysis

# 📌 This notebook analyzes Elon Musk's tweets and their impact on selected stock prices.
# It automatically maps tweets to tickers and fetches historical stock data around the time of the tweet.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import re
from datetime import timedelta

# ✅ Parameters (can be changed by user)
# NOTE(review): none of these three settings is read by the cells visible in this part of
# the notebook — confirm they are used further down, otherwise remove them.
seconds_after_tweet = 10
plot_duration_minutes = 5
# NOTE(review): the DOGE-USD verification cell later reassigns `interval` to "1d".
interval = "1m"  # Options: '1m', '2m', '5m', etc.

# ✅ Mapping from keywords to tickers
# Keys are the symbols handed to yfinance; values are lowercase keywords that
# find_tickers_in_tweet() looks for as whole words in the lower-cased tweet text.
# NOTE(review):
#   * "doge" also matches mentions of the @DOGE account, which the sample output
#     shows being attributed to DOGE-USD — decide whether that is intended.
#   * TWTR requests fail in the recorded output with "possibly delisted; no price data found".
#   * "s&p 500" can never be the deciding keyword: any text containing it already
#     matches the earlier "s&p" entry, and matching stops at the first hit per ticker.
ticker_keywords = {
    "TSLA": ["tesla", "tsla"],
    "DOGE-USD": ["doge", "dogecoin"],
    "BTC-USD": ["bitcoin", "btc"],
    "TWTR": ["twitter", "twtr"],
    "SPY": ["s&p", "spy", "sp500", "s&p 500"]
}

In [None]:
# # ✅ Load Elon Musk tweets CSV (from Kaggle or local path)
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("dadalyndell/elon-musk-tweets-2010-to-2025-march")

# print("Path to dataset files:", path)

# tweets_df = pd.read_csv(path + "/all_musk_posts.csv")


In [None]:
# NOTE: this whole cell is commented out; it documents how the raw Kaggle export was
# reduced to the three columns (id, fullText, createdAt) used by the rest of the notebook.
# NOTE(review): it writes elonmusk_tweets_reduced.csv, while the next cell reads
# tweets_trimmed.csv from Drive — confirm how one file was derived from the other.

# # Identify the relevant columns (case-insensitive match)
# # Print column names to inspect them
# print("Columns in tweets_df:", tweets_df.columns.tolist())


# id_col = next((col for col in tweets_df.columns if col.lower() in ["id"]), None)
# text_col = next((col for col in tweets_df.columns if col.lower() in ["fulltext"]), None) # Changed to lowercase 'fulltext'
# date_col = next((col for col in tweets_df.columns if col.lower() in ["createdat"]), None) # Changed to lowercase 'createdat'


# # Add a check to ensure all required columns were found
# if id_col is None or text_col is None or date_col is None:
#     raise ValueError("Could not find required columns (id, fullText, createdAt) in the DataFrame. Please check the column names in your CSV.")


# # Keep only these three columns
# reduced_df = tweets_df[[id_col, text_col, date_col]]
# reduced_df.columns = ["id", "fullText", "createdAt"]  # standardise the column names

# # Save to a new CSV file
# reduced_df.to_csv("elonmusk_tweets_reduced.csv", index=False)

# print("Zapisano plik: elonmusk_tweets_reduced.csv")

Columns in tweets_df: ['id', 'url', 'twitterUrl', 'fullText', 'retweetCount', 'replyCount', 'likeCount', 'quoteCount', 'viewCount', 'createdAt', 'bookmarkCount', 'isReply', 'inReplyToId', 'conversationId', 'inReplyToUserId', 'inReplyToUsername', 'isPinned', 'isRetweet', 'isQuote', 'isConversationControlled', 'possiblySensitive', 'quoteId', 'quote', 'retweet', 'ticker']
Zapisano plik: elonmusk_tweets_reduced.csv


In [None]:
# prompt: use the file at /content/drive/MyDrive/tweets_trimmed.csv, search its fullText
# column for mentions of the configured tickers, keep only the matching rows and list the
# ticker (or tickers) that were found in a new column

# Load the CSV from Google Drive. The path is kept in one place so the messages below
# always agree with the file that was actually opened.
tweets_csv_path = '/content/drive/MyDrive/tweets_trimmed.csv'
try:
    tweets_df = pd.read_csv(tweets_csv_path)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() asks the kernel to shut
    # down (discarding all state) rather than reporting a readable error in this cell.
    raise FileNotFoundError(
        f"Błąd: Plik '{tweets_csv_path}' nie został znaleziony. Upewnij się, że plik istnieje i ścieżka jest poprawna."
    ) from err

print(f"Pomyślnie załadowano plik: {tweets_csv_path}")
print("Dostępne kolumny:", tweets_df.columns.tolist())

# Locate the tweet-text column, matching 'fulltext' case-insensitively.
text_col_name = next(
    (col for col in tweets_df.columns if col.lower() == 'fulltext'),
    None,
)

if text_col_name is None:
    raise ValueError("Błąd: Kolumna 'fullText' (lub 'fulltext') nie została znaleziona w pliku CSV.")

# The 'Found_Tickers' column is created further down, when find_tickers_in_tweet is
# applied to the text column, so no placeholder column is needed here.

# Find which of the configured tickers a tweet mentions
def find_tickers_in_tweet(text, keyword_map=None):
    """Return the tickers whose keywords occur as whole words in ``text``.

    Matching is case-insensitive: the text is lower-cased and each keyword is
    searched with ``\\b`` word boundaries on both sides, so "tesla" matches
    "Tesla," but not "teslas".

    Args:
        text: Tweet text. Any non-``str`` value (e.g. NaN for a missing cell)
            yields an empty list.
        keyword_map: Optional ``{ticker: [keyword, ...]}`` mapping whose keywords
            are lowercase. Defaults to the notebook-level ``ticker_keywords``.

    Returns:
        list[str]: Every matching ticker exactly once, in the iteration order of
        the mapping. The order is deterministic, which matters because a later
        cell uses the first element of this list.
    """
    if not isinstance(text, str):  # NaN / None -> nothing to search
        return []
    if keyword_map is None:
        keyword_map = ticker_keywords

    text_lower = text.lower()
    # any() stops at the first matching keyword of a ticker, so each ticker can be
    # emitted at most once and no set-based de-duplication (which would scramble
    # the order from run to run) is required.
    return [
        ticker
        for ticker, keywords in keyword_map.items()
        if any(
            re.search(r'\b' + re.escape(keyword) + r'\b', text_lower)
            for keyword in keywords
        )
    ]

# Tag every tweet with the list of tickers its text mentions.
tweets_df['Found_Tickers'] = tweets_df[text_col_name].apply(find_tickers_in_tweet)

# Keep only the tweets for which at least one ticker was found; copy so the result
# is an independent frame rather than a view of tweets_df.
mentions_any_ticker = tweets_df['Found_Tickers'].apply(lambda tickers: len(tickers) > 0)
filtered_tweets_df = tweets_df[mentions_any_ticker].copy()

# Preview the first few matching rows.
print("\nPrzefiltrowane tweety z znalezionymi tickerami:")
print(filtered_tweets_df.head())

# Report how many tweets matched.
print(f"\nZnaleziono {len(filtered_tweets_df)} tweetów zawierających słowa kluczowe tickerów.")

# From here on `filtered_tweets_df` holds only tweets mentioning a ticker, and its
# 'Found_Tickers' column holds the list of tickers found for each tweet.
# Continue the analysis on this filtered frame.

Pomyślnie załadowano plik: /content/drive/MyDrive/tweets_trimmed.csv
Dostępne kolumny: ['id', 'fullText', 'createdAt']

Przefiltrowane tweety z znalezionymi tickerami:
                     id                                           fullText  \
2   1623774484795920384  RT @BillyM2k: dude bookmarks are an awesome tw...   
6   1602885009647366144        RT @Tesla: Holiday Update rolling out now 🎅   
12  1633159188787658757  @anothercohen He has a prominent, active Twitt...   
32  1591585984747286528  @ashleevance @mtaibbi @joerogan Twitter drives...   
77  1551013753700753409          @ajtourville @ICannot_Enough @Twitter Odd   

                    createdAt Found_Tickers  
2   2023-02-09 20:03:00+00:00        [TWTR]  
6   2022-12-14 04:35:41+00:00        [TSLA]  
12  2023-03-07 17:34:27+00:00        [TWTR]  
32  2022-11-13 00:17:23+00:00        [TWTR]  
77  2022-07-24 01:17:49+00:00        [TWTR]  

Znaleziono 4031 tweetów zawierających słowa kluczowe tickerów.


In [None]:
# prompt: save the new data to a CSV file so that it gets downloaded

from google.colab import files

# Name of the exported CSV
output_filename = 'filtered_elonmusk_tweets_with_tickers.csv'

# Write the filtered tweets once (the cell body used to be pasted twice, which wrote
# the file and printed the message twice).
# Note: 'Found_Tickers' holds Python lists, so the CSV stores their text form
# (e.g. "['TWTR']"); parse it back with ast.literal_eval when re-loading the file.
filtered_tweets_df.to_csv(output_filename, index=False)

print(f"\nSaved filtered data to: {output_filename}")

# Trigger the browser download of the file just written
files.download(output_filename)
print(f"Triggering download for {output_filename}")


Saved filtered data to: filtered_elonmusk_tweets_with_tickers.csv

Saved filtered data to: filtered_elonmusk_tweets_with_tickers.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Triggering download for filtered_elonmusk_tweets_with_tickers.csv


In [None]:
# ✅ Prepare result list
# NOTE(review): the price-download cell further down assigns `results = []` again
# before filling it, so this cell looks redundant.
results = []


In [None]:
# Preview the first rows of the ticker-filtered tweets.
filtered_tweets_df.head()

Unnamed: 0,id,fullText,createdAt,Found_Tickers
2,1623774484795920384,RT @BillyM2k: dude bookmarks are an awesome tw...,2023-02-09 20:03:00+00:00,[TWTR]
6,1602885009647366144,RT @Tesla: Holiday Update rolling out now 🎅,2022-12-14 04:35:41+00:00,[TSLA]
12,1633159188787658757,"@anothercohen He has a prominent, active Twitt...",2023-03-07 17:34:27+00:00,[TWTR]
32,1591585984747286528,@ashleevance @mtaibbi @joerogan Twitter drives...,2022-11-13 00:17:23+00:00,[TWTR]
77,1551013753700753409,@ajtourville @ICannot_Enough @Twitter Odd,2022-07-24 01:17:49+00:00,[TWTR]


In [None]:
from datetime import timedelta
import yfinance as yf
import pandas as pd

# Parse the tweet timestamps
filtered_tweets_df['createdAt'] = pd.to_datetime(filtered_tweets_df['createdAt'])

# Drop very old tweets (safety margin). .copy() makes the result an independent frame,
# so later cells can add columns to it without SettingWithCopyWarning.
filtered_tweets_df = filtered_tweets_df[filtered_tweets_df['createdAt'] >= '2019-01-01'].copy()

results = []

# (ticker, date) -> (open, close), or None when yfinance returned no rows.
# Many tweets share a ticker and a day, so each daily bar is downloaded only once.
daily_price_cache = {}

for idx, row in filtered_tweets_df.iterrows():
    tweet_time = row['createdAt']
    tickers = row['Found_Tickers']
    text = row['fullText']
    # Only the calendar day on which the tweet was published is queried
    tweet_date = tweet_time.date()

    for ticker in tickers:
        cache_key = (ticker, tweet_date)
        try:
            if cache_key not in daily_price_cache:
                df = yf.download(
                    ticker,
                    start=str(tweet_date),
                    end=str(tweet_date + timedelta(days=1)),
                    interval='1d',
                    progress=False,
                    # Stated explicitly: the recorded output has no 'Adj Close'
                    # column, i.e. prices were already auto-adjusted.
                    auto_adjust=True,
                )

                if df.empty:
                    daily_price_cache[cache_key] = None
                else:
                    # yfinance returns (Price, Ticker) MultiIndex columns even for a
                    # single symbol; without flattening, row['Open'] is a one-element
                    # Series whose repr ended up in the CSV instead of a number.
                    if isinstance(df.columns, pd.MultiIndex):
                        df.columns = df.columns.get_level_values(0)
                    first_bar = df.iloc[0]
                    daily_price_cache[cache_key] = (
                        float(first_bar['Open']),
                        float(first_bar['Close']),
                    )

            prices = daily_price_cache[cache_key]
            if prices is None:
                # Still reported once per tweet, as before
                print(f"Brak danych dla {ticker} na dzień {tweet_date}")
                continue

            open_price, close_price = prices

            results.append({
                # "date": tweet_date,
                "datetime": tweet_time,
                "ticker": ticker,
                "text": text,
                "price_open": open_price,
                "price_close": close_price
            })

        except Exception as e:
            # Best effort: one failing download must not abort the whole run.
            # Failures are not cached, so the next tweet for the same key retries.
            print(f"Error processing tweet at {tweet_time} for {ticker}: {e}")

# Save the results to CSV
results_df = pd.DataFrame(results)
results_df.to_csv("tweet_open_close_prices.csv", index=False)
print("Zapisano plik: tweet_open_close_prices.csv")



  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TWTR']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-01-12 -> 2025-0

Brak danych dla TWTR na dzień 2025-01-12
Brak danych dla TWTR na dzień 2025-01-11
Brak danych dla TSLA na dzień 2025-01-09


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-30 -> 2025-03-31)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data

Brak danych dla TSLA na dzień 2025-03-30
Brak danych dla TSLA na dzień 2025-03-30
Brak danych dla TSLA na dzień 2025-03-30
Brak danych dla TSLA na dzień 2025-03-30


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticke

Brak danych dla TWTR na dzień 2025-01-29


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed

Brak danych dla TSLA na dzień 2025-03-15


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-09 -> 2025-03-10)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-09 -> 2025-03-10)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(d

Brak danych dla TSLA na dzień 2025-03-09
Brak danych dla TSLA na dzień 2025-03-09
Brak danych dla TSLA na dzień 2025-03-08


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-23 -> 2025-03-24)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-22 -> 2025-03-23)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)


Brak danych dla TSLA na dzień 2025-03-23
Brak danych dla TSLA na dzień 2025-03-22


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-22 -> 2025-03-23)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress

Brak danych dla TSLA na dzień 2025-03-22


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticke

Brak danych dla TSLA na dzień 2025-02-09
Brak danych dla TSLA na dzień 2025-01-01
Brak danych dla TSLA na dzień 2025-01-01
Brak danych dla TSLA na dzień 2025-01-01


ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-02-23 -> 2025-02-24)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)


Brak danych dla TSLA na dzień 2025-02-23


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticke

Brak danych dla TSLA na dzień 2025-03-16
Brak danych dla TSLA na dzień 2025-03-15
Brak danych dla TSLA na dzień 2025-03-02


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticke

Brak danych dla TWTR na dzień 2025-02-24
Brak danych dla TSLA na dzień 2025-04-13


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-04-13 -> 2025-04-14)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress

Brak danych dla TSLA na dzień 2025-04-13


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticke

Brak danych dla TSLA na dzień 2025-03-30
Brak danych dla TSLA na dzień 2025-03-30
Brak danych dla TSLA na dzień 2025-03-30
Brak danych dla TSLA na dzień 2025-03-30


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-29 -> 2025-03-30)')
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress

Brak danych dla TSLA na dzień 2025-03-29


  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticker, start=str(tweet_date), end=str(tweet_date + timedelta(days=1)), interval='1d', progress=False)
  df = yf.download(ticke

Zapisano plik: tweet_open_close_prices.csv


In [None]:
# Preview the collected rows. price_open / price_close are expected to be plain numbers;
# if they render as "Ticker ... Name: ..." text, a pandas Series was stored instead of a scalar.
results_df.head()

Unnamed: 0,datetime,ticker,text,price_open,price_close
0,2025-01-17 07:58:59+00:00,TSLA,"To reiterate, the legacy news headlines are fa...",Ticker TSLA 421.5 Name: 2025-01-17 00:00:00...,Ticker TSLA 426.5 Name: 2025-01-17 00:00:00...
1,2025-01-17 06:18:00+00:00,DOGE-USD,Now @DOGE will do this with government https:/...,Ticker DOGE-USD 0.376706 Name: 2025-01-17 0...,Ticker DOGE-USD 0.415939 Name: 2025-01-17 0...
2,2025-01-17 01:24:00+00:00,BTC-USD,"Maybe she clicked on the “send me 1 btc, I sen...",Ticker BTC-USD 100025.765625 Name: 2025-01-...,Ticker BTC-USD 104462.039062 Name: 2025-01-...
3,2025-01-16 16:05:47+00:00,DOGE-USD,@amuse @VivekGRamaswamy Helping drive @DOGE fr...,Ticker DOGE-USD 0.385713 Name: 2025-01-16 0...,Ticker DOGE-USD 0.376748 Name: 2025-01-16 0...
4,2025-01-13 19:40:14+00:00,DOGE-USD,Just wanted to express appreciation for Presid...,Ticker DOGE-USD 0.335956 Name: 2025-01-13 0...,Ticker DOGE-USD 0.338299 Name: 2025-01-13 0...







Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



In [None]:
# Fetch a single daily bar to cross-check the values stored in results_df.
# Despite the `btc_data_17jan2025` variable name, this requests DOGE-USD for 2025-01-16.
ticker = "DOGE-USD"
start_date = "2025-01-16"
end_date = "2025-01-17" # End of the requested range; the recorded output contains only the 2025-01-16 row, so the end date itself is not included
interval = "1d" # Daily interval. NOTE(review): this overwrites the notebook-level `interval` parameter ("1m") set in the parameters cell

try:
    btc_data_17jan2025 = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)

    if btc_data_17jan2025.empty:
        print(f"Brak danych dla {ticker} w dniu {start_date}.")
    else:
        print(f"Dane dla {ticker} w dniu {start_date}:")
        print(btc_data_17jan2025)

except Exception as e:
    # Any download failure is reported instead of stopping the notebook
    print(f"Wystąpił błąd podczas pobierania danych dla {ticker}: {e}")



Dane dla DOGE-USD w dniu 2025-01-16:
Price          Close      High       Low      Open      Volume
Ticker      DOGE-USD  DOGE-USD  DOGE-USD  DOGE-USD    DOGE-USD
Date                                                          
2025-01-16  0.376748  0.389349  0.368461  0.385713  4097841421


  btc_data_17jan2025 = yf.download(ticker, start=start_date, end=end_date, interval=interval, progress=False)


In [None]:
# === POBIERANIE 1-MIN ŚWIEC DOGE-USD Z COINBASE PRO ===

import requests
import pandas as pd
from datetime import datetime, timedelta

# 1) Time window of interest (naive datetimes, labelled as UTC via the "Z" suffix below)
start = datetime(2025, 1, 16, 16, 0, 0)               # from 16:00:00 UTC
end   = start + timedelta(minutes=10)                # to 16:10:00 UTC

# 2) ISO-8601 strings for the API
start_iso = start.isoformat() + "Z"
end_iso   = end.isoformat()   + "Z"

# 3) Build the URL with granularity=60 (one candle per 60 seconds)
url = (
    "https://api.exchange.coinbase.com/products/DOGE-USD/candles"
    f"?granularity=60&start={start_iso}&end={end_iso}"
)

# 4) Fetch the data. requests has no default timeout, so one is set explicitly:
#    a stalled connection now fails the cell instead of hanging it forever.
resp = requests.get(url, timeout=30)
resp.raise_for_status()  # raises if the API answered with an HTTP error status
data = resp.json()

# 5) Convert to a DataFrame and sort chronologically
#    Coinbase returns lists of [time, low, high, open, close, volume]
df = pd.DataFrame(data, columns=["time","low","high","open","close","volume"])
df["time"] = pd.to_datetime(df["time"], unit="s", utc=True)
df = df.sort_values("time").reset_index(drop=True)

# 6) Pick out the candle that starts at 16:05 UTC
tgt = df[df["time"] == pd.Timestamp("2025-01-16 16:05:00+00:00")]

print("Wszystkie pobrane świece:")
print(df)
print("\nŚwieca 2025-01-16 16:05 UTC:")
print(tgt)


Wszystkie pobrane świece:
                        time      low     high     open    close     volume
0  2025-01-16 16:00:00+00:00  0.38087  0.38175  0.38140  0.38102   663020.3
1  2025-01-16 16:01:00+00:00  0.38089  0.38147  0.38090  0.38125   429498.7
2  2025-01-16 16:02:00+00:00  0.38097  0.38216  0.38130  0.38202  1078804.2
3  2025-01-16 16:03:00+00:00  0.38154  0.38299  0.38186  0.38270   697618.4
4  2025-01-16 16:04:00+00:00  0.38284  0.38468  0.38284  0.38426  1995063.7
5  2025-01-16 16:05:00+00:00  0.38296  0.38483  0.38442  0.38350   476660.1
6  2025-01-16 16:06:00+00:00  0.38356  0.38492  0.38360  0.38489   727803.3
7  2025-01-16 16:07:00+00:00  0.38456  0.38533  0.38490  0.38505   307498.8
8  2025-01-16 16:08:00+00:00  0.38488  0.38557  0.38526  0.38508   820296.8
9  2025-01-16 16:09:00+00:00  0.38488  0.38575  0.38498  0.38549   949895.9
10 2025-01-16 16:10:00+00:00  0.38398  0.38556  0.38549  0.38423  3165203.0

Świeca 2025-01-16 16:05 UTC:
                       time     

In [None]:
# 1. Instalacja (jeśli jeszcze nie masz)
#!pip install requests pandas --quiet

# 2. Importy
import requests
import pandas as pd
from datetime import datetime, timedelta

# 3. Parameters: tweet time and window length (minutes)
tweet_time = datetime(2025, 1, 16, 16, 5, 47)      # UTC
duration_minutes = 5

# 4. Floor to the start of the minute (seconds are dropped, not rounded), so the
#    first candle is the one that was already in progress when the tweet was posted
start_minute = tweet_time.replace(second=0, microsecond=0)
end_minute   = start_minute + timedelta(minutes=duration_minutes)

# 5. ISO-8601 strings for Coinbase
start_iso = start_minute.isoformat() + "Z"
end_iso   = end_minute.isoformat()   + "Z"

# 6. Build the URL and fetch the 1-minute candles
url = (
    "https://api.exchange.coinbase.com/products/DOGE-USD/candles"
    f"?granularity=60&start={start_iso}&end={end_iso}"
)

# requests has no default timeout; set one so a stalled connection fails the cell
# instead of hanging it forever.
resp = requests.get(url, timeout=30)
resp.raise_for_status()
data = resp.json()

# 7. Convert to a DataFrame and sort chronologically
#    Coinbase: [ time, low, high, open, close, volume ]
df = pd.DataFrame(data,
                  columns=["time","low","high","open","close","volume"])
df["time"] = pd.to_datetime(df["time"], unit="s", utc=True)
df = df.sort_values("time").reset_index(drop=True)

# 8. Keep the close price — an approximation of the rate in each minute
result = df[["time","close"]].copy()
result.columns = ["minute","price_close"]

print("Ceny DOGE-USD (close) co minutę od momentu tweeta:")
print(result)


Ceny DOGE-USD (close) co minutę od momentu tweeta:
                     minute  price_close
0 2025-01-16 16:05:00+00:00      0.38350
1 2025-01-16 16:06:00+00:00      0.38489
2 2025-01-16 16:07:00+00:00      0.38505
3 2025-01-16 16:08:00+00:00      0.38508
4 2025-01-16 16:09:00+00:00      0.38549
5 2025-01-16 16:10:00+00:00      0.38423


In [None]:
# 0) (opcjonalnie) instalacja w Colabie
!pip install yfinance requests --quiet

# 1) Importy
import ast
import requests
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

# 2) Parameters
duration_minutes = 5   # number of minutes to follow after the tweet

# 3) Prepare the DataFrame
# -- if the CSV has not been loaded yet, do something along the lines of:
# filtered_tweets_df = pd.read_csv('filtered_tweets.csv', converters={'Found_Tickers': ast.literal_eval})
# -- everything below assumes filtered_tweets_df is already loaded
# Found_Tickers is expected to hold Python lists already, so the former
# ast.literal_eval pass over that column is intentionally not applied here.

# Parse createdAt once, store it back, and derive the calendar date used for
# the daily yfinance lookup from the same parsed values.
created_at = pd.to_datetime(filtered_tweets_df['createdAt'])
filtered_tweets_df['createdAt'] = created_at
filtered_tweets_df['date'] = created_at.dt.date

# 4) Pobranie dziennego OHLCV z yfinance
def get_daily_ohlc(ticker, date):
    """Return one day's ``(low, high, open, close, volume)`` for ``ticker``.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download`` (e.g. ``"TSLA"`` or ``"DOGE-USD"``).
    date : datetime.date
        Calendar day to fetch; the request covers ``[date, date + 1 day)``.

    Returns
    -------
    tuple
        ``(low, high, open, close, volume)`` as scalar values, or five
        ``None`` values when no ticker is given or yfinance returns no rows.
    """
    no_data = (None, None, None, None, None)
    if not ticker:
        return no_data

    hist = yf.download(
        ticker,
        start=str(date),
        end=str(date + timedelta(days=1)),
        interval='1d',
        progress=False
    )
    if hist.empty:
        return no_data

    # yfinance can return (field, ticker) MultiIndex columns even for a single
    # ticker; row['Low'] would then be a Series instead of a number. Keeping
    # only the field level makes every lookup below yield a scalar.
    if isinstance(hist.columns, pd.MultiIndex):
        hist.columns = hist.columns.get_level_values(0)

    row = hist.iloc[0]
    return row['Low'], row['High'], row['Open'], row['Close'], row['Volume']

# 5) Attach the daily OHLCV of each tweet's first ticker
daily_columns = ['daily_low', 'daily_high', 'daily_open', 'daily_close', 'daily_volume']
# (ticker, date) -> OHLCV tuple, so tweets sharing a ticker and day trigger
# a single download instead of one request per row.
_daily_ohlc_cache = {}


def _daily_ohlc_for_row(r):
    """Return the daily OHLCV tuple for the first ticker of one tweet row."""
    tickers = r['Found_Tickers']
    # A tweet with no matched ticker gets empty values instead of an IndexError.
    if len(tickers) == 0:
        return (None, None, None, None, None)
    key = (tickers[0], r['date'])
    if key not in _daily_ohlc_cache:
        _daily_ohlc_cache[key] = get_daily_ohlc(*key)
    return _daily_ohlc_cache[key]


filtered_tweets_df[daily_columns] = (
    filtered_tweets_df
    .apply(lambda r: pd.Series(_daily_ohlc_for_row(r)), axis=1)
)

# 6) Results
print("=== filtered_tweets_df.head() ===")
print(filtered_tweets_df.head().to_string(index=False))


  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TWTR']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-01-12 -> 2025-01-13)')
  hist = yf.download(
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TWTR']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-01-11 -> 2025-01-12)')
  hist = yf.download(
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-01-09 -> 2025-01-10)')
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
  hist = yf.download(
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-03-30 -> 2025-03-31)')
  hist = yf.download(
ERROR:yfinance:
1 Failed downloa

=== filtered_tweets_df.head() ===
                 id                                                                                                                                                                                                                      fullText                 createdAt Found_Tickers       date                                                               daily_low                                                                daily_high                                                                daily_open                                                               daily_close                                                              daily_volume
1880162891283984417                                                                                               To reiterate, the legacy news headlines are false. This fire has nothing to do with Tesla and our Megapacks are operating well. 2025-01-17 07:58:59+00:00        [TSLA] 2025-01-17         Tick

In [None]:
# 0) Instalacja (tylko raz, jeśli jeszcze nie masz)
!pip install yfinance requests --quiet

# 1) Importy
import ast
import requests
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

# 2) Parameters
crypto_tickers = ['BTC-USD', 'DOGE-USD']
duration_minutes = 5   # number of minutes to follow after the tweet

# Parse createdAt once, store it back, and derive the calendar date used for
# the daily yfinance OHLC lookup from the same parsed values.
created_at = pd.to_datetime(filtered_tweets_df['createdAt'])
filtered_tweets_df['createdAt'] = created_at
filtered_tweets_df['date'] = created_at.dt.date


# 4) Dokładamy dzienne OHLCV z yfinance
def get_daily_ohlc(ticker, date):
    """Return one day's ``(low, high, open, close, volume)`` for ``ticker``.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download`` (e.g. ``"TSLA"`` or ``"DOGE-USD"``).
    date : datetime.date
        Calendar day to fetch; the request covers ``[date, date + 1 day)``.

    Returns
    -------
    tuple
        ``(low, high, open, close, volume)`` as scalar values, or five
        ``None`` values when no ticker is given or yfinance returns no rows.
    """
    no_data = (None, None, None, None, None)
    if not ticker:
        return no_data

    hist = yf.download(
        ticker,
        start=str(date),
        end=str(date + timedelta(days=1)),
        interval='1d',
        progress=False
    )
    if hist.empty:
        return no_data

    # yfinance can return (field, ticker) MultiIndex columns even for a single
    # ticker; hist['Low'].iloc[0] would then be a Series instead of a number.
    # Keeping only the field level makes every lookup below yield a scalar.
    if isinstance(hist.columns, pd.MultiIndex):
        hist.columns = hist.columns.get_level_values(0)

    first_day = hist.iloc[0]
    return (
        first_day['Low'],
        first_day['High'],
        first_day['Open'],
        first_day['Close'],
        first_day['Volume'],
    )

# # Ensure that we only try to get daily OHLC if there's at least one ticker found
# filtered_tweets_df[['daily_low','daily_high','daily_open','daily_close','daily_volume']] = (
#     filtered_tweets_df.apply(lambda r: pd.Series(get_daily_ohlc(r['Found_Tickers'][0] if r['Found_Tickers'] else None, r['date'])), axis=1)
# )

# # 5) Pobranie 1-minutowych cen (close) z Coinbase Pro TYLKO dla BTC-USD i DOGE-USD
# minute_records = []

# for _, row in filtered_tweets_df.iterrows():
#     # Check if there are any tickers found for this tweet
#     if not row['Found_Tickers']:
#         continue # Skip if no tickers were found

#     ticker = row['Found_Tickers'][0] # Take the first ticker found for the tweet
#     if ticker not in crypto_tickers:
#         continue  # pomiń wszystkie pozostałe

#     tweet_time = row['createdAt']
#     symbol     = ticker  # już w formacie "BTC-USD" lub "DOGE-USD"

#     # obliczamy przedział [start, end)
#     start_minute = tweet_time.replace(second=0, microsecond=0)
#     end_minute   = start_minute + timedelta(minutes=duration_minutes)

#     # ISO dla Coinbase
#     start_iso = start_minute.isoformat() + "Z"
#     end_iso   = end_minute.isoformat()   + "Z"

#     url = (
#       "https://api.exchange.coinbase.com/products/"
#       f"{symbol}/candles?"
#       f"granularity=60&start={start_iso}&end={end_iso}"
#     )
#     try:
#         resp = requests.get(url)
#         # Coinbase zwróci 404, jeśli symbol nie istnieje – tu już pilnujemy, więc powinno być ok
#         resp.raise_for_status()
#         data = resp.json()  # format: [ time, low, high, open, close, volume ]

#         # zbieramy rekordy
#         for c in data:
#             # Użyj pd.Timestamp zamiast datetime.utcfromtimestamp dla lepszej zgodności z pandas
#             minute_ts = pd.Timestamp(c[0], unit='s', tz='UTC')
#             minute_records.append({
#                 'tweet_id': row['id'],
#                 'ticker':   ticker,
#                 'minute':   minute_ts,
#                 'low':      c[1],
#                 'high':     c[2],
#                 'open':     c[3],
#                 'close':    c[4],
#                 'volume':   c[5]
#             })
#     except requests.exceptions.RequestException as e:
#         print(f"Error fetching data from Coinbase Pro for {symbol} around {tweet_time}: {e}")
#         # Kontynuuj do następnego tweeta/tickera
#     except Exception as e:
#         print(f"An unexpected error occurred processing Coinbase data for {symbol} around {tweet_time}: {e}")


# minutes_df = pd.DataFrame(minute_records)

# 6) Wyniki
# print("=== Dzienny OHLCV (yfinance) – filtered_tweets_df.head() ===")
# print(filtered_tweets_df.head()[[
#     'id','Found_Tickers','daily_low','daily_high','daily_open','daily_close','daily_volume'
# ]].to_string(index=False))

# print("\n=== Minutowe świece (Coinbase Pro) – minutes_df.head() ===")
# print(minutes_df.head().to_string(index=False))

In [None]:
# Preview the first rows of the tweet table together with the daily_* OHLCV columns.
filtered_tweets_df.head()

Unnamed: 0,id,fullText,createdAt,Found_Tickers,date,daily_low,daily_high,daily_open,daily_close,daily_volume
6990,1880162891283984417,"To reiterate, the legacy news headlines are fa...",2025-01-17 07:58:59+00:00,[TSLA],2025-01-17,Ticker TSLA 419.75 Name: 2025-01-17 00:00:0...,Ticker TSLA 439.73999 Name: 2025-01-17 00:0...,Ticker TSLA 421.5 Name: 2025-01-17 00:00:00...,Ticker TSLA 426.5 Name: 2025-01-17 00:00:00...,Ticker TSLA 94991400.0 Name: 2025-01-17 00:...
6996,1880137479128510967,Now @DOGE will do this with government https:/...,2025-01-17 06:18:00+00:00,[DOGE-USD],2025-01-17,Ticker DOGE-USD 0.375973 Name: 2025-01-17 0...,Ticker DOGE-USD 0.420501 Name: 2025-01-17 0...,Ticker DOGE-USD 0.376706 Name: 2025-01-17 0...,Ticker DOGE-USD 0.415939 Name: 2025-01-17 0...,Ticker DOGE-USD 6.276540e+09 Name: 2025-01-...
7023,1880063488489668867,"Maybe she clicked on the “send me 1 btc, I sen...",2025-01-17 01:24:00+00:00,[BTC-USD],2025-01-17,Ticker BTC-USD 99948.90625 Name: 2025-01-17...,Ticker BTC-USD 105884.226562 Name: 2025-01-...,Ticker BTC-USD 100025.765625 Name: 2025-01-...,Ticker BTC-USD 104462.039062 Name: 2025-01-...,Ticker BTC-USD 7.188897e+10 Name: 2025-01-1...
7066,1879923008003059852,@amuse @VivekGRamaswamy Helping drive @DOGE fr...,2025-01-16 16:05:47+00:00,[DOGE-USD],2025-01-16,Ticker DOGE-USD 0.368461 Name: 2025-01-16 0...,Ticker DOGE-USD 0.389349 Name: 2025-01-16 0...,Ticker DOGE-USD 0.385713 Name: 2025-01-16 0...,Ticker DOGE-USD 0.376748 Name: 2025-01-16 0...,Ticker DOGE-USD 4.097841e+09 Name: 2025-01-...
7212,1878889814898172216,Just wanted to express appreciation for Presid...,2025-01-13 19:40:14+00:00,[DOGE-USD],2025-01-13,Ticker DOGE-USD 0.311399 Name: 2025-01-13 0...,Ticker DOGE-USD 0.345214 Name: 2025-01-13 0...,Ticker DOGE-USD 0.335956 Name: 2025-01-13 0...,Ticker DOGE-USD 0.338299 Name: 2025-01-13 0...,Ticker DOGE-USD 3.713751e+09 Name: 2025-01-...


In [None]:

# Preview the first per-tweet minute candles.
# NOTE(review): in the visible part of the notebook the only active code that
# assigns minutes_df sits in a later cell — confirm this cell still runs after
# Restart & Run All.
print("\n=== minutes_df.head() ===")
minutes_df.head()


=== minutes_df.head() ===


Unnamed: 0,tweet_id,ticker,minute,low,high,open,close,volume
0,1880137479128510967,DOGE-USD,2025-01-17 06:23:00+00:00,0.38612,0.38671,0.38627,0.38671,44052.9
1,1880137479128510967,DOGE-USD,2025-01-17 06:22:00+00:00,0.38613,0.38657,0.38657,0.38626,87820.9
2,1880137479128510967,DOGE-USD,2025-01-17 06:21:00+00:00,0.38646,0.38684,0.38684,0.38658,200401.8
3,1880137479128510967,DOGE-USD,2025-01-17 06:20:00+00:00,0.3867,0.38712,0.38671,0.38712,624635.9
4,1880137479128510967,DOGE-USD,2025-01-17 06:19:00+00:00,0.38641,0.38675,0.38644,0.38675,109462.8


In [None]:
# 0) (opcjonalnie) instalacja w Colabie
!pip install yfinance requests --quiet

# 1) Importy
import ast
import requests
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

# 2) Parameters
duration_minutes = 5   # number of minutes to follow after the tweet

# 3) Prepare the DataFrame
# -- if the CSV has not been loaded yet, do something along the lines of:
# filtered_tweets_df = pd.read_csv('filtered_tweets.csv', converters={'Found_Tickers': ast.literal_eval})
# -- everything below assumes filtered_tweets_df is already loaded
# Make sure Found_Tickers holds lists. Values read back from a CSV are strings
# and need parsing; values that are already lists must be left untouched,
# because ast.literal_eval raises ValueError on non-string input.
filtered_tweets_df['Found_Tickers'] = filtered_tweets_df['Found_Tickers'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)
# Convert createdAt to datetime
filtered_tweets_df['createdAt'] = pd.to_datetime(filtered_tweets_df['createdAt'])
# Also extract the calendar date for the daily yfinance lookup
filtered_tweets_df['date'] = filtered_tweets_df['createdAt'].dt.date

# 4) Pobranie dziennego OHLCV z yfinance
def get_daily_ohlc(ticker, date):
    """Return one day's ``(low, high, open, close, volume)`` for ``ticker``.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download`` (e.g. ``"TSLA"`` or ``"DOGE-USD"``).
    date : datetime.date
        Calendar day to fetch; the request covers ``[date, date + 1 day)``.

    Returns
    -------
    tuple
        ``(low, high, open, close, volume)`` as scalar values, or five
        ``None`` values when no ticker is given or yfinance returns no rows.
    """
    no_data = (None, None, None, None, None)
    if not ticker:
        return no_data

    hist = yf.download(
        ticker,
        start=str(date),
        end=str(date + timedelta(days=1)),
        interval='1d',
        progress=False
    )
    if hist.empty:
        return no_data

    # yfinance can return (field, ticker) MultiIndex columns even for a single
    # ticker; row['Low'] would then be a Series instead of a number. Keeping
    # only the field level makes every lookup below yield a scalar.
    if isinstance(hist.columns, pd.MultiIndex):
        hist.columns = hist.columns.get_level_values(0)

    row = hist.iloc[0]
    return row['Low'], row['High'], row['Open'], row['Close'], row['Volume']

# Attach the daily OHLCV of each tweet's first ticker
daily_columns = ['daily_low', 'daily_high', 'daily_open', 'daily_close', 'daily_volume']
# (ticker, date) -> OHLCV tuple, so tweets sharing a ticker and day trigger
# a single download instead of one request per row.
_daily_ohlc_cache = {}


def _daily_ohlc_for_row(r):
    """Return the daily OHLCV tuple for the first ticker of one tweet row."""
    tickers = r['Found_Tickers']
    # A tweet with no matched ticker gets empty values instead of an IndexError.
    if len(tickers) == 0:
        return (None, None, None, None, None)
    key = (tickers[0], r['date'])
    if key not in _daily_ohlc_cache:
        _daily_ohlc_cache[key] = get_daily_ohlc(*key)
    return _daily_ohlc_cache[key]


filtered_tweets_df[daily_columns] = (
    filtered_tweets_df
    .apply(lambda r: pd.Series(_daily_ohlc_for_row(r)), axis=1)
)

# 5) Fetch 1-minute close prices from the Coinbase Exchange API
# Coinbase only lists crypto pairs, so tweets about equity tickers are skipped
# rather than requested (an HTTP error there would abort the whole loop).
crypto_tickers = ['BTC-USD', 'DOGE-USD']


def _coinbase_iso(ts):
    """Format a timestamp as a UTC ISO 8601 string ending in "Z"."""
    ts = pd.Timestamp(ts)
    # NOTE(review): naive timestamps are assumed to already be in UTC — confirm.
    ts = ts.tz_localize('UTC') if ts.tzinfo is None else ts.tz_convert('UTC')
    # strftime avoids the "+00:00Z" suffix that isoformat() + "Z" produces
    # for timezone-aware values.
    return ts.strftime('%Y-%m-%dT%H:%M:%SZ')


minute_records = []

for _, row in filtered_tweets_df.iterrows():
    tickers = row['Found_Tickers']
    if len(tickers) == 0:
        continue  # no ticker matched for this tweet

    ticker = tickers[0]
    if ticker not in crypto_tickers:
        continue

    # The ticker is already in Coinbase notation ("DOGE-USD"); appending
    # another "-USD" would request a product that does not exist.
    symbol = ticker
    tweet_time = row['createdAt']

    # Round down to the minute and cover duration_minutes from there
    start_minute = tweet_time.replace(second=0, microsecond=0)
    end_minute   = start_minute + timedelta(minutes=duration_minutes)

    url = (
      "https://api.exchange.coinbase.com/products/"
      f"{symbol}/candles?"
      f"granularity=60&start={_coinbase_iso(start_minute)}&end={_coinbase_iso(end_minute)}"
    )
    try:
        # Without a timeout a stalled connection would block the cell indefinitely.
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        data = resp.json()  # each candle: [time, low, high, open, close, volume]
    except requests.exceptions.RequestException as e:
        # One failed request should not discard the candles collected so far.
        print(f"Error fetching data from Coinbase for {symbol} around {tweet_time}: {e}")
        continue

    for c in data:
        minute_records.append({
            'tweet_id': row['id'],
            'ticker':   ticker,
            # Timezone-aware UTC timestamp (datetime.utcfromtimestamp is deprecated)
            'minute':   pd.Timestamp(c[0], unit='s', tz='UTC'),
            'close':    c[4]
        })

# Explicit columns keep the frame well-formed even when no candles were collected
minutes_df = pd.DataFrame(minute_records, columns=['tweet_id', 'ticker', 'minute', 'close'])

# 6) Results
print("=== filtered_tweets_df.head() ===")
print(filtered_tweets_df.head().to_string(index=False))

print("\n=== minutes_df.head() ===")
print(minutes_df.head().to_string(index=False))


ValueError: malformed node or string: ['TSLA']

In [None]:
filtered_tweets_df['createdAt'] = pd.to_datetime(filtered_tweets_df['createdAt'])

# Start from an empty list so the cell works on a fresh kernel and re-running
# it does not append duplicate rows.
results = []

for idx, row in filtered_tweets_df.iterrows():
    tweet_time = row['createdAt']
    text = row['fullText']

    # Found_Tickers holds a list per tweet; query the first ticker only so that
    # yfinance receives a single symbol.
    found = row['Found_Tickers']
    if isinstance(found, (list, tuple)):
        if not found:
            continue
        ticker = found[0]
    else:
        ticker = found

    # Window: one minute before the tweet up to plot_duration_minutes after it
    start_time = tweet_time - timedelta(minutes=1)
    end_time = tweet_time + timedelta(minutes=plot_duration_minutes)

    try:
        # Download stock data from yfinance
        df = yf.download(ticker, start=start_time, end=end_time, interval=interval, progress=False)

        if df.empty:
            continue

        # yfinance can return (field, ticker) MultiIndex columns even for one
        # ticker; keep only the field level so lookups below yield scalars.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)

        # First open and last close of the downloaded window
        open_price = df['Open'].iloc[0]
        close_price = df['Close'].iloc[-1]

        # Price at tweet time or nearest bar. Index.get_loc(..., method=...)
        # was removed in pandas 2.0; get_indexer returns a position, which
        # must be used with .iloc rather than .loc.
        pos_at_tweet = df.index.get_indexer([tweet_time], method='nearest')[0]
        price_at_tweet = df['Close'].iloc[pos_at_tweet]

        # Price at the bar nearest to seconds_after_tweet after the tweet
        time_after = tweet_time + timedelta(seconds=seconds_after_tweet)
        pos_after = df.index.get_indexer([time_after], method='nearest')[0]
        price_after = df['Close'].iloc[pos_after]

        # Append to results
        results.append({
            "datetime": tweet_time,
            "ticker": ticker,
            "text": text,
            "price_open": open_price,
            "price_at_tweet": price_at_tweet,
            "price_after_{}s".format(seconds_after_tweet): price_after,
            "price_close": close_price
        })

        # Plot price movement with the tweet time marked
        fig, ax = plt.subplots(figsize=(10, 4))
        df['Close'].plot(ax=ax)
        ax.axvline(tweet_time, color='red', linestyle='--', label='Tweet Time')
        ax.set_title(f"{ticker} Price Around Tweet Time")
        ax.set_xlabel("Time")
        ax.set_ylabel("Price")
        ax.legend()
        ax.grid(True)
        plt.show()
        # Release the figure; one is created per tweet
        plt.close(fig)

    except Exception as e:
        # Best effort: report the failing tweet and continue with the next one
        print(f"Error processing tweet at {tweet_time} for {ticker}: {e}")

# ✅ Save results
results_df = pd.DataFrame(results)
results_df.to_csv("tweet_stock_reactions.csv", index=False)
print("Saved results to tweet_stock_reactions.csv")

# ✅ Display preview
results_df.head()

TypeError: unsupported operand type(s) for -: 'str' and 'datetime.timedelta'