In [93]:
import pandas as pd
import matplotlib.pyplot as plt

sentiment_type = 'positive_sentiment'


# Assuming sentiment_scores is already a list of floats and split into three columns
tweets_df = pd.read_csv('./sentiment_scores.csv')
stock_prices_df = pd.read_csv('./stock_yfinance_data.csv')

# Convert the 'Date' columns to datetime and normalize to just the date
tweets_df['Date'] = pd.to_datetime(tweets_df['Date']).dt.date
stock_prices_df['Date'] = pd.to_datetime(stock_prices_df['Date']).dt.date

# Merge the sentiment data with the stock prices data on the date and stock name
merged_df = pd.merge(stock_prices_df, tweets_df, left_on=['Date', 'Stock Name'], right_on=['Date', 'Stock Name'])

# Group by Date and Stock Name, and then calculate the mean sentiment
average_sentiment_per_day = merged_df.groupby(['Date', 'Stock Name'])[sentiment_type].mean().reset_index()

# Merge the average sentiment back with the stock prices
merged_with_average_sentiment = pd.merge(stock_prices_df, average_sentiment_per_day, on=['Date', 'Stock Name'])

# Filter for a specific stock
start_dates = ['2021-09-30','2022-01-01','2022-06-30','2021-09-30']
end_dates = ['2022-01-01','2022-06-30','2022-09-30','2022-09-30']
for i in merged_with_average_sentiment['Stock Name'].unique():
    for x in range(len(start_dates)):
        specific_stock_df = merged_with_average_sentiment[merged_with_average_sentiment['Stock Name'] == i]

        # Set the date as the index for the specific stock
        specific_stock_df.set_index('Date', inplace=True)

        start_date = start_dates[x]
        end_date = end_dates[x]

        # Convert strings to datetime
        start_date = pd.to_datetime(start_date).date()
        end_date = pd.to_datetime(end_date).date()

        # Filter the DataFrame for the date range
        demo_stock_df = specific_stock_df[(specific_stock_df.index >= start_date) & (specific_stock_df.index <= end_date)]

        # Assuming 'merged_data' is your DataFrame containing the 'Close' and 'positive_sentiment' columns
        # 计算positive_sentiment的变化率
        demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()

        # 计算股价的变化率
        demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()

        # 去除因为计算变化率而产生的NaN值
        demo_stock_df = demo_stock_df.dropna()

        # 计算变化率之间的皮尔逊相关系数
        change_correlation = demo_stock_df['Close_change'].corr(demo_stock_df['positive_sentiment_change'])

        # 计算连续两天的positive_sentiment和股价的差异
        demo_stock_df['sentiment_diff'] = demo_stock_df['positive_sentiment'].diff()
        demo_stock_df['close_diff'] = demo_stock_df['Close'].diff()

        # 标记方向一致性：两个差异同为正或同为负
        demo_stock_df['direction_consistent'] = (demo_stock_df['sentiment_diff'] * demo_stock_df['close_diff'] > 0).astype(int)

        # 计算方向一致性的天数与总天数的比例
        consistency_percentage = demo_stock_df['direction_consistent'].mean() * 100


        demo_df = tweets_df[(tweets_df['Date'] >= start_date) & (tweets_df['Date'] <= end_date)]
        demo_df = demo_df[demo_df['Stock Name']==i]
        num = demo_df['Tweet'].count()
        name = demo_df['Company Name'].unique()[0]
        print(f'{i} & {name} & from {start_date} to {end_date} & {num} & {consistency_percentage:.2f}\% & {change_correlation:.2f} \\\ ')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] 

TSLA & Tesla, Inc. & from 2021-09-30 to 2022-01-01 & 11074 & 62.50\% & 0.33 \\ 
TSLA & Tesla, Inc. & from 2022-01-01 to 2022-06-30 & 18851 & 72.36\% & 0.55 \\ 
TSLA & Tesla, Inc. & from 2022-06-30 to 2022-09-30 & 7643 & 52.38\% & 0.21 \\ 
TSLA & Tesla, Inc. & from 2021-09-30 to 2022-09-30 & 37422 & 65.74\% & 0.43 \\ 
MSFT & Microsoft Corporation & from 2021-09-30 to 2022-01-01 & 785 & 43.75\% & -0.05 \\ 
MSFT & Microsoft Corporation & from 2022-01-01 to 2022-06-30 & 2468 & 51.22\% & 0.13 \\ 
MSFT & Microsoft Corporation & from 2022-06-30 to 2022-09-30 & 857 & 57.14\% & 0.34 \\ 
MSFT & Microsoft Corporation & from 2021-09-30 to 2022-09-30 & 4089 & 51.39\% & 0.06 \\ 
PG & Procter & Gamble Company & from 2021-09-30 to 2022-01-01 & 785 & 48.44\% & -0.04 \\ 
PG & Procter & Gamble Company & from 2022-01-01 to 2022-06-30 & 2468 & 46.34\% & 0.08 \\ 
PG & Procter & Gamble Company & from 2022-06-30 to 2022-09-30 & 857 & 47.62\% & 0.00 \\ 
PG & Procter & Gamble Company & from 2021-09-30 to 2022-0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] 

NFLX & Netflix, Inc. & from 2022-01-01 to 2022-06-30 & 1207 & 47.32\% & 0.03 \\ 
NFLX & Netflix, Inc. & from 2022-06-30 to 2022-09-30 & 291 & 50.91\% & -0.15 \\ 
NFLX & Netflix, Inc. & from 2021-09-30 to 2022-09-30 & 1727 & 51.98\% & 0.04 \\ 
TSM & Taiwan Semiconductor Manufacturing Company Limited & from 2021-09-30 to 2022-01-01 & 2860 & 59.38\% & 0.26 \\ 
TSM & Taiwan Semiconductor Manufacturing Company Limited & from 2022-01-01 to 2022-06-30 & 6115 & 53.66\% & 0.11 \\ 
TSM & Taiwan Semiconductor Manufacturing Company Limited & from 2022-06-30 to 2022-09-30 & 2113 & 42.86\% & -0.08 \\ 
TSM & Taiwan Semiconductor Manufacturing Company Limited & from 2021-09-30 to 2022-09-30 & 11034 & 52.59\% & 0.07 \\ 
KO & The Coca-Cola Company & from 2021-09-30 to 2022-01-01 & 43 & 43.48\% & -0.30 \\ 
KO & The Coca-Cola Company & from 2022-01-01 to 2022-06-30 & 168 & 46.48\% & 0.02 \\ 
KO & The Coca-Cola Company & from 2022-06-30 to 2022-09-30 & 99 & 44.44\% & 0.12 \\ 
KO & The Coca-Cola Company & f

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] 

COST & Costco Wholesale Corporation & from 2021-09-30 to 2022-09-30 & 393 & 57.69\% & 0.10 \\ 
DIS & The Walt Disney Company & from 2021-09-30 to 2022-01-01 & 193 & 61.54\% & -0.02 \\ 
DIS & The Walt Disney Company & from 2022-01-01 to 2022-06-30 & 347 & 45.16\% & -0.01 \\ 
DIS & The Walt Disney Company & from 2022-06-30 to 2022-09-30 & 102 & 54.05\% & 0.19 \\ 
DIS & The Walt Disney Company & from 2021-09-30 to 2022-09-30 & 635 & 53.01\% & 0.05 \\ 
VZ & Verizon Communications Inc. & from 2021-09-30 to 2022-01-01 & 33 & 57.89\% & -0.34 \\ 
VZ & Verizon Communications Inc. & from 2022-01-01 to 2022-06-30 & 50 & 31.58\% & -0.19 \\ 
VZ & Verizon Communications Inc. & from 2022-06-30 to 2022-09-30 & 41 & 52.00\% & 0.04 \\ 
VZ & Verizon Communications Inc. & from 2021-09-30 to 2022-09-30 & 123 & 50.00\% & -0.10 \\ 
CRM & Salesforce, Inc. & from 2021-09-30 to 2022-01-01 & 58 & 64.29\% & 0.07 \\ 
CRM & Salesforce, Inc. & from 2022-01-01 to 2022-06-30 & 113 & 50.00\% & -0.15 \\ 
CRM & Salesforc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] 

BA & The Boeing Company & from 2021-09-30 to 2022-09-30 & 399 & 59.87\% & 0.01 \\ 
BX & Blackstone Inc. & from 2021-09-30 to 2022-01-01 & 10 & 50.00\% & 0.19 \\ 
BX & Blackstone Inc. & from 2022-01-01 to 2022-06-30 & 25 & 46.67\% & 0.75 \\ 
BX & Blackstone Inc. & from 2022-06-30 to 2022-09-30 & 15 & 60.00\% & -0.94 \\ 
BX & Blackstone Inc. & from 2021-09-30 to 2022-09-30 & 50 & 53.57\% & -0.10 \\ 
NOC & Northrop Grumman Corporation & from 2021-09-30 to 2022-01-01 & 4 & 33.33\% & 0.40 \\ 
NOC & Northrop Grumman Corporation & from 2022-01-01 to 2022-06-30 & 22 & 42.86\% & -0.25 \\ 
NOC & Northrop Grumman Corporation & from 2022-06-30 to 2022-09-30 & 6 & 60.00\% & 0.93 \\ 
NOC & Northrop Grumman Corporation & from 2021-09-30 to 2022-09-30 & 31 & 47.83\% & -0.15 \\ 
PYPL & PayPal Holdings, Inc. & from 2021-09-30 to 2022-01-01 & 287 & 47.37\% & 0.05 \\ 
PYPL & PayPal Holdings, Inc. & from 2022-01-01 to 2022-06-30 & 463 & 57.61\% & 0.14 \\ 
PYPL & PayPal Holdings, Inc. & from 2022-06-30 to 2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] 

NIO & NIO Inc. & from 2022-06-30 to 2022-09-30 & 462 & 55.56\% & 0.08 \\ 
NIO & NIO Inc. & from 2021-09-30 to 2022-09-30 & 3021 & 58.17\% & 0.20 \\ 
ZS & Zscaler, Inc. & from 2021-09-30 to 2022-01-01 & 61 & 40.00\% & 0.04 \\ 
ZS & Zscaler, Inc. & from 2022-01-01 to 2022-06-30 & 99 & 44.44\% & 0.01 \\ 
ZS & Zscaler, Inc. & from 2022-06-30 to 2022-09-30 & 35 & 52.38\% & -0.20 \\ 
ZS & Zscaler, Inc. & from 2021-09-30 to 2022-09-30 & 193 & 47.52\% & -0.03 \\ 
XPEV & XPeng Inc. & from 2021-09-30 to 2022-01-01 & 116 & 66.67\% & 0.10 \\ 
XPEV & XPeng Inc. & from 2022-01-01 to 2022-06-30 & 100 & 48.65\% & -0.24 \\ 
XPEV & XPeng Inc. & from 2022-06-30 to 2022-09-30 & 16 & 54.55\% & 0.23 \\ 
XPEV & XPeng Inc. & from 2021-09-30 to 2022-09-30 & 225 & 60.87\% & -0.14 \\ 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] = demo_stock_df['positive_sentiment'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['Close_change'] = demo_stock_df['Close'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demo_stock_df['positive_sentiment_change'] 