In [1]:
import os
import pandas as pd

# Move two directory levels up so the relative "results/..." paths read below resolve.
# The guard makes the cell idempotent: a bare os.chdir('../../') climbs two more levels
# every time the cell is re-run in a live kernel, which silently breaks those paths.
if not os.path.isdir('results/identified_changepoints'):
    os.chdir('../../')

In [3]:
# Load the three "consistent changepoints" spreadsheets, one per file named after the
# series it was computed on (raw reactions score, percentage change, log-transformed
# percentage change). Paths are relative to the current working directory.
# Later cells read the `date` column from each frame and the `methods` column from the
# log-transformed one.
reactions_score_consistent_changepoints = pd.read_excel("results/identified_changepoints/reactions_score_consistent_changepoints.xlsx")
pct_change_consistent_changepoints = pd.read_excel("results/identified_changepoints/pct_change_consistent_changepoints.xlsx")
log_transformed_pct_change_consistent_changepoints = pd.read_excel("results/identified_changepoints/log_transformed_pct_change_reactions_score_consistent_changepoints.xlsx")

In [4]:
# Pair each changepoint table with the display label used for it in later cells.
# Order is significant: tables are scanned in this order when events are grouped.
changepoint_sets = dict(
    [
        ('Reactions Score Raw', reactions_score_consistent_changepoints),
        ('Percentage Change', pct_change_consistent_changepoints),
        ('Log Transformed Percentage Change', log_transformed_pct_change_consistent_changepoints),
    ]
)

In [5]:
# Largest gap, in whole days, between a changepoint and a group's anchor date for the
# two to count as the same event. Only the `.days` component is compared, so any gap
# shorter than `event_window + 1` full days matches.
event_window = 5

# Maps an anchor date (the first changepoint seen for an event) to the labels of the
# methods that reported a changepoint near it. Grouping is greedy and order-dependent:
# a date joins the first anchor it is close enough to, otherwise it becomes an anchor.
common_changepoints = {}
for method, cp_set in changepoint_sets.items():
    for date in cp_set['date']:
        for existing_date in common_changepoints:
            # Take abs() of the time difference *before* reading .days. A negative
            # difference floors toward -inf (-5 days 12 h has .days == -6), so the
            # former abs(delta.days) rejected dates 5-6 days before an anchor while
            # accepting dates 5-6 days after it.
            if abs(date - existing_date).days <= event_window:
                common_changepoints[existing_date].append(method)
                break
        else:
            # No existing anchor is close enough: this date starts a new group.
            common_changepoints[date] = [method]

In [6]:
# Inspect the grouping: each key is an anchor timestamp, each value the list of method
# labels that reported a changepoint within the event window of that anchor.
common_changepoints

{Timestamp('2023-07-02 17:21:41'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2023-03-29 12:09:00'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2022-08-25 06:17:38'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2023-02-04 05:44:59'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2022-11-30 04:17:37'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2023-08-09 15:19:24'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2024-06-09 15:10:28'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2024-08-18 15:52:58'): ['Reactions Score Raw',
  'Percentage Change',
  'Log Transformed Percentage Change'],
 Timestamp('2024-10-14 0

In [7]:
# Number of grouped events (distinct anchor dates) across all scanned tables.
len(common_changepoints)

25

In [8]:
# One record per grouped event: the anchor timestamp plus, for each method label,
# a boolean telling whether that method contributed a changepoint to the group.
common_changepoints_per_method = [
    {
        'datetime': anchor_date,
        'Reactions Score Raw': 'Reactions Score Raw' in method_labels,
        'Percentage Change': 'Percentage Change' in method_labels,
        'Log Transformed Percentage Change': 'Log Transformed Percentage Change' in method_labels,
    }
    for anchor_date, method_labels in common_changepoints.items()
]

In [9]:
# Tabulate the records: one row per grouped event, one boolean column per method.
pd.DataFrame(common_changepoints_per_method)

Unnamed: 0,datetime,Reactions Score Raw,Percentage Change,Log Transformed Percentage Change
0,2023-07-02 17:21:41,True,True,True
1,2023-03-29 12:09:00,True,True,True
2,2022-08-25 06:17:38,True,True,True
3,2023-02-04 05:44:59,True,True,True
4,2022-11-30 04:17:37,True,True,True
5,2023-08-09 15:19:24,True,True,True
6,2024-06-09 15:10:28,True,True,True
7,2024-08-18 15:52:58,True,True,True
8,2024-10-14 04:25:12,True,True,True
9,2022-10-16 04:46:36,True,True,True


In [10]:
# Preview the changepoint table loaded from the log-transformed percentage-change file.
log_transformed_pct_change_consistent_changepoints

Unnamed: 0,date,methods,method
0,2022-07-04 10:51:31,"[('Raw', 0.00561893), ('MA7', 0.0375529), ('MA...",reactions_score
1,2022-08-25 06:17:38,"[('Raw', 0.00547577), ('MA7', 0.0329134), ('MA...",reactions_score
2,2022-10-16 04:46:36,"[('Raw', 0.00515487), ('MA7', 0.0402634), ('MA...",reactions_score
3,2022-05-21 21:10:11,"[('Raw', 0.00428256), ('MA7', 0.0232446), ('MA...",reactions_score
4,2022-12-01 16:53:28,"[('Raw', 0.00346843), ('MA7', 0.0358117), ('MA...",reactions_score
5,2022-03-30 15:27:21,"[('Raw', 0.00255653), ('MA7', 7.43634e-07), ('...",reactions_score
6,2023-02-04 20:11:00,"[('Raw', 0.00176569), ('MA7', 0.0240006), ('MA...",reactions_score
7,2023-03-30 02:43:01,"[('Raw', 1.87203e-05), ('MA7', 0.00595796), ('...",reactions_score
8,2023-05-21 11:31:46,"[('Raw', 2.22999e-06), ('MA7', 4.66129e-06), (...",reactions_score
9,2023-06-11 08:06:25,"[('Raw', 1.56433e-06), ('MA7', 2.87451e-08), (...",reactions_score


In [11]:
# Dates in the raw reactions-score table with no *exactly equal* timestamp in the
# log-transformed table. Matching is exact, so changepoints that differ by only a few
# hours between the two tables are listed here as well.
set(reactions_score_consistent_changepoints.date.unique()) - set(log_transformed_pct_change_consistent_changepoints.date.unique())

{Timestamp('2022-03-30 10:25:28'),
 Timestamp('2022-05-21 15:06:09'),
 Timestamp('2022-07-03 20:06:27'),
 Timestamp('2022-11-30 04:17:37'),
 Timestamp('2023-02-04 05:44:59'),
 Timestamp('2023-03-29 12:09:00'),
 Timestamp('2023-09-09 12:49:50'),
 Timestamp('2023-12-22 15:06:36'),
 Timestamp('2024-08-18 15:52:58')}

In [12]:
# Same table in chronological order; sort_values returns a sorted copy and leaves the
# stored frame (and its original row order) unchanged.
log_transformed_pct_change_consistent_changepoints.sort_values(by='date')

Unnamed: 0,date,methods,method
5,2022-03-30 15:27:21,"[('Raw', 0.00255653), ('MA7', 7.43634e-07), ('...",reactions_score
3,2022-05-21 21:10:11,"[('Raw', 0.00428256), ('MA7', 0.0232446), ('MA...",reactions_score
0,2022-07-04 10:51:31,"[('Raw', 0.00561893), ('MA7', 0.0375529), ('MA...",reactions_score
1,2022-08-25 06:17:38,"[('Raw', 0.00547577), ('MA7', 0.0329134), ('MA...",reactions_score
2,2022-10-16 04:46:36,"[('Raw', 0.00515487), ('MA7', 0.0402634), ('MA...",reactions_score
4,2022-12-01 16:53:28,"[('Raw', 0.00346843), ('MA7', 0.0358117), ('MA...",reactions_score
6,2023-02-04 20:11:00,"[('Raw', 0.00176569), ('MA7', 0.0240006), ('MA...",reactions_score
7,2023-03-30 02:43:01,"[('Raw', 1.87203e-05), ('MA7', 0.00595796), ('...",reactions_score
8,2023-05-21 11:31:46,"[('Raw', 2.22999e-06), ('MA7', 4.66129e-06), (...",reactions_score
9,2023-06-11 08:06:25,"[('Raw', 1.56433e-06), ('MA7', 2.87451e-08), (...",reactions_score


In [13]:
def extract_ma21_changepoint_magnitude(methods_row):
    """Return the magnitude stored in the last entry of a serialized `methods` cell.

    Parameters
    ----------
    methods_row : str
        Text of a Python literal holding a sequence of ``(label, magnitude)`` pairs,
        e.g. ``"[('Raw', 0.0056), ('MA7', 0.0376), ...]"``.

    Returns
    -------
    The last item of the last pair in the sequence. The function name implies that
    pair is the MA21 one, but the label is not checked here -- TODO confirm that the
    serialized list always ends with the MA21 entry.

    Raises
    ------
    ValueError, SyntaxError
        If the text is not a plain Python literal.
    IndexError
        If the parsed sequence, or its last pair, is empty.
    """
    # Local import keeps this cell self-contained; the notebook's import cell has no `ast`.
    import ast

    # literal_eval accepts only literals (numbers, strings, tuples, lists, ...), so text
    # coming from the spreadsheet cannot execute code the way eval() would allow.
    magnitude_list = ast.literal_eval(methods_row)
    ma21_data = magnitude_list[-1]
    ma21_value = ma21_data[-1]

    return ma21_value

In [14]:
# Parse every `methods` cell and keep the value of its last entry
# (see extract_ma21_changepoint_magnitude); the result is aligned with the frame's index.
ma21_magnitude_value = log_transformed_pct_change_consistent_changepoints.methods.apply(extract_ma21_changepoint_magnitude)

In [15]:
# Attach the extracted magnitudes as a new column.
# NOTE: this mutates the loaded frame in place, so earlier displays of it are now stale.
log_transformed_pct_change_consistent_changepoints['ma21_magnitude_value'] = ma21_magnitude_value

In [16]:
# Make the magnitude column a float column, turning anything unparseable into NaN.
# Coercion has to happen first: with `.astype(float)` applied before pd.to_numeric,
# a bad value raised immediately and `errors='coerce'` could never take effect.
# The trailing astype keeps the dtype float even if every value happens to be an integer.
log_transformed_pct_change_consistent_changepoints['ma21_magnitude_value'] = pd.to_numeric(
    log_transformed_pct_change_consistent_changepoints['ma21_magnitude_value'],
    errors='coerce',
).astype(float)

In [17]:
# Render floats with thousands separators and five decimal places in every DataFrame
# displayed from here on. This is a global display setting only; stored values are
# unchanged, but magnitudes smaller than 0.000005 in absolute value will print as zero.
pd.options.display.float_format = '{:,.5f}'.format

In [18]:
# Chronological view again, now including the `ma21_magnitude_value` column.
log_transformed_pct_change_consistent_changepoints.sort_values(by='date')

Unnamed: 0,date,methods,method,ma21_magnitude_value
5,2022-03-30 15:27:21,"[('Raw', 0.00255653), ('MA7', 7.43634e-07), ('...",reactions_score,0.00868
3,2022-05-21 21:10:11,"[('Raw', 0.00428256), ('MA7', 0.0232446), ('MA...",reactions_score,0.29662
0,2022-07-04 10:51:31,"[('Raw', 0.00561893), ('MA7', 0.0375529), ('MA...",reactions_score,0.37451
1,2022-08-25 06:17:38,"[('Raw', 0.00547577), ('MA7', 0.0329134), ('MA...",reactions_score,0.19051
2,2022-10-16 04:46:36,"[('Raw', 0.00515487), ('MA7', 0.0402634), ('MA...",reactions_score,0.21371
4,2022-12-01 16:53:28,"[('Raw', 0.00346843), ('MA7', 0.0358117), ('MA...",reactions_score,0.12119
6,2023-02-04 20:11:00,"[('Raw', 0.00176569), ('MA7', 0.0240006), ('MA...",reactions_score,4e-05
7,2023-03-30 02:43:01,"[('Raw', 1.87203e-05), ('MA7', 0.00595796), ('...",reactions_score,0.0
8,2023-05-21 11:31:46,"[('Raw', 2.22999e-06), ('MA7', 4.66129e-06), (...",reactions_score,-0.0
9,2023-06-11 08:06:25,"[('Raw', 1.56433e-06), ('MA7', 2.87451e-08), (...",reactions_score,0.0
