<a href="https://colab.research.google.com/github/XGHouFTW/py-cryptopredict/blob/main/MergeDates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
# This script takes in a list of datasets, averages the data values by day, then
# appends the averages to the base dataset, adding new columns for each dataset
# INPUTS: Crypto price dataset, datasets to average
# OUTPUTS: One dataset, with all columns, averaged by day

import pandas as pd
import numpy as np
import datetime
import sys

def main(out_file: str, filenames: list, base: pd.DataFrame,
         start_date: datetime.date = None, end_date: datetime.date = None):
  """Average each dataset by day and write it, next to `base`, to one CSV.

  Args:
    out_file: Path of the CSV file to write.
    filenames: Paths of the CSV files to average; each is read with
      `pd.read_csv` and must contain a "Date" column.
    base: DataFrame the daily averages are concatenated onto (column-wise).
    start_date: First day to average. When omitted, the earliest "Date"
      found in each dataset is used for that dataset.
    end_date: Last day to average (inclusive). When omitted, the latest
      "Date" found in each dataset is used for that dataset.

  NOTE(review): frames are joined on their row index (`axis=1`), so rows only
  line up day-for-day when every frame covers the same date range; pass an
  explicit `start_date`/`end_date` to force a shared range.
  """
  frames = [base]
  for filename in filenames:
    dataset = pd.read_csv(filename)
    # average_by_day needs explicit bounds; the original call omitted them.
    day_stamps = pd.to_datetime(dataset["Date"])
    first_day = start_date if start_date is not None else day_stamps.min().date()
    last_day = end_date if end_date is not None else day_stamps.max().date()
    frames.append(average_by_day(dataset, first_day, last_day))
  # One concat at the end instead of re-copying `base` on every iteration.
  merged = pd.concat(frames, axis=1)
  merged.to_csv(out_file, index=False)

def average_by_day(df: pd.DataFrame, start_date: datetime.date = None, end_date: datetime.date = None)-> pd.DataFrame:
  """Return one row of column averages per calendar day.

  Rows of `df` that share a date are averaged and consolidated into a single
  row. Every day from `start_date` to `end_date` (inclusive) gets a row; days
  with no data, and non-numeric columns, hold NaN.

  Args:
    df: Dataset with a "Date" column parseable by `pd.to_datetime`.
    start_date: First day of the output. Defaults to the earliest date in `df`.
    end_date: Last day of the output. Defaults to the latest date in `df`.

  Returns:
    DataFrame with the same columns as `df`, a "Date" column of
    `datetime.date` values and a fresh 0..n-1 index.
  """
  timestamps = pd.to_datetime(df["Date"])
  if start_date is None or end_date is None:
    if not timestamps.notna().any():
      # No dated rows to infer the range from.
      return pd.DataFrame(columns=df.columns)
    if start_date is None:
      start_date = timestamps.min().date()
    if end_date is None:
      end_date = timestamps.max().date()

  # Group once by day instead of filtering the whole frame for every date.
  row_dates = timestamps.dt.date
  daily_means = df.drop(columns="Date").groupby(row_dates).mean(numeric_only=True)

  n_days = (end_date - start_date).days + 1
  calendar = [start_date + datetime.timedelta(days=offset)
              for offset in range(max(n_days, 0))]

  # Reindex so days without data still get a (NaN) row, then drop the date
  # index so the result has unique 0..n-1 labels and no placeholder rows.
  averaged_df = daily_means.reindex(calendar).reset_index(drop=True)
  averaged_df["Date"] = calendar
  return averaged_df.reindex(columns=df.columns)

def calculate_average(date: datetime.date, df: pd.DataFrame):
  """Return a one-row DataFrame with the column means of `df` for one day.

  Args:
    date: Day to average.
    df: Dataset with a "Date" column parseable by `pd.to_datetime`
      (strings, timestamps or `datetime.date` objects).

  Returns:
    DataFrame with a single row and the same columns as `df`. "Date" holds
    `date`; numeric columns hold the mean over that day's rows (NaN when the
    day has no rows); non-numeric columns hold NaN.
  """
  # Normalise to datetime.date so string dates read from CSV compare equal.
  row_dates = pd.to_datetime(df["Date"]).dt.date
  daily_data = df[row_dates == date]
  means = daily_data.drop(columns="Date").mean(numeric_only=True)
  row = {column: means.get(column, np.nan) for column in df.columns}
  row["Date"] = date
  # Wrap in a list so the values form one row rather than one column.
  averages = pd.DataFrame([row], columns=df.columns)
  return averages

In [None]:
# Script entry point: builds the output path and the list of input CSV files.
# NOTE(review): the visible part of this cell never calls main() and defines no
# `base` dataset — confirm whether that happens further down.
if __name__ == "__main__":
  filepath = "./"
  # Merged output goes under `filepath`.
  out_file = filepath + "merged_data.csv"
  # Input CSVs to average by day. The names are bare: `filepath` is not
  # prepended to them here.
  filenames = ["NASDAQ_historical.csv", "AMC_historical.csv", "GME_historical.csv", 
               "TSLA_historical.csv", "DJI_historical.csv", "S&P_historical.csv", 
               "AAPL_historical.csv", "reddit-bitcoin.csv", "reddit-finance.csv", 
               "reddit-politics.csv", "reddit-economy.csv", "reddit-cryptocurrency.csv", 
               "reddit-pandemic.csv", "reddit-dogecoin.csv", "reddit-ethereum.csv", 
               "pandemic_news_scored.csv", "politics_news_scored.csv", 
               "finance_news_scored.csv", "economy_news_scored.csv", 
               "dogecoin_news_scored.csv", "bitcoin_news_scored.csv", 
               "ethereum_news_scored.csv", "cryptocurrency_news_scored.csv"]
  