In [1]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

#### Storing all three CSVs into DataFrame

In [2]:
bitcoin_file = "bitstampUSD_1-min_data_2012-01-01_to_2020-12-31.csv"
bitcoin_data_df = pd.read_csv(bitcoin_file)
bitcoin_data_df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


#### Removing unnecessary columns and renaming to match other dataframe

In [3]:
bitcoin_data_df.rename(columns={'Timestamp':'Unix Timestamp'}, inplace=True)
new_bitcoin_data_df = bitcoin_data_df[['Unix Timestamp', 'Open', 'High', 'Low', 'Close']].copy()
new_bitcoin_data_df['date'] = pd.to_datetime(new_bitcoin_data_df['Unix Timestamp'], unit='s').dt.date
new_bitcoin_data_df.head()

Unnamed: 0,Unix Timestamp,Open,High,Low,Close,date
0,1325317920,4.39,4.39,4.39,4.39,2011-12-31
1,1325317980,,,,,2011-12-31
2,1325318040,,,,,2011-12-31
3,1325318100,,,,,2011-12-31
4,1325318160,,,,,2011-12-31


In [4]:
new_bitcoin_data_df.groupby('date').last().reset_index()

Unnamed: 0,date,Unix Timestamp,Open,High,Low,Close
0,2011-12-31,1325375940,4.58,4.58,4.58,4.58
1,2012-01-01,1325462340,5.00,5.00,5.00,5.00
2,2012-01-02,1325548740,5.00,5.00,5.00,5.00
3,2012-01-03,1325635140,5.29,5.29,5.29,5.29
4,2012-01-04,1325721540,5.37,5.57,5.37,5.57
...,...,...,...,...,...,...
3281,2020-12-27,1609113540,26217.19,26259.60,26217.19,26259.60
3282,2020-12-28,1609199940,27037.78,27050.00,27024.52,27037.91
3283,2020-12-29,1609286340,27371.72,27377.85,27355.99,27370.00
3284,2020-12-30,1609372740,28910.54,28911.52,28867.60,28881.30


In [6]:
pd.to_datetime(new_bitcoin_data_df['Unix Timestamp'], unit='s').dt.date

0          2011-12-31
1          2011-12-31
2          2011-12-31
3          2011-12-31
4          2011-12-31
              ...    
4727772    2020-12-30
4727773    2020-12-30
4727774    2020-12-30
4727775    2020-12-30
4727776    2020-12-31
Name: Unix Timestamp, Length: 4727777, dtype: object

In [7]:
ethereum_file = "ETH_1H.csv"
ethereum_data_df = pd.read_csv(ethereum_file)
ethereum_data_df.head()

Unnamed: 0,Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume
0,1586995200000,2020-04-16 00:00:00,ETHUSD,152.94,152.94,150.39,150.39,650.188125
1,1586991600000,2020-04-15 23:00:00,ETHUSD,155.81,155.81,151.39,152.94,4277.567299
2,1586988000000,2020-04-15 22:00:00,ETHUSD,157.18,157.3,155.32,155.81,106.337279
3,1586984400000,2020-04-15 21:00:00,ETHUSD,158.04,158.31,157.16,157.18,55.244131
4,1586980800000,2020-04-15 20:00:00,ETHUSD,157.1,158.1,156.87,158.04,144.262622


#### Removing milliseconds from Ethereum timestamp

In [11]:
ethereum_data_df['Unix Timestamp'] = ethereum_data_df['Unix Timestamp'].apply(str)
ethereum_data_df['Unix Timestamp'] = [x[:-4] for x in ethereum_data_df['Unix Timestamp']]
ethereum_data_df.head()

Unnamed: 0,Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume
0,15869,2020-04-16 00:00:00,ETHUSD,152.94,152.94,150.39,150.39,650.188125
1,15869,2020-04-15 23:00:00,ETHUSD,155.81,155.81,151.39,152.94,4277.567299
2,15869,2020-04-15 22:00:00,ETHUSD,157.18,157.3,155.32,155.81,106.337279
3,15869,2020-04-15 21:00:00,ETHUSD,158.04,158.31,157.16,157.18,55.244131
4,15869,2020-04-15 20:00:00,ETHUSD,157.1,158.1,156.87,158.04,144.262622


#### Removing unnecessary columns

In [20]:
new_ethereum_data_df = ethereum_data_df[['Unix Timestamp', 'Open', 'High', 'Low', 'Close']].copy()
new_ethereum_data_df['date'] = pd.to_datetime(new_ethereum_data_df['Unix Timestamp'], unit='s').dt.date
new_ethereum_data_df.head()

Unnamed: 0,Unix Timestamp,Open,High,Low,Close,date
0,15869,152.94,152.94,150.39,150.39,1970-01-01
1,15869,155.81,155.81,151.39,152.94,1970-01-01
2,15869,157.18,157.3,155.32,155.81,1970-01-01
3,15869,158.04,158.31,157.16,157.18,1970-01-01
4,15869,157.1,158.1,156.87,158.04,1970-01-01


In [18]:
new_ethereum_data_df.groupby('date').last().reset_index()

Unnamed: 0,date,Unix Timestamp,Open,High,Low,Close
0,1970-01-01,14,0.0,12.0,0.0,9.55


In [13]:
bitcoin_tweet_file = "tweets.csv"
bitcoin_tweet_df = pd.read_csv(bitcoin_tweet_file)
bitcoin_tweet_df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,id,user,fullname,url,timestamp,replies,likes,retweets
0,1132977055300300800,KamdemAbdiel,Abdiel kamdem,,2019-05-27 11:49:14+00,0,0,0
1,1132977073402736640,bitcointe,Bitcointe,,2019-05-27 11:49:18+00,0,0,0
2,1132977023893139456,3eyedbran,Bran - 3 Eyed Raven,,2019-05-27 11:49:06+00,0,2,1
3,1132977089089556481,DetroitCrypto,J. Scardina,,2019-05-27 11:49:22+00,0,0,0
4,1132977092340191232,mmursaleen72,Muhammad Mursaleen,,2019-05-27 11:49:23+00,0,0,0


#### Reformatting timestamp

In [14]:
bitcoin_tweet_df['timestamp'] = pd.to_datetime(bitcoin_tweet_df['timestamp'])
bitcoin_tweet_df['timestamp'] = bitcoin_tweet_df['timestamp'].astype(np.int64) // 10 ** 9
print (bitcoin_tweet_df)

                         id            user             fullname  url  \
0       1132977055300300800    KamdemAbdiel        Abdiel kamdem  NaN   
1       1132977073402736640       bitcointe            Bitcointe  NaN   
2       1132977023893139456       3eyedbran  Bran - 3 Eyed Raven  NaN   
3       1132977089089556481   DetroitCrypto          J. Scardina  NaN   
4       1132977092340191232    mmursaleen72   Muhammad Mursaleen  NaN   
...                     ...             ...                  ...  ...   
999994  1130977214802145282        eddg3110             eddg3110  NaN   
999995  1130977332787728384    EPICRYPTO369         EPICRYPTO369  NaN   
999996  1130970379944701953   garylbrowning        Gary Browning  NaN   
999997  1130978980046069762  setsuzokuotoko               接続された男  NaN   
999998  1130977610396262400        MtGox101         Mt. Goxonaut  NaN   

         timestamp  replies  likes  retweets  
0       1558957754        0      0         0  
1       1558957758        0  

In [15]:
bitcoin_tweet_df.head()

Unnamed: 0,id,user,fullname,url,timestamp,replies,likes,retweets
0,1132977055300300800,KamdemAbdiel,Abdiel kamdem,,1558957754,0,0,0
1,1132977073402736640,bitcointe,Bitcointe,,1558957758,0,0,0
2,1132977023893139456,3eyedbran,Bran - 3 Eyed Raven,,1558957746,0,2,1
3,1132977089089556481,DetroitCrypto,J. Scardina,,1558957762,0,0,0
4,1132977092340191232,mmursaleen72,Muhammad Mursaleen,,1558957763,0,0,0


In [17]:
bitcoin_tweet_df['date'] = pd.to_datetime(bitcoin_tweet_df['timestamp'], unit='s').dt.date 
bitcoin_tweet_df.head()

Unnamed: 0,id,user,fullname,url,timestamp,replies,likes,retweets,date
0,1132977055300300800,KamdemAbdiel,Abdiel kamdem,,1558957754,0,0,0,2019-05-27
1,1132977073402736640,bitcointe,Bitcointe,,1558957758,0,0,0,2019-05-27
2,1132977023893139456,3eyedbran,Bran - 3 Eyed Raven,,1558957746,0,2,1,2019-05-27
3,1132977089089556481,DetroitCrypto,J. Scardina,,1558957762,0,0,0,2019-05-27
4,1132977092340191232,mmursaleen72,Muhammad Mursaleen,,1558957763,0,0,0,2019-05-27
