In [1]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

#### Storing Bitcoin CSV into DataFrame

In [2]:
bitcoin_file = "bitstampUSD_1-min_data_2012-01-01_to_2020-12-31.csv"
bitcoin_data_df = pd.read_csv(bitcoin_file)
bitcoin_data_df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


#### Removing unnecessary columns, renaming one column to match other dataframe and transforming Unix Timestamp to date

In [3]:
bitcoin_data_df.rename(columns={'Timestamp':'Unix Timestamp'}, inplace=True)
new_bitcoin_data_df = bitcoin_data_df[['Unix Timestamp', 'Open', 'High', 'Low', 'Close']].copy()
new_bitcoin_data_df['date'] = pd.to_datetime(new_bitcoin_data_df['Unix Timestamp'], unit='s').dt.date
new_bitcoin_data_df.head()

Unnamed: 0,Unix Timestamp,Open,High,Low,Close,date
0,1325317920,4.39,4.39,4.39,4.39,2011-12-31
1,1325317980,,,,,2011-12-31
2,1325318040,,,,,2011-12-31
3,1325318100,,,,,2011-12-31
4,1325318160,,,,,2011-12-31


#### Grouping by date

In [4]:
new_bitcoin_data_df.groupby('date').last().reset_index()

Unnamed: 0,date,Unix Timestamp,Open,High,Low,Close
0,2011-12-31,1325375940,4.58,4.58,4.58,4.58
1,2012-01-01,1325462340,5.00,5.00,5.00,5.00
2,2012-01-02,1325548740,5.00,5.00,5.00,5.00
3,2012-01-03,1325635140,5.29,5.29,5.29,5.29
4,2012-01-04,1325721540,5.37,5.57,5.37,5.57
...,...,...,...,...,...,...
3281,2020-12-27,1609113540,26217.19,26259.60,26217.19,26259.60
3282,2020-12-28,1609199940,27037.78,27050.00,27024.52,27037.91
3283,2020-12-29,1609286340,27371.72,27377.85,27355.99,27370.00
3284,2020-12-30,1609372740,28910.54,28911.52,28867.60,28881.30


#### Storing Ethereum CSV to dataframe

In [5]:
ethereum_file = "ETH_1H.csv"
ethereum_data_df = pd.read_csv(ethereum_file)
ethereum_data_df.head()

Unnamed: 0,Unix Timestamp,Date,Symbol,Open,High,Low,Close,Volume
0,1586995200,4/16/20 0:00,ETHUSD,152.94,152.94,150.39,150.39,650.188125
1,1586991600,4/15/20 23:00,ETHUSD,155.81,155.81,151.39,152.94,4277.567299
2,1586988000,4/15/20 22:00,ETHUSD,157.18,157.3,155.32,155.81,106.337279
3,1586984400,4/15/20 21:00,ETHUSD,158.04,158.31,157.16,157.18,55.244131
4,1586980800,4/15/20 20:00,ETHUSD,157.1,158.1,156.87,158.04,144.262622


#### Removing unnecessary columns and creating date column from Unix Timestamp

In [6]:
new_ethereum_data_df = ethereum_data_df[['Unix Timestamp', 'Open', 'High', 'Low', 'Close']].copy()
new_ethereum_data_df['date'] = pd.to_datetime(new_ethereum_data_df['Unix Timestamp'], unit='s').dt.date
new_ethereum_data_df.head()

Unnamed: 0,Unix Timestamp,Open,High,Low,Close,date
0,1586995200,152.94,152.94,150.39,150.39,2020-04-16
1,1586991600,155.81,155.81,151.39,152.94,2020-04-15
2,1586988000,157.18,157.3,155.32,155.81,2020-04-15
3,1586984400,158.04,158.31,157.16,157.18,2020-04-15
4,1586980800,157.1,158.1,156.87,158.04,2020-04-15


#### Grouping by date

In [7]:
new_ethereum_data_df.groupby('date').last().reset_index()

Unnamed: 0,date,Unix Timestamp,Open,High,Low,Close
0,2016-05-09,1462798800,0.00,12.00,0.00,9.55
1,2016-05-10,1462838400,9.36,9.36,9.36,9.36
2,2016-05-11,1462924800,9.68,9.94,9.68,9.94
3,2016-05-12,1463011200,10.43,10.43,10.43,10.43
4,2016-05-13,1463097600,10.20,10.40,10.20,10.40
...,...,...,...,...,...,...
1434,2020-04-12,1586649600,158.66,159.66,157.89,157.89
1435,2020-04-13,1586736000,158.56,158.82,150.12,153.50
1436,2020-04-14,1586822400,156.97,158.00,156.38,156.67
1437,2020-04-15,1586908800,158.61,158.61,156.21,157.12


#### Storing Bitcoin related tweet CSV to dataframe

In [8]:
bitcoin_tweet_file = "tweets.csv"
bitcoin_tweet_df = pd.read_csv(bitcoin_tweet_file, sep=';', warn_bad_lines=False, error_bad_lines=False)
bitcoin_tweet_df.head()

KeyboardInterrupt: 

#### Reformatting timestamp

In [None]:
bitcoin_tweet_df['timestamp'] = pd.to_datetime(bitcoin_tweet_df['timestamp'])
bitcoin_tweet_df['timestamp'] = bitcoin_tweet_df['timestamp'].astype(np.int64) // 10 ** 9
print (bitcoin_tweet_df)

In [None]:
bitcoin_tweet_df.head()

In [None]:
bitcoin_tweet_df['date'] = pd.to_datetime(bitcoin_tweet_df['timestamp'], unit='ms').dt.date 
bitcoin_tweet_df.head()

In [None]:
bitcoin_tweet_df.dtypes


In [9]:
from config import pgsql_password
engine = create_engine(f"postgresql://postgres:{pgsql_password}@localhost:5433/ETLProject_db")

In [10]:
engine.table_names()

['new_ethereum_data', 'new_bitcoin_data', 'bitcoin_tweet']

In [None]:
engine

In [None]:
new_bitcoin_data_df.to_sql(name='new_bitcoin_data', con=engine, if_exists='append', index=False)

In [None]:
new_ethereum_data_df.to_sql(name='new_ethereum_data', con=engine, if_exists="append", index=False)

In [None]:
bitcoin_tweet_df.to_sql(name='bitcoin_tweet', con=engine, if_exists="append", index=False)