In [1]:
import pandas as pd
from kedro.config import ConfigLoader
from kedro.io import DataCatalog

In [None]:
# Point a ConfigLoader at the ../conf directory
CONF_DIR = "../conf"
conf_loader = ConfigLoader(CONF_DIR)

# Read the catalog.yml configuration and build the DataCatalog from it
conf_catalog = conf_loader.get("catalog.yml")
catalog = DataCatalog.from_config(conf_catalog)

In [None]:
# Fetch the "nb_dji_stock" catalog entry and preview its first rows
stock_dataset_name = "nb_dji_stock"
djiStockData = catalog.load(stock_dataset_name)
djiStockData.head()

In [None]:
# Preview the first five rows of the stock data
djiStockData.head(5)

In [5]:
# Fetch the "nb_dji_tweets" catalog entry
tweet_dataset_name = "nb_dji_tweets"
tweetData = catalog.load(tweet_dataset_name)

In [None]:
# A negative n makes head() return every row EXCEPT the last five.
# NOTE(review): if only a short preview was wanted, head(5) or tail(5) may
# have been intended — confirm.
tweetData.head(-5)

In [None]:
# Print a summary of the tweet data (columns, dtypes, non-null counts)
tweetData.info()

In [8]:
# Inner-join the stock and tweet frames on their shared 'date' column;
# dates missing from either frame are dropped.
df_merge = djiStockData.merge(tweetData, how='inner', on='date')

In [9]:
# Ratio of the previous row's adjusted close to the current row's, minus one.
# NOTE(review): if rows are in ascending date order this value is positive
# when the price FELL — confirm the row order / intended sign.
adjusted_close = df_merge['adjusted_close']
pct_change_values = adjusted_close.shift(1) / adjusted_close - 1

# 1.0 where pct_change > 0, otherwise 0.0. Rows whose pct_change is undefined
# (e.g. the first row, which has no previous price) stay NaN instead of being
# silently labelled 0, so the dropna() below removes them rather than letting
# a made-up label leak into the training data.
direction_labels = (pct_change_values > 0).astype(float).where(pct_change_values.notna())

# Each row receives the direction computed for the row before it (shift(1)).
# NOTE(review): confirm this is the intended alignment between features and label.
# The frame is rebuilt without inplace=True. Because 'date' is moved into the
# index here, re-running this cell requires re-running the merge cell first.
df_merge = (
    df_merge
    .assign(**{
        'pct_change': pct_change_values,
        'direction': direction_labels.shift(1),
        'date': pd.to_datetime(df_merge['date']),
    })
    .set_index('date')
    .dropna()
)

In [None]:
# Preview the first rows of the merged frame
df_merge.head()

In [None]:
# List the column names available in the merged frame
df_merge.columns

In [12]:
# Columns fed to the model as inputs
FEATURE_COLUMNS = [
    'open', 'high', 'low', 'close',
    'adjusted_close', 'volume',
    'count', 'normalized',
]
# Column the model is trained to predict
TARGET_COLUMN = 'direction'

X = df_merge[FEATURE_COLUMNS]
y = df_merge[TARGET_COLUMN]

In [None]:
# Show the shapes of the feature matrix and the label vector
X.shape, y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split the same across runs.
# NOTE(review): train_test_split shuffles rows by default and the frame is
# indexed by date — confirm a random (non-chronological) split is intended.
split_parts = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Report the feature/label shapes of each split, one split per line
print(
    f'Train: {X_train.shape} {y_train.shape}',
    f'Test: {X_test.shape} {y_test.shape}',
    sep='\n',
)

### TensorFlow

In [None]:
# Import both Keras symbols from the public tensorflow.keras namespace.
# Mixing standalone `keras` layers with the private `tensorflow.python.keras`
# Sequential class combines two separate Keras implementations.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder

# Encode the output labels: fit the mapping on the training labels only, then
# apply the same mapping to the test labels so both are encoded consistently.
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

# Build the model: one hidden ReLU layer with 10 units, followed by a single
# sigmoid unit for the binary output.
model = Sequential()
model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model
model.fit(X_train, y_train, epochs=50, batch_size=32)

# Evaluate the model; evaluate() returns [loss, accuracy] given the metrics above
scores = model.evaluate(X_test, y_test)
print(f'Accuracy: {scores[1]:.2f}')