In [1]:
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# Read the raw cryptocurrency table from disk and discard the
# 'Unnamed: 0' column in the same step.
file_path = Path("crypto_data.csv")
df = pd.read_csv(file_path).drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365Coin,X11,True,PoW/PoS,,2300000000
2,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,SixEleven,SHA-256,True,PoW,,611000
4,808,SHA-256,True,PoW/PoS,0.0,0


In [3]:
# Restrict the table to rows whose IsTrading flag is True, then remove the
# IsTrading column, which is constant after the filter.
df = df[df['IsTrading'].eq(True)].drop(columns=['IsTrading'])
df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365Coin,X11,PoW/PoS,,2300000000
2,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,SixEleven,SHA-256,PoW,,611000
4,808,SHA-256,PoW/PoS,0.0,0


In [4]:
# Count of rows that contain at least one null value.
# isnull().any(axis=1) flags each row once no matter how many of its cells
# are null, so the sum is a row count (what a row-wise dropna() removes)
# rather than a count of individual null cells.
df.isnull().any(axis=1).sum()

459

In [5]:
# Discard every row that has a null in any column
df = df.dropna(axis=0, how='any')

In [6]:
# Retain only rows whose TotalCoinsMined is strictly greater than zero
df = df[df['TotalCoinsMined'].gt(0)]
df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
5,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,Bitcoin,SHA-256,PoW,17927180.0,21000000
8,Ethereum,Ethash,PoW,107684200.0,0


In [7]:
# Remove the CoinName column from the table
df = df.drop(columns=['CoinName'])
df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
2,Scrypt,PoW/PoS,1055185000.0,532000000
5,X13,PoW/PoS,29279420000.0,314159265359
7,SHA-256,PoW,17927180.0,21000000
8,Ethash,PoW,107684200.0,0


In [11]:
# One-hot encode the Algorithm and ProofType columns; drop_first=True leaves
# out the first level of each encoded column.
df_dummies = pd.get_dummies(
    df,
    columns=['Algorithm', 'ProofType'],
    drop_first=True,
)
# Number of feature columns after encoding
df_dummies.shape[1]

96

In [13]:
# Fit a StandardScaler on the encoded features, then apply the fitted
# scaler to that same data to get the standardized values.
scaler = StandardScaler().fit(df_dummies)
df_scaled = scaler.transform(df_dummies)