In [244]:
import pandas as pd
import datetime
import pytz
import warnings
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
warnings.filterwarnings('ignore')

In [245]:
exchangeRate= pd.read_csv(r'eth_usd_fx_rates.csv')
metadata = pd.read_csv(r'token_metadata.csv')
tokenSales=pd.read_csv(r'token_sales.csv')

In [246]:
## exchange rate
for i in range(len(exchangeRate)):
    exchangeRate['date'][i] = pd.to_datetime(exchangeRate['date'][i]).date()

In [247]:
## Token Sales

tokenSales = tokenSales[['token_index','timestamp','eth','usd']]
tz = pytz.timezone('America/New_York') 
for i in range(len(tokenSales)):
    tokenSales['timestamp'][i] = datetime.datetime.fromtimestamp(tokenSales['timestamp'][i], tz)

In [248]:
## metadata

metadata.isnull().sum()
metadata.fillna('0',inplace=True)

In [249]:
data=pd.merge(tokenSales,metadata,how = 'right',on = 'token_index')
data.drop_duplicates(inplace=True)
data.dropna(inplace=True)
data.reset_index(drop = True,inplace = True)

In [250]:
data['timestamp'][i]

datetime.datetime(2021, 8, 11, 0, 5, 28, tzinfo=<DstTzInfo 'America/New_York' EDT-1 day, 20:00:00 DST>)

In [251]:
data['date'] = ''
for i in range(len(data)):
    data['Trait Count'][i] = int(data['Trait Count'][i].replace(' Trait Count',''))
    data['date'][i] = data['timestamp'][i].date()

In [252]:
dummiesColumns = ['Skin Tone', 'Type', 'Hair',
       'Eyewear', 'Mouth', 'Headwear', 'Facial Hair', 'Smoking Device',
       'Other:Earring', 'Neckwear', 'Skin Feature', 'Other:Medical Mask',
       'Other:Clown Nose', 'rarest_property_name']

In [253]:
for i in range(len(dummiesColumns)):    
    print(dummiesColumns[i])
    dummies = data[dummiesColumns[i]].str.get_dummies()
    data = pd.concat([data, dummies], axis=1)
    data.drop(dummiesColumns[i], axis=1, inplace=True)

Skin Tone
Type
Hair
Eyewear
Mouth
Headwear
Facial Hair
Smoking Device
Other:Earring
Neckwear
Skin Feature
Other:Medical Mask
Other:Clown Nose
rarest_property_name


In [254]:
data=data.sort_values(ascending=True,by = 'timestamp')

In [255]:
data=pd.merge(data,exchangeRate,how = 'left',on = 'date')

In [256]:
# define the column to check for anomalies
column_to_check = 'eth'

# calculate IQR and the lower and upper bounds
q1 = data[column_to_check].quantile(0.25)
q3 = data[column_to_check].quantile(0.75)
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr

# identify anomalies (values outside the lower and upper bounds)
anomalies = data[(data[column_to_check] < lower_bound) | (data[column_to_check] > upper_bound)]
data = data.loc[(data[column_to_check] > lower_bound) | (data[column_to_check] < upper_bound)]


In [258]:
corrData = data.drop(['timestamp','date','token_index','usd'],axis = 1)

In [260]:
corrData.corr()

Unnamed: 0,eth,Trait Count,rarity_score,Albino,Alien,Ape,Darker,Lighter,Medium,Zombie,...,Skin Tone:Alien,Skin Tone:Ape,Skin Tone:Darker,Skin Tone:Lighter,Skin Tone:Medium,Skin Tone:Zombie,Smoking Device:Cigarette,Smoking Device:Pipe,Smoking Device:Vape,open
eth,1.000000,-0.034631,0.198024,0.000788,0.344319,0.218855,-0.022805,-0.014203,-0.022578,0.161483,...,0.344319,0.218855,-0.003311,-0.004748,-0.004981,0.162306,0.000904,-0.005783,-0.005089,0.228683
Trait Count,-0.034631,1.000000,0.151610,-0.071386,-0.031500,-0.063053,-0.004265,0.036098,0.025326,-0.034199,...,-0.031500,-0.063053,-0.025156,-0.035577,-0.043574,-0.034607,-0.050965,-0.009771,0.023535,-0.009100
rarity_score,0.198024,0.151610,1.000000,-0.007167,0.337841,0.220979,-0.018770,-0.024309,-0.000790,0.085973,...,0.337841,0.220979,0.053796,0.076098,0.093188,0.084496,-0.012339,-0.016138,-0.007683,-0.048388
Albino,0.000788,-0.071386,-0.007167,1.000000,-0.007145,-0.012751,-0.215869,-0.200498,-0.203177,-0.020474,...,-0.007145,-0.012751,-0.002154,-0.003046,-0.003730,-0.020359,-0.008883,0.001851,-0.019329,0.017196
Alien,0.344319,-0.031500,0.337841,-0.007145,1.000000,-0.000921,-0.015591,-0.014481,-0.014674,-0.001479,...,1.000000,-0.000921,-0.000156,-0.000220,-0.000269,-0.001470,-0.000642,-0.002204,-0.002489,-0.013629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Skin Tone:Zombie,0.162306,-0.034607,0.084496,-0.020359,-0.001470,-0.002624,-0.044428,-0.041265,-0.041816,0.994406,...,-0.001470,-0.002624,-0.000443,-0.000627,-0.000768,1.000000,-0.001828,-0.006282,-0.007092,-0.032683
Smoking Device:Cigarette,0.000904,-0.050965,-0.012339,-0.008883,-0.000642,-0.001145,0.012663,-0.018004,0.010915,-0.001839,...,-0.000642,-0.001145,-0.000193,-0.000274,-0.000335,-0.001828,1.000000,-0.002741,-0.003094,-0.003091
Smoking Device:Pipe,-0.005783,-0.009771,-0.016138,0.001851,-0.002204,-0.003934,-0.012254,0.013447,-0.000634,-0.006317,...,-0.002204,-0.003934,-0.000665,-0.000940,-0.001151,-0.006282,-0.002741,1.000000,-0.010633,-0.007193
Smoking Device:Vape,-0.005089,0.023535,-0.007683,-0.019329,-0.002489,-0.004442,-0.017650,0.032384,-0.000467,-0.007132,...,-0.002489,-0.004442,-0.000750,-0.001061,-0.001299,-0.007092,-0.003094,-0.010633,1.000000,0.001301


In [261]:
targetCol = 'eth'
sorted_corr = corrData.corr()[targetCol].abs().sort_values(ascending=False)

In [262]:
sorted_corr

eth                       1.000000
Alien                     0.344319
Skin Tone:Alien           0.344319
Alien                     0.344319
open                      0.228683
                            ...   
Dark Hair                 0.000258
Headwear:Cap Forward      0.000191
Hair:Blonde Bob           0.000178
Eyewear:Classic Shades    0.000153
Big Beard                 0.000096
Name: eth, Length: 208, dtype: float64

In [263]:
X.isna().sum()

Alien                     0
Alien                     0
Ape                       0
Ape                       0
Zombie                    0
Zombie                    0
Alien                     0
Alien                     0
Ape                       0
Ape                       0
Female                    0
Male                      0
Zombie                    0
Zombie                    0
Buck Teeth                0
Beanie                    0
Pink With Hat             0
Eyewear:Regular Shades    0
Headwear:Beanie           0
Headwear:Hoodie           0
Headwear:Pink With Hat    0
Mouth:Buck Teeth          0
Skin Tone:Albino          0
Skin Tone:Alien           0
Skin Tone:Ape             0
Skin Tone:Zombie          0
dtype: int64

In [223]:
X = data.drop(['eth','usd','date','timestamp','open','0'], axis=1)
y = data['eth']

# Create a linear regression model
lr = LinearRegression()

# Create an RFE object with 5 features
rfe = RFE(lr, n_features_to_select=20)

# Fit the RFE object to the data
rfe.fit(X, y)

# Print the selected features
print("Selected features:", X.columns[rfe.support_])


Selected features: Index(['Alien', 'Ape', 'Zombie', 'Alien', 'Ape', 'Female', 'Male', 'Zombie',
       'Buck Teeth', 'Beanie', 'Pink With Hat', 'Eyewear:Regular Shades',
       'Headwear:Beanie', 'Headwear:Hoodie', 'Headwear:Pink With Hat',
       'Mouth:Buck Teeth', 'Skin Tone:Albino', 'Skin Tone:Alien',
       'Skin Tone:Ape', 'Skin Tone:Zombie'],
      dtype='object')


In [232]:
# Split data into training and testing sets
dataRF=data[['Alien', 'Ape', 'Zombie', 'Alien', 'Ape', 'Female', 'Male', 'Zombie',
       'Buck Teeth', 'Beanie', 'Pink With Hat', 'Eyewear:Regular Shades',
       'Headwear:Beanie', 'Headwear:Hoodie', 'Headwear:Pink With Hat',
       'Mouth:Buck Teeth', 'Skin Tone:Albino', 'Skin Tone:Alien',
       'Skin Tone:Ape', 'Skin Tone:Zombie','eth']]
X = dataRF.drop(['eth'], axis=1)
y = dataRF['eth']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [238]:
# Create the random forest regressor model
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model on the training set
rf.fit(X_train, y_train)
scores = cross_val_score(rf, X_train, y_train, cv=5)

# Make predictions on the testing set
y_pred = rf.predict(X_test)

# Evaluate the model performance using mean squared error
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)


Mean Squared Error: 4770.335091418781


In [272]:
scoring=data[['token_index','Alien', 'Ape', 'Zombie', 'Alien', 'Ape', 'Female', 'Male', 'Zombie',
       'Buck Teeth', 'Beanie', 'Pink With Hat', 'Eyewear:Regular Shades',
       'Headwear:Beanie', 'Headwear:Hoodie', 'Headwear:Pink With Hat',
       'Mouth:Buck Teeth', 'Skin Tone:Albino', 'Skin Tone:Alien',
       'Skin Tone:Ape', 'Skin Tone:Zombie']]

In [273]:
scoring.drop_duplicates(inplace = True)

In [278]:
scoringPredictSet = scoring.drop('token_index',axis =1)

In [279]:
prediction = rf.predict(scoringPredictSet)

In [282]:
scoring['PredictedEth'] = prediction

In [285]:
finalResult=scoring[['token_index','PredictedEth']]

In [286]:
finalResult.to_csv('FinalEthPriceNFTs.csv')