In [1]:
import pandas as pd
import requests
import numpy as np
import os

In [2]:
def convert_to_df(nft_data : dict) -> pd.DataFrame:
    df_data_nft = pd.DataFrame(nft_data.get('nft'), index=[nft_data.get('nft')['token_id']])
    df_data_price_datails = pd.DataFrame(nft_data.get('price_details'), index=[nft_data.get('nft')['token_id']])
    df_data = pd.merge(df_data_nft, df_data_price_datails, left_index=True, right_index=True)
    features = ['marketplace', 'transaction_date', 'seller_address']
    d_features = {}
    for feature in features:
        d_features[feature] = nft_data.get(feature)
    df_data = pd.merge(df_data, pd.DataFrame(d_features, index=[nft_data.get('nft')['token_id']]), left_index=True, right_index=True)
    return df_data


# a function adds new values from transaction data to the dataset
def get_df_from_transaction_data(df_nfts : pd.DataFrame, response : requests.models.Response) -> pd.DataFrame:
    for transaction in response.json()['transactions']:
        df_nfts = pd.concat([df_nfts, convert_to_df(transaction)], ignore_index=True)
    return df_nfts


In [3]:
# Download NFT's data  
# More information: https://docs.nftport.xyz/reference/retrieve-all-transactions

import requests

url = "https://api.nftport.xyz/v0/transactions"

querystring = {"chain":"ethereum","type":"sale","continuation": 'MTY2NDEzMzc3OV82MzQyY2FkNWRlNzEwMjUzM2M5ZWUyNzE='}

headers = {
    "Content-Type": "application/json",
    "Authorization": os.environ.get("KEY_API")
}

# get the first page
response = requests.request("GET", url, headers=headers, params=querystring)



In [4]:
transaction_features = ['contract_type', 'contract_address', 'token_id', 'asset_type', 'price', 'price_usd', 'marketplace', 'transaction_date']
df_nfts_transaction = get_df_from_transaction_data(pd.DataFrame(columns=transaction_features), response)

In [5]:
df_nfts_transaction.head()

Unnamed: 0,contract_type,contract_address,token_id,asset_type,price,price_usd,marketplace,transaction_date,seller_address,contract_address_x,contract_address_y
0,ERC721,0x0f378d6813649160b3229af6fe3390b085ea64e8,2311,ETH,0.002,2.592124,opensea,2022-09-25T19:22:59,0xf3a856479a28ae8dde8a1df7b11e5ecfe93abbaa,,
1,ERC721,0x52607cb9c342821ea41ad265b9bb6a23bea49468,2679,ETH,0.0,0.0,opensea,2022-09-25T19:23:11,0x1e5c29006418a77904cdedcd29291ab9e38b435a,,
2,ERC721,0x4256abdafd5d27d7cf83c31fdc76c75e58307dbc,3117,ETH,0.0025,3.240155,,2022-09-25T19:23:11,0x06fb43d9fe5f257c4a45dd57f39a6e967584ab1c,,
3,ERC721,0xd3261d448619d57da181c846b7c46d41aeb59dd9,1451,ETH,0.03,38.88186,,2022-09-25T19:23:11,0xf5437581a0868b866678588c673aee6beb68ca17,,
4,ERC721,0xd3261d448619d57da181c846b7c46d41aeb59dd9,2047,ETH,0.03,38.88186,,2022-09-25T19:23:11,0x64c7607aecfcf625ba160e7f41ab2c1bc84c9a63,,


In [6]:
import time
# Get information from the next pages
url = "https://api.nftport.xyz/v0/transactions"

# each page gives us 50 more nfts
num_pages = 5

for _ in range(num_pages):
    querystring = {
        "chain":"ethereum",
        "type":"sale",
        "continuation": response.json()['continuation']}

    headers = {
        "Content-Type": "application/json",
        "Authorization": os.environ.get("KEY_API"),
    }

    # get the first page
    # time.sleep(0.05)
    response = requests.request("GET", url, headers=headers, params=querystring)
    if response.status_code == 200:
        df_nfts_transaction = get_df_from_transaction_data(df_nfts_transaction, response)
    else:
        print(response.text)

In [7]:
response.json()['continuation']

# 'MTY2NDEzMzc3OV82MzQyY2FkNWRlNzEwMjUzM2M5ZWUyNzE='

'MTY2NDEzNDEzOV82MzQyY2FkNWRlNzEwMjUzM2M5ZWViNTE='

In [8]:
df_nfts_transaction

Unnamed: 0,contract_type,contract_address,token_id,asset_type,price,price_usd,marketplace,transaction_date,seller_address,contract_address_x,contract_address_y
0,ERC721,0x0f378d6813649160b3229af6fe3390b085ea64e8,2311,ETH,0.0020,2.592124,opensea,2022-09-25T19:22:59,0xf3a856479a28ae8dde8a1df7b11e5ecfe93abbaa,,
1,ERC721,0x52607cb9c342821ea41ad265b9bb6a23bea49468,2679,ETH,0.0000,0.000000,opensea,2022-09-25T19:23:11,0x1e5c29006418a77904cdedcd29291ab9e38b435a,,
2,ERC721,0x4256abdafd5d27d7cf83c31fdc76c75e58307dbc,3117,ETH,0.0025,3.240155,,2022-09-25T19:23:11,0x06fb43d9fe5f257c4a45dd57f39a6e967584ab1c,,
3,ERC721,0xd3261d448619d57da181c846b7c46d41aeb59dd9,1451,ETH,0.0300,38.881860,,2022-09-25T19:23:11,0xf5437581a0868b866678588c673aee6beb68ca17,,
4,ERC721,0xd3261d448619d57da181c846b7c46d41aeb59dd9,2047,ETH,0.0300,38.881860,,2022-09-25T19:23:11,0x64c7607aecfcf625ba160e7f41ab2c1bc84c9a63,,
...,...,...,...,...,...,...,...,...,...,...,...
295,ERC721,0xc86664e7d2608f881f796ee8e24fa9d4d7598406,9277,ETH,0.0890,115.349519,opensea,2022-09-25T19:28:47,0x0b000fd71ec63066fea195b1e724ec8aa52e9bb8,,
296,ERC721,0x3e824b3aad193bd818150f63a2fe17448251b4f8,334,ETH,0.0088,11.405346,opensea,2022-09-25T19:28:47,0x7e29d8676ec82a459945979cb1e053e755515cb5,,
297,ERC721,0x6661c87764adf7fffa3c7922fa6edfa2fd62ccfc,4118,ETH,0.0175,22.681085,opensea,2022-09-25T19:28:59,0x737d3e324d1580943979b8db71dea756bef2c70a,,
298,ERC721,0x9370045ce37f381500ac7d6802513bb89871e076,5138,ETH,0.0970,125.718015,opensea,2022-09-25T19:28:59,0xc9117fa907d889fb3d736f8ce43872685b8f929d,,


In [36]:
# a funtction adds new values from metadata to the dataset
def add_nfts(df_nfts: pd.DataFrame, response: requests.models.Response) -> pd.DataFrame:
    nft = response.json().get('nft')
    if nft == None:
        return df_nfts
    row = np.array([nft.get(el) for el in df_nfts.keys()])
    df_nfts = pd.concat([df_nfts, pd.DataFrame([row], columns = df_nfts.keys())], ignore_index=True)
    return df_nfts


def get_attributes(sr_attributes: pd.Series, response: requests.models.Response) -> pd.Series:
    nft = response.json().get('nft')
    if nft == None or nft.get('metadata') == None:
        return sr_attributes
    sr_attributes = pd.concat(
        [sr_attributes, 
        pd.Series(  
            [nft.get('metadata').get('attributes')], 
            name='attributes',
            index=[nft.get('token_id')])
        ])
    return sr_attributes

In [37]:
import time
features = ['token_id', 'chain', 'contract_address']
df_nfts_contract = pd.DataFrame(columns = features)
sr_attributes = pd.Series(dtype='object', name='attributes')
for irow in df_nfts_transaction.index:
    contract = df_nfts_transaction['contract_address'][irow]
    token = df_nfts_transaction['token_id'][irow]
    url = f"https://api.nftport.xyz/v0/nfts/{contract}/{token}"
    time.sleep(0.05) # we have to wait some time due to the limitation of free access
    querystring = {"chain":"ethereum"}

    headers = {
        "Content-Type": "application/json",
        "Authorization": os.environ.get("KEY_API")
    }

    nft_response = requests.request("GET", url, headers=headers, params=querystring)
    print(nft_response.status_code)
    if nft_response.status_code == 200:
        df_nfts_contract = add_nfts(df_nfts_contract, nft_response)
        sr_attributes = get_attributes(sr_attributes, nft_response)
    else:
        print(nft_response.text)


200
200
200
200
200
422
{"response":"NOK","error":{"status_code":422,"code":"invalid_address","message":"Invalid address: contract_address."}}
200
200
200
422
{"response":"NOK","error":{"status_code":422,"code":"invalid_address","message":"Invalid address: contract_address."}}
200
200
200
200
404
{"response":"NOK","error":{"status_code":404,"code":"not_found","message":"The requested NFT was not found in our database."}}
200
200
422
{"response":"NOK","error":{"status_code":422,"code":"invalid_address","message":"Invalid address: contract_address."}}
200
200
200
200
200
200
422
{"response":"NOK","error":{"status_code":422,"code":"invalid_address","message":"Invalid address: contract_address."}}
200
200
200
422
{"response":"NOK","error":{"status_code":422,"code":"invalid_address","message":"Invalid address: contract_address."}}
200
200
200
200
200
200
404
{"response":"NOK","error":{"status_code":404,"code":"not_found","message":"The requested NFT was not found in our database."}}
200
200

KeyboardInterrupt: 

In [39]:
df_nfts_transaction_1 = df_nfts_transaction.drop(['contract_address_x', 'contract_address_y'], axis=1)
df_nfts_transaction_1.drop_duplicates(subset=['token_id'], inplace=True)

In [40]:
df_nfts_contract_1 = df_nfts_contract.drop(['contract_address'], axis=1)
df_nfts_contract_1.drop_duplicates(subset=['token_id'], inplace=True)


In [41]:
df_nfts_contract_with_attr = pd.merge(df_nfts_contract_1, sr_attributes, how='left', left_on='token_id', right_index=True)
df_nfts_contract_1.head(15)

Unnamed: 0,token_id,chain
0,536,ethereum
1,88,ethereum
2,1612943154678929541327261259022321855836599214...,ethereum
3,7109784878509184870746127601340702965700161956...,ethereum
4,786,ethereum
5,3498,ethereum
6,8621,ethereum
7,1111,ethereum
8,719,ethereum
9,159,ethereum


In [42]:
# df_nfts_contract_with_attr = pd.merge(df_nfts_contract_1, sr_attributes, left_on=['token_id'], right_index=True)
df_final = pd.merge(df_nfts_contract_with_attr, df_nfts_transaction_1,  how='left', on='token_id')
df_final.drop_duplicates(subset=['token_id'], inplace=True)
df_final.drop(['contract_type', 'chain', 'token_id', 'contract_address', 'asset_type', 'marketplace'], axis=1, inplace=True)
df_final.rename(columns={'price' : 'price_eth'}, inplace=True)
df_final.dropna(subset=['attributes'], inplace=True)
df_final

Unnamed: 0,attributes,price_eth,price_usd,transaction_date,seller_address
0,"[{'trait_type': 'State', 'value': 'Unrevealed'}]",0.030000,39.622824,2022-09-25T01:21:23,0x4bfde9c1ab8887452a2a9fb80b6f60e013108ea2
8,"[{'trait_type': 'State', 'value': 'Unrevealed'}]",0.030000,39.622824,2022-09-25T01:21:23,0x4420063f765238e74a05d45be055505da82654b1
17,"[{'display_type': 'date', 'trait_type': 'Creat...",0.002352,3.106429,2022-09-25T01:21:23,0xfdbaed3cd99be718581c8462a0ba81a867ce55f7
18,"[{'trait_type': 'Background', 'value': 'Atlant...",0.016900,22.320857,2022-09-25T01:21:23,0x83b2049b5c4b028b08cbaf336eb4b4414b077888
19,"[{'trait_type': 'State', 'value': 'Unrevealed'}]",0.032300,42.660574,2022-09-25T01:21:23,0x024d66ea93afe0d2ab8af8afeb277f40d974828e
...,...,...,...,...,...
11710,"[{'trait_type': 'Size', 'value': 'Large'}, {'t...",0.004080,5.399918,2022-09-25T06:22:23,0x3a8713065e4daa9603b91ef35d6a8336ef7b26c6
11711,"[{'display_type': 'date', 'trait_type': 'Creat...",0.000000,0.000000,2022-09-25T06:45:47,0xaded665fc49620b88e17abc07211405556bcc5d6
11713,"[{'trait_type': 'Shoulder Pad', 'value': 'Comp...",0.019000,25.091344,2022-09-25T03:49:23,0x4e943b25dedc1a86a4d31dd461b1379d09fd723a
11714,"[{'trait_type': 'Tribe', 'value': 'Party Raver...",0.049000,64.840414,2022-09-25T06:17:11,0x613e681f6c2092a550faac8493a049b927ed5878


In [43]:
def convert_attributes(traits):
    res = dict()
    dict_synonyms = {'clothing' : 'clothes', 
                    'eye' : 'eyes',
                    }
    try:
        for trait in traits:
            if trait['trait_type'].lower() in dict_synonyms.keys():
                res[dict_synonyms[trait['trait_type'].lower()]] = trait.get('value')
            elif trait['trait_type'].lower() == 'eye':
                res['eyes'] = trait.get('value')
            else:
                res[trait['trait_type'].lower()] = trait.get('value')
        return res
    except (TypeError, AttributeError, KeyError):
        return None


In [44]:
df_final_1 = df_final.copy()
df_final_1['attributes'] = df_final['attributes'].apply(convert_attributes)
df_final_1


Unnamed: 0,attributes,price_eth,price_usd,transaction_date,seller_address
0,{'state': 'Unrevealed'},0.030000,39.622824,2022-09-25T01:21:23,0x4bfde9c1ab8887452a2a9fb80b6f60e013108ea2
8,{'state': 'Unrevealed'},0.030000,39.622824,2022-09-25T01:21:23,0x4420063f765238e74a05d45be055505da82654b1
17,"{'created date': 1662794097000, 'length': 6, '...",0.002352,3.106429,2022-09-25T01:21:23,0xfdbaed3cd99be718581c8462a0ba81a867ce55f7
18,"{'background': 'Atlantian City', 'skin': 'Blac...",0.016900,22.320857,2022-09-25T01:21:23,0x83b2049b5c4b028b08cbaf336eb4b4414b077888
19,{'state': 'Unrevealed'},0.032300,42.660574,2022-09-25T01:21:23,0x024d66ea93afe0d2ab8af8afeb277f40d974828e
...,...,...,...,...,...
11710,"{'size': 'Large', 'water': '4.95', 'grass': '7...",0.004080,5.399918,2022-09-25T06:22:23,0x3a8713065e4daa9603b91ef35d6a8336ef7b26c6
11711,"{'created date': 1662060421000, 'length': 9, '...",0.000000,0.000000,2022-09-25T06:45:47,0xaded665fc49620b88e17abc07211405556bcc5d6
11713,"{'shoulder pad': 'Complete leaf', 'helm': 'Lea...",0.019000,25.091344,2022-09-25T03:49:23,0x4e943b25dedc1a86a4d31dd461b1379d09fd723a
11714,"{'tribe': 'Party Raver', 'type': 'Party Animal...",0.049000,64.840414,2022-09-25T06:17:11,0x613e681f6c2092a550faac8493a049b927ed5878


In [45]:
dict_traits = dict()
for attr in df_final_1.attributes:
    if attr:
        try:
            for trait in attr:
                dict_traits[trait] = dict_traits.setdefault(trait, 0) + 1
        except AttributeError:
            pass
        except TypeError:
            # print(trait)
            pass
        
preparred_for_df = []
for key in dict_traits:
    preparred_for_df.append([key, dict_traits[key]])
pd.DataFrame(preparred_for_df, columns=['trait_type','quantity']).sort_values(by=['quantity'], ascending=False).head(20)

Unnamed: 0,trait_type,quantity
7,background,2694
11,eyes,2418
12,mouth,1811
10,clothes,1496
79,fur,960
17,body,827
84,hat,707
8,skin,655
13,head,581
34,headwear,533


In [46]:
attributes = ['background', 'eyes', 'body', 'mouth', 'head', 'clothes', 'hair']
df_separated_attributes = df_final_1.copy()


for attr in attributes:
    df_separated_attributes[attr] = df_final_1['attributes'].apply(lambda x: x.get(attr).lower() if x and type(x.get(attr)) is str else None)
df_separated_attributes.drop(['attributes'], axis=1, inplace=True)
df_separated_attributes

Unnamed: 0,price_eth,price_usd,transaction_date,seller_address,background,eyes,body,mouth,head,clothes,hair
0,0.030000,39.622824,2022-09-25T01:21:23,0x4bfde9c1ab8887452a2a9fb80b6f60e013108ea2,,,,,,,
8,0.030000,39.622824,2022-09-25T01:21:23,0x4420063f765238e74a05d45be055505da82654b1,,,,,,,
17,0.002352,3.106429,2022-09-25T01:21:23,0xfdbaed3cd99be718581c8462a0ba81a867ce55f7,,,,,,,
18,0.016900,22.320857,2022-09-25T01:21:23,0x83b2049b5c4b028b08cbaf336eb4b4414b077888,atlantian city,scarred,,seal,navy,shirt & tie,
19,0.032300,42.660574,2022-09-25T01:21:23,0x024d66ea93afe0d2ab8af8afeb277f40d974828e,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
11710,0.004080,5.399918,2022-09-25T06:22:23,0x3a8713065e4daa9603b91ef35d6a8336ef7b26c6,,,,,,,
11711,0.000000,0.000000,2022-09-25T06:45:47,0xaded665fc49620b88e17abc07211405556bcc5d6,,,,,,,
11713,0.019000,25.091344,2022-09-25T03:49:23,0x4e943b25dedc1a86a4d31dd461b1379d09fd723a,,,,,,,
11714,0.049000,64.840414,2022-09-25T06:17:11,0x613e681f6c2092a550faac8493a049b927ed5878,the raverse,blue,long crop,silver-smile,mohawk,,


In [47]:
df_separated_attributes.fillna(value=np.nan, inplace=True)

In [53]:
df_separated_attributes.dropna(subset=['background', 'eyes', 'body'])


Unnamed: 0,price_eth,price_usd,transaction_date,seller_address,background,eyes,body,mouth,head,clothes,hair
212,0.1000,132.076079,2022-09-25T01:22:11,0x2bccedfcbcf25e48c2c395e676bdb5db7a5b674b,yellow,3d,tan,lazy,,safari,
388,0.0150,19.811412,2022-09-25T01:23:35,0x21dde751132a4f11bf516efdce5b59649dfe090a,lilac,stoned brown headphones,orange spikes,tongue yellow,,,
390,0.0380,50.188910,2022-09-25T01:23:35,0xee8ff92a859ea22710412ce57bf46686b372feee,navy blue,stiched tan,pink,gummy white,,,
710,0.0890,117.547711,2022-09-25T01:25:59,0x6b247f038d3224a2413c231790b6c26f7c7e0cff,orange,toms,dark brown,knife,,dirty tank,
907,0.1280,169.057382,2022-09-25T01:27:35,0x5d716f056e9002c1f4cf513d8227dd1074496010,yellow,happy,light brown,stubborn,bandana,colonial,
...,...,...,...,...,...,...,...,...,...,...,...
11686,0.0320,42.352297,2022-09-25T06:19:47,0x041601dca62c358323ffd2d33770e2221a64d523,white,dead,dad_robe,small_excited,,,
11688,0.0680,89.998631,2022-09-25T06:19:47,0xdb05681616c261f0879e8d168b60fe8d15fa0ee4,white,sleep_deprived,red_hoodie,chubby_cheeks,,,
11696,0.9000,1191.158350,2022-09-25T06:20:47,0x413afa72c5a66979a9e07cacf969f38941ce7cb4,purple,lazy,denim shirt,mo,flattop,,
11705,0.0838,110.910077,2022-09-25T06:21:35,0x7a5c5c7a1da3d219b09defc52c5646a0c9feeecb,light gray,tearful,body,ehhhh,,,wind


In [49]:
df_separated_attributes.to_csv('Dataset_0.4.csv', index=False)