In [4]:
import json
import pandas as pd
import numpy as np

In [5]:
# Tunable constants read by value():
#   V = (A + (1 - A) / (1 + exp(-a * (P - b)))) * (B * (c * N + d))
# where P is the price array and N is the games array.
A = 0.6  # lower bound of the price factor; the logistic term moves it within (A, 1)
a = 3e-5 # steepness of the logistic in price. NOTE(review): was labelled [price], but it multiplies (P - b), so it acts as 1/price — confirm
b = 7.5e4 # price at which the logistic is at its midpoint. NOTE(review): was labelled [-], but it is subtracted from P, so it shares P's unit — confirm
B = 1  # overall multiplier on the games term
c = 9.5e-6 # slope of the games term. NOTE(review): was labelled [games], but it multiplies N, so it acts as 1/games — confirm
d = 0   # [-] constant offset of the games term

# Third filename component of the scraped files; dataframe_position() loads entries 0 and 1.
version_list = ["all_nif", "all_ifs"]
# Second filename component of the scraped files (one entry per position group).
position_list = ["GK", "RB,RWB", "LB,LWB", "CB", "CDM,CM,CAM", "RM,RW,RF", "LM,LW,LF", "CF,ST"]
# First filename component of the scraped files; dataframe_position_dt() subtracts
# the games of entry 0 from those of entry 1. Presumably YYYYMMDDHH timestamps — confirm.
time_list = ["2019120723", "2019121209"]

In [6]:
def import_scraped_data(time, position, version):
    """Load one scraped JSON file and return its parsed content.

    The file is read as UTF-8 from
    ``../scraped_data/<time>_<position>_<version>.json``, resolved relative to
    the current working directory.

    Parameters
    ----------
    time, position, version
        Filename components, interpolated with ``%s``.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    filename = "../scraped_data/%s_%s_%s.json" % (time, position, version)
    # The context manager closes the file on exit, so no explicit close() is
    # needed. Returning directly also avoids a local named like the
    # module-level constant `d`.
    with open(filename, encoding='utf-8') as json_file:
        return json.load(json_file)

def value(games, price):
    """Score players from games and price, scaled so the best score is 1.

    The raw score is a logistic factor in price times a linear term in games,
    using the module-level constants A, a, b, B, c and d:

        V = (A + (1 - A) / (1 + exp(-a * (P - b)))) * (B * (c * N + d))

    Parameters
    ----------
    games : array-like of numbers
        Games values (N), e.g. a pandas Series, list or ndarray.
    price : array-like of numbers
        Price values (P), same length as ``games``.

    Returns
    -------
    numpy.ndarray
        ``V / max(V)``. An empty input yields an empty array. When ``max(V)``
        is 0 there is nothing to scale by, so ``V`` is returned unscaled
        instead of dividing by zero and producing NaN.
    """
    P = np.asarray(price, dtype=float)
    N = np.asarray(games, dtype=float)
    V = (A + (1 - A) / (1 + np.exp(-a * (P - b)))) * (B * (c * N + d))
    # np.max raises on an empty array, so handle that case up front.
    if V.size == 0:
        return V
    Vmax = np.max(V)
    # Guard against 0/0 -> NaN (e.g. every games value is 0 while d == 0).
    if Vmax == 0:
        return V
    return V / Vmax

def clean_dataframe(df):
    """Return a cleaned copy of a scraped player table.

    Keeps the columns ``Name``, ``Rating``, ``PS4`` and ``Games``, renames
    ``PS4`` to ``Price`` and normalises each column:

    * ``Name``   - any parenthesised part ``(...)`` is removed.
    * ``Rating`` - converted with ``pd.to_numeric``.
    * ``Games``  - thousands separators removed, then converted to a number.
    * ``Price``  - strings such as ``"546K"`` or ``"1.2M"`` become int64
      amounts (``K`` = 10**3, ``M`` = 10**6, no suffix = 1).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with string columns; it is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``Name``, ``Rating``, ``Price``, ``Games``.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If a price has a suffix other than ``K`` or ``M``, or is not numeric.
    """
    # Work on an explicit copy so column assignment never touches (or warns
    # about) the caller's frame.
    cleaned = (
        df.loc[:, ["Name", "Rating", "PS4", "Games"]]
        .rename(columns={"PS4": "Price"})
        .copy()
    )

    # regex=True must be explicit: since pandas 2.0 str.replace treats the
    # pattern as a literal string by default and would remove nothing.
    cleaned["Name"] = cleaned["Name"].str.replace(r"\(.*\)", "", regex=True)
    cleaned["Rating"] = pd.to_numeric(cleaned["Rating"])
    cleaned["Games"] = pd.to_numeric(
        cleaned["Games"].str.replace(",", "", regex=False)
    )

    # Tolerate thousands separators in prices the same way as in Games.
    price_text = cleaned["Price"].str.replace(",", "", regex=False)
    magnitude = price_text.str.replace(r"[KM]+$", "", regex=True).astype(float)
    suffix = price_text.str.extract(r"[\d\.]+([KM]+)", expand=False).fillna("")
    multiplier = suffix.map({"": 1, "K": 10**3, "M": 10**6}).astype(float)
    if multiplier.isna().any():
        unknown = sorted(set(suffix[multiplier.isna()]))
        raise ValueError("unsupported price suffix: %s" % ", ".join(unknown))

    # Round before the integer cast: a product such as 1.005 * 1000 is
    # 1004.9999999999999 in floating point and plain truncation gives 1004.
    cleaned["Price"] = (magnitude * multiplier).round().astype("int64")
    return cleaned

def dataframe_position(time, position):
    """Build the ranked player table for one position at one scrape time.

    Loads the scraped data for the first two entries of ``version_list``,
    cleans each with ``clean_dataframe``, stacks the results under a fresh
    integer index, adds a ``Value`` column computed by ``value(Games, Price)``
    and returns the rows ordered from highest to lowest ``Value``.
    """
    # Read both raw payloads first, then clean them in the same order.
    raw_payloads = [
        import_scraped_data(time, position, version)
        for version in (version_list[0], version_list[1])
    ]
    combined = pd.concat(
        [clean_dataframe(pd.DataFrame(payload)) for payload in raw_payloads],
        ignore_index=True,
    )
    combined["Value"] = value(combined.Games, combined.Price)
    return combined.sort_values(by=['Value'], ascending=False)

def dataframe_position_dt(position):
    """Rank players of one position by games gained between two scrapes.

    Builds the table for ``time_list[0]`` and ``time_list[1]`` with
    ``dataframe_position``. In the later table, ``Games`` is replaced by the
    later count minus the earlier count for the row with the same ``Name``
    and ``Rating``. ``Value`` is then recomputed from the new ``Games`` and
    the later ``Price``.

    Matching rules:

    * a row with no (Name, Rating) match in the earlier table gets 0;
    * if the earlier table holds several matching rows, the last one is used.

    Parameters
    ----------
    position
        Position key passed through to ``dataframe_position``.

    Returns
    -------
    pandas.DataFrame
        The later table with updated ``Games`` and ``Value``, sorted from
        highest to lowest ``Value``.
    """
    earlier = dataframe_position(time_list[0], position)
    later = dataframe_position(time_list[1], position)

    # One pass over the earlier table; later duplicates overwrite earlier
    # ones, which keeps the "last match wins" rule.
    baseline_games = dict(
        zip(zip(earlier["Name"], earlier["Rating"]), earlier["Games"])
    )

    # One dictionary lookup per row instead of rescanning the earlier table.
    deltas = [
        games - baseline_games[key] if key in baseline_games else 0
        for key, games in zip(zip(later["Name"], later["Rating"]), later["Games"])
    ]

    # Assigning an ndarray is positional, matching the row order iterated above.
    later["Games"] = np.array(deltas, dtype=int)
    later["Value"] = value(later.Games, later.Price)
    return later.sort_values(by=['Value'], ascending=False)

In [31]:
# Build the games-delta ranking for one position; position_list[3] is "CB".
df = dataframe_position_dt(position_list[3])
# Disabled alternative ordering: uncomment to rank by the Games column
# (the games delta) instead of by Value.
# df.sort_values(by=['Games'], ascending=False, inplace=True)
# Display at most the first 60 rows.
df.head(60)

Unnamed: 0,Name,Rating,Price,Games,Value
6,Virgil van Dijk,90,546000,283083,1.0
3,Raphaël Varane,85,191000,281473,0.982426
1,Clément Lenglet,85,64000,327787,0.888467
8,Sergio Ramos,89,112000,253231,0.805854
7,Kalidou Koulibaly,89,43250,252366,0.634165
0,Éder Militão,81,1900,246890,0.558307
10,Aymeric Laporte,87,24000,188973,0.44806
12,Samuel Umtiti,86,27750,168238,0.402953
2,Lucas Hernández,84,5100,167596,0.381128
11,Kostas Manolas,85,15500,160211,0.372098


In [None]:
# Sane [Martial] 437
# Varane [Militao] 
# Kante [Sissoko] 1000
# Salah [Dembele]
# Aubameyang [Lucas] 1335
# Mane [Sane] 1517