In [59]:
import json
import pandas as pd
import numpy as np

In [60]:
def import_scraped_data(time, position, version):
    """Load one scraped JSON snapshot from the ``../scraped_data`` directory.

    The file name is built as ``<time>_<position>_<version>.json`` and is
    resolved relative to the current working directory.

    Parameters
    ----------
    time, position, version
        Values interpolated (with ``%s``) into the file name.

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the UTF-8 encoded file.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    filename = "../scraped_data/%s_%s_%s.json" % (time, position, version)
    # The ``with`` statement closes the file on exit (also on error), so no
    # explicit ``close()`` call is needed.
    with open(filename, encoding='utf-8') as json_data:
        return json.load(json_data)

def value(games, price):
    """Score every row from its games count and price, scaled to a maximum of 1.

    The raw score is

        V = (A + (1 - A) / (1 + exp(-a * (P - b)))) * (B * (c * N + d))

    where ``P`` is the price and ``N`` the number of games.  ``A``, ``a``,
    ``b``, ``B``, ``c`` and ``d`` are module-level globals that are looked up
    when the function is called, so they must be defined before the first call.

    Parameters
    ----------
    games, price
        Objects exposing ``to_list()`` (the callers in this notebook pass
        DataFrame columns); both must have the same length.

    Returns
    -------
    numpy.ndarray
        ``V / max(V)``.  Two degenerate cases are returned un-normalised
        instead of failing or producing NaN: an empty input yields an empty
        array, and a maximum of exactly 0 yields ``V`` itself.
    """
    P = np.asarray(price.to_list())
    N = np.asarray(games.to_list())
    V = (A + (1 - A) / (1 + np.exp(-a * (P - b)))) * (B * (c * N + d))
    if V.size == 0:
        # np.max raises ValueError on an empty array; nothing to normalise.
        return V
    Vmax = np.max(V)
    if Vmax == 0:
        # Dividing by a zero maximum would give NaN/inf for every row; the raw
        # scores already carry the ordering, so hand them back unchanged.
        return V
    return V / Vmax

def clean_dataframe(df):
    """Return a typed copy of the scraped table with the columns used downstream.

    Steps:

    * keep ``Name``, ``Rating``, ``PS4`` and ``Games``; rename ``PS4`` to ``Price``;
    * remove any parenthesised part from ``Name``;
    * convert ``Rating`` to numeric;
    * drop thousands separators (``,``) from ``Games`` and convert to numeric;
    * expand a trailing ``K`` / ``M`` suffix in ``Price`` (x1e3 / x1e6) and
      store the result as ``int64``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with at least the columns ``Name``, ``Rating``, ``PS4`` and
        ``Games``.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``Name``, ``Rating``, ``Price``, ``Games``.

    Raises
    ------
    ValueError
        If a price carries a suffix other than a single ``K`` or ``M``.
    """
    # rename() returns a new frame, so the caller's data is never touched and
    # the column assignments below do not operate on a slice of the input.
    cleaned = df.loc[:, ["Name", "Rating", "PS4", "Games"]].rename(columns={'PS4': 'Price'})

    # regex=True must be explicit: since pandas 2.0 ``Series.str.replace``
    # treats the pattern as a literal string by default, which would leave
    # the parenthesised suffix in place.
    cleaned["Name"] = cleaned["Name"].str.replace(r"\(.*\)", "", regex=True)
    cleaned["Rating"] = pd.to_numeric(cleaned["Rating"])
    cleaned["Games"] = pd.to_numeric(cleaned["Games"].str.replace(",", "", regex=False))

    raw_price = cleaned["Price"]
    amount = raw_price.replace(r'[KM]+$', '', regex=True).astype(float)
    suffix = raw_price.str.extract(r'[\d\.]+([KM]+)', expand=False)
    multiplier = suffix.map({'K': 10**3, 'M': 10**6})
    # A suffix that matched the pattern but is not exactly "K" or "M"
    # (e.g. "KM") has no defined multiplier; fail loudly instead of
    # silently treating it as 1.
    unknown = suffix.notna() & multiplier.isna()
    if unknown.any():
        raise ValueError("unsupported price suffix: %r" % sorted(set(suffix[unknown])))
    multiplier = multiplier.fillna(1).astype(float)

    # Round before the integer cast so a float product such as 1004.9999...
    # is not truncated to the wrong whole number.
    cleaned["Price"] = (amount * multiplier).round().astype("int64")
    return cleaned

def dataframe_position(time, position):
    """Build the value-ranked table for one position group at one snapshot.

    Loads the scraped files for the first two entries of the module-level
    ``version_list``, cleans each with ``clean_dataframe``, stacks them with a
    fresh 0..n-1 index, adds a ``Value`` column computed by ``value`` from the
    ``Games`` and ``Price`` columns, and returns the rows ordered by ``Value``
    from highest to lowest.

    Parameters
    ----------
    time, position
        Passed through to ``import_scraped_data`` to select the files.

    Returns
    -------
    pandas.DataFrame
        Combined, cleaned table sorted by ``Value`` in descending order.
    """
    cleaned_frames = [
        clean_dataframe(
            pd.DataFrame(import_scraped_data(time, position, version_list[slot]))
        )
        for slot in (0, 1)
    ]
    combined = pd.concat(cleaned_frames, ignore_index=True)
    combined["Value"] = value(combined.Games, combined.Price)
    return combined.sort_values(by=['Value'], ascending=False)

def dataframe_position_dt(position):
    """Rank one position group by the games played between two snapshots.

    Builds the table for ``time_list[0]`` and ``time_list[1]`` with
    ``dataframe_position``.  For every row of the later table, ``Games`` is
    replaced by its games count minus the count of the row in the earlier
    table that has the same ``Name`` and ``Rating``.  If several earlier rows
    share that pair, the last one in the earlier table's order is used; if
    none does, the difference is set to 0.  ``Value`` is then recomputed from
    the differences and the rows are sorted by it, highest first.

    Parameters
    ----------
    position
        Position group, passed through to ``dataframe_position``.

    Returns
    -------
    pandas.DataFrame
        The later snapshot's table with ``Games`` holding the differences.
    """
    df0 = dataframe_position(time_list[0], position)
    df1 = dataframe_position(time_list[1], position)

    # One pass over the earlier snapshot builds a (Name, Rating) -> Games
    # lookup.  Later rows overwrite earlier ones, which reproduces the
    # "last match wins" outcome of a full scan while avoiding a nested
    # row-by-row loop over both frames.
    earlier_games = {}
    for name, rating, games in zip(df0["Name"], df0["Rating"], df0["Games"]):
        earlier_games[(name, rating)] = games

    dn = np.zeros(len(df1.index))
    for pos, (name, rating, games) in enumerate(
        zip(df1["Name"], df1["Rating"], df1["Games"])
    ):
        key = (name, rating)
        if key in earlier_games:
            dn[pos] = games - earlier_games[key]
        # Rows without a counterpart in the earlier snapshot keep 0.

    # Both assignments are positional (plain arrays), matching the row order
    # in which ``dn`` was filled.
    df1["Games"] = dn.astype(int)
    df1["Value"] = value(df1.Games, df1.Price)
    return df1.sort_values(by=['Value'], ascending=False)

In [71]:
# Parameters of the scoring formula evaluated in value():
#   V = (A + (1 - A) / (1 + exp(-a * (P - b)))) * (B * (c * N + d))
# with P = price and N = games.  Units follow from that formula.
A = 0.6
a = 3e-5 # [1/price] multiplies (P - b) inside the exponential
b = 7.5e4 # [price] subtracted from P
B = 1
c = 9.5e-6 # [1/games] multiplies N
d = 0   # [-] added to c * N

# Components of the scraped-data file names built by import_scraped_data()
# as "<time>_<position>_<version>.json".
version_list = ["all_nif", "all_specials"]
position_list = ["GK", "RB,RWB", "LB,LWB", "CB", "CDM,CM,CAM", "RM,RW,RF", "LM,LW,LF", "CF,ST"]
time_list = ["2019121307", "2019121420"]

In [72]:
# Rank the "RB,RWB" group (position_list[1]) by the games played between the
# two snapshots in time_list, then show the first 60 rows.
df = dataframe_position_dt(position_list[1])
df.head(60)

Unnamed: 0,Name,Rating,Price,Games,Value
0,Nélson Semedo,82,6500,3367711,1.0
1,Kyle Walker,84,20000,1764720,0.539457
2,João Cancelo,84,9400,1428981,0.4267
3,Carvajal,85,11000,1216373,0.364391
5,Trent Alexander-Arnold,83,4700,733800,0.217174
8,Joshua Kimmich,86,18000,688754,0.209538
9,Odriozola,80,1300,686468,0.201971
10,Kévin Malcuit,78,2200,663907,0.19563
18,Azpilicueta,84,6400,185328,0.055021
44,Javier Zanetti,92,0,0,0.0


In [None]:
# Bench: Ben Yedder, de Jong, Martial
# Varane [Militao] 
# Kante [Sissoko] 1000
# Salah [Dembele]
# Aubameyang [Lucas] 1335
# Mane [Sane] 1517