In [None]:
import pandas as pd
import numpy as np

In [None]:
# Price log table; later cells read its steam_id, date, price, regular and
# cut_pct columns.
p = pd.read_csv("table_price_log.csv")
# Per-game table; later cells read its review counts, Metacritic columns and
# steam_id.
df = pd.read_csv("steam_top_games_with_price_logs_and_game_info.csv")

**Game Quality**

In [None]:
# Fraction of all reviews that are positive (NaN when both counts are zero).
df['positive_review_rate'] = df['positive_review'].div(df['total_review'])
# 1 when a Metacritic score is present for the game, 0 when it is missing.
df['has_metacritic'] = (~df['metacritic_scores'].isna()).astype(int)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
mm_scaler = MinMaxScaler()
sd_scaler = StandardScaler()

# log10(0) evaluates to -inf, and the sklearn scalers refuse infinite input.
# Zero review counts are therefore mapped to NaN first: the scalers ignore NaN
# while fitting and pass it through unchanged when transforming.
df['log_total_review'] = np.log10(df['total_review'].replace(0, np.nan))
# Standardized raw review count (zero mean, unit variance).
df['sd_total_review'] = sd_scaler.fit_transform(df[['total_review']])
# Min-max versions of the log-scaled and the standardized review counts.
# mm_scaler is refit on every call, so each column is scaled independently.
df['mm_lg_total_review'] = mm_scaler.fit_transform(df[['log_total_review']])
df['mm_sd_total_review'] = mm_scaler.fit_transform(df[['sd_total_review']])
df['mm_metacritic_scores_augmented'] = mm_scaler.fit_transform(df[['metacritic_scores_augmented']])
df = df.sort_values("steam_id").reset_index(drop=True)
# Preview only the first rows instead of rendering the entire frame.
df.head()

**Deal Quality**

In [None]:
from datetime import datetime, timedelta, timezone

# Parse as tz-aware UTC so the column can be compared with the tz-aware
# datetime.now(timezone.utc) used below (a tz-naive column would raise on that
# comparison). Values that cannot be parsed become NaT.
p['date'] = pd.to_datetime(p['date'], errors='coerce', utc=True)

# Latest dated log row per game.
# - Rows without a usable date (or game id) are dropped first: sort_values
#   places NaT last, so an undated row would otherwise be picked as "latest".
# - drop_duplicates(keep='last') keeps one whole row per game, whereas
#   GroupBy.last() takes the last non-null value of each column separately and
#   can combine values from different rows.
latest_rows = (
    p.dropna(subset=['date', 'steam_id'])
    .sort_values('date')
    .drop_duplicates(subset='steam_id', keep='last')
    .sort_values('steam_id')
    .reset_index(drop=True)
)

# most recent price & regular price
most_recent_price = latest_rows[['steam_id', 'price', 'regular']].rename(
    columns={'price': 'current_price', 'regular': 'regular_price'}
)

# history low: among the rows at each game's minimum price, take the latest
# dated one (last() skips NaT here, so a dated occurrence wins when available)
is_lowest = p['price'] == p.groupby('steam_id')['price'].transform('min')
min_price_latest = (
    p.loc[is_lowest]
    .sort_values('date')
    .groupby('steam_id')
    .last()
    .reset_index()[['steam_id', 'price', 'date']]
    .rename(columns={'price': 'lowest_price', 'date': 'lowest_price_date'})
)

# deal rate: discount of the latest row, as a fraction (cut_pct is divided by 100)
rate = (
    latest_rows[['steam_id', 'cut_pct']]
    .rename(columns={'cut_pct': 'deal_rate'})
    .assign(deal_rate=lambda frame: frame['deal_rate'] / 100)
)

# avg deal rate: mean discount over rows with a non-zero cut, as a fraction
avg_cut = (
    p[p['cut_pct'] != 0]
    .groupby('steam_id', as_index=False)['cut_pct']
    .mean()
    .rename(columns={'cut_pct': 'avg_deal_rate'})
    .assign(avg_deal_rate=lambda frame: frame['avg_deal_rate'] / 100)
)

# last year deal count: number of non-zero-cut rows dated within the past 365 days
one_year_ago = datetime.now(timezone.utc) - timedelta(days=365)
p_last_year_deals = p[(p['date'] >= one_year_ago) & (p['cut_pct'] != 0)]
deal_count_last_year = p_last_year_deals.groupby('steam_id').size().reset_index(name='deal_count_last_365_days')

In [None]:
# Attach every per-game price aggregate to the game table. Left joins keep
# games that have no rows in the price log.
game = (
    df
    .merge(min_price_latest, on='steam_id',how='left')
    .merge(most_recent_price, on='steam_id',how='left')
    .merge(rate, on='steam_id',how='left')
    .merge(avg_cut, on='steam_id',how='left')
    .merge(deal_count_last_year, on='steam_id',how='left')
)

# Deepest discount on record, as a fraction of the regular price.
game['highest_deal_rate'] = 1-game["lowest_price"]/game["regular_price"]
# Absolute amount saved at the current price.
game['deal_amount'] = game['regular_price'] - game['current_price']

# Fill gaps with 0 in numeric columns only. A frame-wide fillna(0) would also
# write the integer 0 into the datetime column lowest_price_date and into text
# columns, corrupting their dtype and the exported CSV.
numeric_cols = game.select_dtypes(include='number').columns
game[numeric_cols] = game[numeric_cols].fillna(0)

In [None]:
# Work out how many days have passed since each game's previous deal.
# Note: when a game is on deal right now, the lookup uses the deal row that
# precedes the current one.
# Order the log by game, newest entry first within each game.
p_sorted = p.sort_values(by=['steam_id', 'date'], ascending=[True, False])

def get_latest_previous_deal(group):
    """Pick the date of a game's previous deal from its price-log rows.

    Expects `group` to hold one game's rows ordered newest first, with
    `steam_id`, `date` and `cut_pct` columns. A row counts as a deal when its
    `cut_pct` is not 0.

    Returns a Series with `steam_id` and `last_deal_date`:
      * no deal rows at all          -> NaT
      * newest row is not a deal     -> date of the newest deal row
      * newest row is a deal and an
        older deal row exists        -> date of the second-newest deal row
      * newest row is the only deal  -> the current UTC time
    """
    newest_row = group.iloc[0]
    game_id = newest_row['steam_id']

    # Rows on which a discount was active.
    discounted = group.loc[group['cut_pct'] != 0]
    on_deal_now = newest_row['cut_pct'] != 0

    if len(discounted) == 0:
        when = pd.NaT
    elif not on_deal_now:
        when = discounted.iloc[0]['date']
    elif len(discounted) > 1:
        # Skip the current deal row and use the one before it.
        when = discounted.iloc[1]['date']
    else:
        # Only the current deal exists, so "now" stands in for the date.
        when = datetime.now(timezone.utc)

    return pd.Series({'steam_id': game_id, 'last_deal_date': when})

# Run the lookup once per game and collect the results into a flat table.
last_deal_df = (
    p_sorted
    .groupby('steam_id')[["steam_id","date","cut_pct"]]
    .apply(get_latest_previous_deal)
    .reset_index(drop=True)
)

# Attach last_deal_date to the game table; games without a match get NaT.
game = game.merge(last_deal_df, on='steam_id', how='left')

In [None]:
from datetime import datetime, timezone
today = datetime.now(timezone.utc)

# Current discount relative to the game's average discount.
game['diff_avg_rate'] = game['deal_rate'] - game['avg_deal_rate']
# Current discount relative to the deepest discount on record.
game['diff_lowest_rate'] = game['deal_rate'] - game['highest_deal_rate']
# How far the current price sits above the historic low.
game['diff_lowest_amount'] = game['current_price'] - game['lowest_price']
# Whole days since the previous deal; NaN when no deal date is known.
game['days_since_last_deal'] = (today - game['last_deal_date']).dt.days
# Cap at one year. Games with no known deal date would otherwise carry NaN
# through every downstream scaler into the final score, so they are treated
# as "no deal for at least a year" and given the cap value.
days_cap = 365
game['clipped_days_since_last_deal'] = (
    game['days_since_last_deal'].clip(upper=days_cap).fillna(days_cap)
)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
mm_scaler = MinMaxScaler()
sd_scaler = StandardScaler()


def _mm_of_sd(column_name):
    """Standardize game[column_name], then min-max scale the standardized values."""
    standardized = sd_scaler.fit_transform(game[[column_name]])
    return mm_scaler.fit_transform(standardized)


# Log-scaled deal amount; the 0.1 offset keeps a zero amount finite.
game['log_deal_amount'] = np.log10(game['deal_amount']+0.1)
game['mm_lg_deal_amount'] = mm_scaler.fit_transform(game[['log_deal_amount']])

# Higher scaled value means a better deal for these three.
game['mm_sd_deal_amount'] = _mm_of_sd('deal_amount')
game['mm_sd_diff_avg_rate'] = _mm_of_sd('diff_avg_rate')
game['mm_sd_diff_lowest_rate'] = _mm_of_sd('diff_lowest_rate')
# Inverted: a smaller gap to the historic low scores higher.
game['mm_sd_diff_lowest_amount'] = 1-_mm_of_sd('diff_lowest_amount')
# game['mm_sd_days_since_last_deal'] = mm_scaler.fit_transform(sd_scaler.fit_transform(game[['days_since_last_deal']]))
game['mm_sd_clipped_days_since_last_deal'] = _mm_of_sd('clipped_days_since_last_deal')
# Inverted: fewer deals in the last year scores higher.
game['mm_sd_deal_count_last_365_days'] = 1-_mm_of_sd('deal_count_last_365_days')

In [None]:
def _weighted_score(frame, weights, total_weight):
    """Sum frame[col] * weight over `weights` in order, then divide by total_weight."""
    pairs = iter(weights.items())
    first_col, first_weight = next(pairs)
    running = frame[first_col] * first_weight
    for col, weight in pairs:
        running = running + frame[col] * weight
    return running / total_weight


# Game quality: review rate, review volume and Metacritic, normalized by 0.35.
# The "_lg" and "_sd" variants differ only in the review-volume column.
for _suffix, _volume_col in (('lg', 'mm_lg_total_review'), ('sd', 'mm_sd_total_review')):
    game['game_quality_' + _suffix] = _weighted_score(
        game,
        {
            'positive_review_rate': 0.15,
            _volume_col: 0.1,
            'mm_metacritic_scores_augmented': 0.1,
        },
        0.35,
    )

# Deal quality: seven deal features, normalized by 0.65.
# The "_sd" and "_lg" variants differ only in the deal-amount column.
for _suffix, _amount_col in (('sd', 'mm_sd_deal_amount'), ('lg', 'mm_lg_deal_amount')):
    game['deal_quality_' + _suffix] = _weighted_score(
        game,
        {
            'deal_rate': 0.175,
            _amount_col: 0.175,
            'mm_sd_diff_avg_rate': 0.1,
            'mm_sd_diff_lowest_rate': 0.1,
            'mm_sd_diff_lowest_amount': 0.05,
            'mm_sd_clipped_days_since_last_deal': 0.03,
            'mm_sd_deal_count_last_365_days': 0.02,
        },
        0.65,
    )

# Overall score: 35% game quality plus 65% deal quality.
game["score_lg"] = game["game_quality_lg"]*0.35+game["deal_quality_lg"]*0.65
game["score_sd"] = game["game_quality_sd"]*0.35+game["deal_quality_sd"]*0.65

**Deal Quality Tuning**

**Final Game Score Table**

In [None]:
# Keep only the identifying, score and pricing columns for the export table.
score = game.loc[:, [
    'steam_id', 'game',
    'game_quality_lg', 'game_quality_sd',
    'deal_quality_sd', 'deal_quality_lg',
    'score_lg', 'score_sd',
    'current_price', 'regular_price',
    'lowest_price', 'lowest_price_date',
    'review_desc',
    'deal_rate', 'avg_deal_rate', 'deal_amount',
    'deal_count_last_365_days', 'positive_review_rate',
]].copy()

In [None]:
# 1 when the latest discount is non-zero, otherwise 0.
score['on_deal'] = score['deal_rate'].ne(0).astype(int)

In [None]:
# Only the log-scaled ("_lg") score variants are exported.
score = score.drop(['deal_quality_sd', 'game_quality_sd', 'score_sd'], axis=1)

In [None]:
# Multiply the exported scores by 100.
# Caution: re-running this cell multiplies the columns by 100 again.
score[['game_quality_lg', 'deal_quality_lg', 'score_lg']] = (
    score[['game_quality_lg', 'deal_quality_lg', 'score_lg']] * 100
)

In [None]:
# Persist the score table; later cells read this file back and extend it.
score.to_csv(path_or_buf="table_score.csv", index=False)

#### Calculating Last Deal Date

In [None]:
# Last deal date for the exported table.
# NOTE: this differs from the lookup used in the score calculation: here the
# newest deal row is reported even when the game is currently on deal.

# Reload the price log and order it by game, newest entry first.
p = pd.read_csv("table_price_log.csv")
p['date'] = pd.to_datetime(p['date'])
p_sorted = p.sort_values(by=['steam_id', 'date'], ascending=[True, False])

# Reload the previously exported score table so it can be extended.
score = pd.read_csv("table_score.csv")

def get_latest_previous_deal_UESR(group):
    """Return the date of a game's newest deal row.

    Expects `group` to hold one game's price-log rows ordered newest first,
    with `steam_id`, `date` and `cut_pct` columns. A row counts as a deal when
    its `cut_pct` is not 0. The current deal, if any, is included.

    Returns a Series with `steam_id` and `last_deal_date`; the date is NaT
    when the game has no deal rows. (The function name is kept as spelled so
    the existing call site keeps working.)
    """
    game_id = group.iloc[0]['steam_id']

    # Rows on which a discount was active; the first one is the newest.
    discounted = group.loc[group['cut_pct'] != 0]
    newest_deal_date = discounted.iloc[0]['date'] if len(discounted) != 0 else pd.NaT

    return pd.Series({'steam_id': game_id, 'last_deal_date': newest_deal_date})

# Run the lookup once per game and collect the results into a flat table.
last_deal_df = (
    p_sorted
    .groupby('steam_id')[["steam_id","date","cut_pct"]]
    .apply(get_latest_previous_deal_UESR)
    .reset_index(drop=True)
)

# Add last_deal_date to the score table and write it back to the same file.
score = score.merge(last_deal_df, on='steam_id', how='left')

score.to_csv("table_score.csv", index=False)
score

#### Calculating Avg Days Between Deals

In [None]:
# Reload the price log and derive a calendar-day key for de-duplication.
df = pd.read_csv('table_price_log.csv')
df['date'] = pd.to_datetime(df['date'])
df['date_only'] = pd.to_datetime(df['date']).dt.date

# Keep deal rows only (cut_pct != 0), ordered per game by time, with at most
# one row per game and calendar day (the earliest of that day).
deals_df = (
    df.loc[df['cut_pct'] != 0]
    .sort_values(['steam_id', 'date'])
    .drop_duplicates(subset=['steam_id', 'date_only'], keep='first')
    .drop(columns=['date_only'])
)

# Whole days between consecutive deal rows of the same game; the first deal
# row of each game has no predecessor and gets NaN.
deals_df['days_between_deal'] = deals_df.groupby('steam_id')['date'].diff().dt.days

# Mean gap per game (NaN gaps are skipped by mean()).
avg_days_between_deals = (
    deals_df
    .groupby('steam_id')['days_between_deal']
    .mean()
    .reset_index(name='avg_days_between_deals')
)

# Extend the exported score table with the new column and save it again.
score = pd.read_csv("table_score.csv").merge(
    avg_days_between_deals, on='steam_id', how='left'
)

score.to_csv("table_score.csv", index=False)