# Explore `meta-kaggle` Dataset

## --- Submissions

Pre-requisites: 

1. Install Python package `kaggle`
2. [Create token](https://www.kaggle.com/docs/api) and 
3. Put the `kaggle.json` file to the `.kaggle` folder of your home directory
4. Download dataset `meta-kaggle` if haven't yet, by excuting: 
   ```python
   import api
   api.download_rawdata(rawdata_path)
   ```

In [1]:
# std library
import os
from datetime import datetime
from pprint import pprint
from typing import Literal

# third-party
import pandas as pd
from IPython.display import HTML, display

# local
from datafilter import table_filter, table_header
from _data_clean import contest_basic_setting, \
	contest_basic_submission_info, leaderboard_fulfill, \
	select_2_strongest, save_contest_data, Leaderboard_Type

In [2]:
rawdata_path = os.path.abspath('./__rawdata__')

### 1. Mapping Datasets

Create a dataset of submissions, containing the following fields:
- Id 
- TeamId 
- CompetitionId 
- SubmissionDate
- PublicScore
- PrivateScore

In [3]:
# Submissions
# - TeamId
# - SubmissionDate
tbl_submissions = table_filter( \
	'Submissions', rawdata_path,
	fields = ['Id', 'TeamId', 'SubmissionDate', 'IsAfterDeadline',
		'PublicScoreLeaderboardDisplay', 'PrivateScoreFullPrecision'],
	fields_index = ['Id'],
	fields_datetime = ['SubmissionDate'],
)
# drop all submissions after deadlines
tbl_submissions = tbl_submissions[~tbl_submissions['IsAfterDeadline']]
tbl_submissions.drop('IsAfterDeadline', axis=1, inplace=True)
# rename long name
tbl_submissions.rename(columns={
	'PublicScoreLeaderboardDisplay': 'PublicScore',
	'PrivateScoreFullPrecision': 'PrivateScore'
}, inplace=True)
# drop duplicated
## One team can submit only once at a time point
tbl_submissions = tbl_submissions.drop_duplicates(subset=['SubmissionDate', 'TeamId'])

In [4]:
# Mapping: Teams
# - CompetitionId
map_teams_competitions = table_filter( \
	'Teams', rawdata_path,
	fields = ['Id', 'CompetitionId'],
	fields_index = ['Id'],
)

In [5]:
# Merge `Submissions` and `TeamsCompetition`
tbl_submissions = pd.merge(tbl_submissions, map_teams_competitions,
	how = 'left', left_on = 'TeamId', right_on = 'Id', sort = False,
	suffixes=('', '_Team'))
tbl_submissions.drop(columns='Id_Team', inplace=True)
tbl_submissions.dtypes

Id                         int64
TeamId                     int64
SubmissionDate    datetime64[ns]
PublicScore              float64
PrivateScore             float64
CompetitionId              int64
dtype: object

In [6]:
# All fields in `Competitions`
table_header('Competitions', rawdata_path)

{'Id': ['2408'],
 'Slug': ['Eurovision2010'],
 'Title': ['Forecast Eurovision Voting'],
 'Subtitle': ["This competition requires contestants to forecast the voting for this year's Eurovision Song Contest in Norway on May 25th, 27th and 29th."],
 'HostSegmentTitle': ['Featured'],
 'ForumId': ['2'],
 'OrganizationId': [''],
 'EnabledDate': ['04/07/2010 07:57:43'],
 'DeadlineDate': ['05/25/2010 18:00:00'],
 'ProhibitNewEntrantsDeadlineDate': [''],
 'TeamMergerDeadlineDate': [''],
 'TeamModelDeadlineDate': [''],
 'ModelSubmissionDeadlineDate': [''],
 'FinalLeaderboardHasBeenVerified': ['True'],
 'HasKernels': ['True'],
 'OnlyAllowKernelSubmissions': ['False'],
 'HasLeaderboard': ['False'],
 'LeaderboardPercentage': ['10'],
 'ScoreTruncationNumDecimals': ['5'],
 'EvaluationAlgorithmAbbreviation': ['AE'],
 'EvaluationAlgorithmName': ['Absolute Error'],
 'EvaluationAlgorithmDescription': ['Sum of absolute values of all errors.'],
 'EvaluationAlgorithmIsMax': ['False'],
 'MaxDailySubmissions':

In [7]:
# Create Table of Contests
tbl_contests = table_filter( \
	'Competitions', rawdata_path,
	fields=['Id',
		'EnabledDate', 'DeadlineDate', 'ProhibitNewEntrantsDeadlineDate',
		'TeamMergerDeadlineDate', 'TeamModelDeadlineDate', 'TeamModelDeadlineDate',
		'ModelSubmissionDeadlineDate',
		'HasLeaderboard', 'LeaderboardPercentage', 'MaxDailySubmissions',
		'RewardType', 'RewardQuantity', 'NumPrizes',
		'FinalLeaderboardHasBeenVerified', 'EvaluationAlgorithmName', 'Overview', 'Rules'
	],
	fields_datetime=[
		'EnabledDate', 'DeadlineDate', 'ProhibitNewEntrantsDeadlineDate',
		'TeamMergerDeadlineDate', 'TeamModelDeadlineDate', 'TeamModelDeadlineDate',
		'ModelSubmissionDeadlineDate'
	],
	fields_index=['Id'],
)
len(tbl_contests)

9442

In [8]:
def save_to_device(
		contest_id: int,
		leaderboard_type: Leaderboard_Type,
		prize: float | None = None
):
	deadline, total_prize, max_daily_submit, percentage = \
			contest_basic_setting(tbl_contests, contest_id)
	if prize is None:
		prize = total_prize
	assert prize is not None
	players = \
			select_2_strongest(tbl_submissions, contest_id, deadline, leaderboard_type)
	if players is None:
		print(f'Warning: cannot find 2 players in contest {contest_id}')
		return
	else:
		player_i, player_j = players
	save_contest_data( \
		tbl_submissions, contest_id, player_i, player_j,
		deadline, prize, max_daily_submit, percentage, leaderboard_type)

### 2. Splitting Contests by Types

In [9]:
print('>>> Filter 1: How many contests are there having records of submissions from players?')
lst_contests_with_submissions = tbl_submissions['CompetitionId'].unique()
print(len(lst_contests_with_submissions))

# Filter 1
print('>>> We only consider these contests')
tbl_contests = tbl_contests[tbl_contests['Id'].isin(lst_contests_with_submissions)]
tbl_submissions = tbl_submissions.loc[tbl_submissions['CompetitionId'].isin(tbl_contests['Id'])]

>>> Filter 1: How many contests are there having records of submissions from players?
5693
>>> We only consider these contests


In [10]:
print('>>> Filter 2: In above, how many contests are there having public leaderboard?')
lst_contests_with_leaderboard = tbl_contests.loc[tbl_contests['HasLeaderboard']==True]['Id']
print(len(lst_contests_with_submissions))

# Filter 2
print('>>> We only consider these contests')
tbl_contests = tbl_contests[tbl_contests['Id'].isin(lst_contests_with_leaderboard)]
tbl_submissions = tbl_submissions.loc[tbl_submissions['CompetitionId'].isin(tbl_contests['Id'])]

>>> Filter 2: In above, how many contests are there having public leaderboard?
5693
>>> We only consider these contests


In [11]:
print('>>> Is there never-ending contests?')
forever_date = datetime(2029, 1, 1, 0, 0, 0)
print((tbl_contests['DeadlineDate'] > forever_date).sum())
print((tbl_contests['DeadlineDate'].isna()).sum())

>>> Is there never-ending contests?
0
0


In [12]:
print('>>> How many reward types are there?')
print(tbl_contests['RewardType'].value_counts(dropna=False))

>>> How many reward types are there?
RewardType
NaN          4777
USD           407
Knowledge     381
Swag           95
Jobs           14
Kudos          10
Prizes          6
EUR             1
Name: count, dtype: int64


In [13]:
print('>>> List of the number of contests providing multiple prize:')
pprint(tbl_contests['NumPrizes'].value_counts())

>>> List of the number of contests providing multiple prize:
NumPrizes
1     4721
0      498
3      292
5       82
4       28
10      18
6       16
2       11
8       10
7        9
9        4
13       1
12       1
Name: count, dtype: int64


In [14]:
# Split contests with prize type
lst_contest_Usd = tbl_contests.loc[tbl_contests['RewardType']=='USD']['Id']
lst_contest_Knowledge = tbl_contests.loc[tbl_contests['RewardType']=='Knowledge']['Id']
lst_contest_Swag = tbl_contests.loc[tbl_contests['RewardType']=='Swag']['Id']
lst_contest_Kudos = tbl_contests.loc[tbl_contests['RewardType']=='Kudos']['Id']
lst_contest_EUR = tbl_contests.loc[tbl_contests['RewardType']=='EUR']['Id']

# Split contests with prize number
lst_contest_1_prize = tbl_contests.loc[tbl_contests['NumPrizes']==1]['Id']
lst_contest_2_prize = tbl_contests.loc[tbl_contests['NumPrizes']==2]['Id']
lst_contest_3_prize = tbl_contests.loc[tbl_contests['NumPrizes']==3]['Id']
lst_contest_4_prize = tbl_contests.loc[tbl_contests['NumPrizes']==4]['Id']
lst_contest_5_prize = tbl_contests.loc[tbl_contests['NumPrizes']==5]['Id']
lst_contest_6_prize = tbl_contests.loc[tbl_contests['NumPrizes']==6]['Id']
lst_contest_7_prize = tbl_contests.loc[tbl_contests['NumPrizes']==7]['Id']
lst_contest_8_prize = tbl_contests.loc[tbl_contests['NumPrizes']==8]['Id']
lst_contest_9_prize = tbl_contests.loc[tbl_contests['NumPrizes']==9]['Id']

In [15]:
print('>>> In the list of contest with 1 single prize, how many of them has positive reward quantity?')
tbl_contest_1_prize = tbl_contests.loc[tbl_contests['Id'].isin(lst_contest_1_prize)]
tbl_contest_1_prize_has_reward = tbl_contest_1_prize.loc[tbl_contest_1_prize['RewardQuantity'] > 0]
pprint(len(tbl_contest_1_prize_has_reward))

pprint('>>> List their reward types:')
pprint(tbl_contest_1_prize_has_reward['RewardType'].value_counts())

>>> In the list of contest with 1 single prize, how many of them has positive reward quantity?
84
'>>> List their reward types:'
RewardType
USD          42
Knowledge    35
Jobs          6
Name: count, dtype: int64


In [16]:
print('>>> In the list of contest with 3 prizes, how many of them has positive reward quantity?')
tbl_contest_3_prize = tbl_contests.loc[tbl_contests['Id'].isin(lst_contest_3_prize)]
tbl_contest_3_prize_has_reward = tbl_contest_3_prize.loc[tbl_contest_3_prize['RewardQuantity'] > 0]
pprint(len(tbl_contest_3_prize_has_reward))

pprint('>>> List their reward types:')
pprint(tbl_contest_3_prize_has_reward['RewardType'].value_counts())

>>> In the list of contest with 3 prizes, how many of them has positive reward quantity?
196
'>>> List their reward types:'
RewardType
USD     194
Jobs      1
EUR       1
Name: count, dtype: int64


In [17]:
print('>>> In the list of contest with USD prize, list the reward quantity:')
tbl_contest_Usd_prize = tbl_contests.loc[tbl_contests['Id'].isin(lst_contest_Usd)]
pprint(tbl_contest_Usd_prize['NumPrizes'].value_counts())

>>> In the list of contest with USD prize, list the reward quantity:
NumPrizes
3     194
5      74
1      43
4      27
6      16
10     15
2      10
8      10
7       9
9       4
0       3
13      1
12      1
Name: count, dtype: int64


### 3. Select contest providing 1 single USD prize

In [18]:
# select
tbl_contest_1_Usd_prize = tbl_contests[tbl_contests['Id'].isin(lst_contest_Usd)]
tbl_contest_1_Usd_prize = tbl_contest_1_Usd_prize[tbl_contest_1_Usd_prize['Id'].isin(lst_contest_1_prize)]

# filter
tbl_contest_1_Usd_prize = tbl_contest_1_Usd_prize.loc[tbl_contest_1_Usd_prize['RewardQuantity'] > 0]
lst_contest_1_Usd_prize = tbl_contest_1_Usd_prize['Id'].values

In [19]:
lst_contest_1_Usd_prize

array([2435, 2445, 2448, 2452, 2454, 2464, 2467, 2478, 2479, 2487, 2488,
       2496, 2549, 2589, 2762, 2860, 2895, 2963, 3065, 3294, 3364, 3370,
       3377, 3385, 3386, 3469, 3493, 3507, 3521, 3526, 3586, 3706, 3867,
       3928, 3973, 3984, 4195, 4378, 4383, 4493, 4495, 4704])

Note: 
Finally, we pick 13 contests......

2435 (big), 2445 (small), 2454 (small), 2464 (small), 2467 (normal), 2478 (small),<br>
2549 (small), 2762 (small), 2860 (small), 3507 (small), 3526 (small), 3928 (small),<br>
4493 (small)


In [20]:
# Contest Info
contest_id = 3370

# Contest settings
deadline, total_prize, max_daily_submit, percentage = contest_basic_setting(tbl_contests, contest_id)
print('>>> prize =', total_prize)
print('>>> percentage =', percentage, '%')
print('>>> daily submit (max) =', max_daily_submit)
print('>>> deadline =', deadline)

# Display basic info
tbl_submissions_specific = contest_basic_submission_info(tbl_submissions, contest_id)

# Create leaderboard
leaderboard_pub, leaderboard_pri = leaderboard_fulfill(
		tbl_submissions_specific, deadline, 'Normal')
display(leaderboard_pri.display(-1, 10))
display(leaderboard_pub.display(-1, 10))

>>> prize = 10000.0
>>> percentage = 0 %
>>> daily submit (max) = 5
>>> deadline = 2013-09-03 00:00:00
>>> How many teams are there in this contest?
269
>>> How many submissions are there in total?
1705
>>> List the number of submissions for the most active 5 teams:
TeamId
42604    88
38475    84
38577    66
42593    54
40024    47
dtype: int64


(datetime.datetime(2013, 9, 2, 23, 42, 27),
     rank     score  submit_count  participate_days  last_submit_days_ago
 0  52440  0.819605            18                62                     6
 1  38148  0.810542             7               157                     6
 2  40024  0.810524            47               140                     6
 3  47290  0.807197            22                84                     0
 4  42593  0.799571            54               117                     6
 5  42692  0.787822            12               117                     5
 6  38475  0.781332            84               150                     5
 7  54707  0.775953            13                27                     6
 8  38577  0.770812            66               147                     3
 9  42349  0.767801            14               121                     6)

(datetime.datetime(2013, 9, 2, 23, 42, 27),
     rank  score  submit_count  participate_days  last_submit_days_ago
 0  52479    1.0             1                 0                     0
 1  54190    1.0            19                36                     0
 2  55675    1.0             2                14                     0
 3  52486    1.0             3                63                     0
 4  55031    1.0             2                16                     0
 5  47290    1.0            22                84                     0
 6  42531    1.0             2                15                     0
 7  48841    1.0            15                62                     0
 8  53084    1.0             3                54                     0
 9  56858    1.0             1                 0                     0)

In [21]:
# save_to_device(contest_id, 'Percentage_Big')
save_to_device(contest_id, 'Percentage_Small')
# save_to_device(contest_id, 'Normal')



### 4. For those Contests with 2 Prizes

In [22]:
print('>>> In the list of contest with 2 positive USD prizes, what are the values of the prizes?')
tbl_contest_2_Usd_prize = tbl_contests[tbl_contests['Id'].isin(lst_contest_Usd)]
tbl_contest_2_Usd_prize = tbl_contest_2_Usd_prize[tbl_contest_2_Usd_prize['Id'].isin(lst_contest_2_prize)]
tbl_contest_2_Usd_prize = tbl_contest_2_Usd_prize.loc[tbl_contest_2_Usd_prize['RewardQuantity'] > 0]
pprint(tbl_contest_2_Usd_prize['Id'].values)

>>> In the list of contest with 2 positive USD prizes, what are the values of the prizes?
array([2489, 2499, 3023, 3353, 3366, 3403, 3471, 3477, 3509, 4066])


Note: 
 - 2489: 2 $250
 - 2499: $5,000 vs $3,000 
 - 3023: $7,000 vs $2,500
 - 3353: $8,000 vs $2,000
 - 3366: $350 vs $150
 - 3403: complicated
 - 3471: $350 vs $150
 - 3477: $350 vs $150
 - 3509: $350 vs $150
 - 4066: $10,000 vs $5,000

Finally, we pick 

2489 (small), 3353 (small), 3366 (small), 3509 (small), 

In [23]:
contest_2prize_gap = {
	2489: 100, 2499: 2000, 3023: 4500, 3353: 6000, 3366: 200, 3471: 200, 3477: 200, 3509: 200, 4066: 5000
}

In [24]:
## check prize info by:
# display(HTML(tbl_contest_2_Usd_prize.loc[tbl_contest_2_Usd_prize['Id']==4066, ['Overview']].values[0][0]))

In [25]:
# Contest Info
contest_id = 3509

# Contest settings
deadline, total_prize, max_daily_submit, percentage = contest_basic_setting(tbl_contests, contest_id)
print('>>> percentage =', percentage, '%')
print('>>> daily submit (max) =', max_daily_submit)
print('>>> deadline =', deadline)

# Display basic info
tbl_submissions_specific = contest_basic_submission_info(tbl_submissions, contest_id)

# Create leaderboard
leaderboard_pub, leaderboard_pri = leaderboard_fulfill(
		tbl_submissions_specific, deadline, 'Normal')
display(leaderboard_pri.display(-1, 10))
display(leaderboard_pub.display(-1, 10))

>>> percentage = 30 %
>>> daily submit (max) = 3
>>> deadline = 2013-06-18 00:00:00
>>> How many teams are there in this contest?
129
>>> How many submissions are there in total?
491
>>> List the number of submissions for the most active 5 teams:
TeamId
44526    18
43929    15
44729    15
44521    15
45618    13
dtype: int64


(datetime.datetime(2013, 6, 17, 23, 59, 40),
     rank     score  submit_count  participate_days  last_submit_days_ago
 0  43019  0.993798             9                37                     1
 1  42947  0.992147             7                16                     0
 2  43160  0.991891            13                36                     0
 3  44779  0.991872             6                19                     0
 4  46797  0.991531             5                 8                     0
 5  43772  0.991183            13                29                     0
 6  45618  0.991011            13                13                     0
 7  43929  0.990442            15                26                     0
 8  45149  0.989619             1                17                    17
 9  51085  0.989582             1                 0                     0)

(datetime.datetime(2013, 6, 17, 23, 59, 40),
     rank    score  submit_count  participate_days  last_submit_days_ago
 0  43019  0.99365             9                37                     1
 1  44160  0.99266             7                25                     1
 2  43160  0.99239            13                36                     0
 3  42947  0.99233             7                16                     0
 4  45618  0.99189            13                13                     0
 5  43929  0.99097            15                26                     0
 6  46797  0.99046             5                 8                     0
 7  45149  0.99034             1                17                    17
 8  44779  0.98982             6                19                     0
 9  43772  0.98934            13                29                     0)

In [26]:
# save_to_device(contest_id, 'Percentage_Big', prize=contest_2prize_gap[contest_id])
save_to_device(contest_id, 'Percentage_Small', prize=contest_2prize_gap[contest_id])
# save_to_device(contest_id, 'Normal', prize=contest_2prize_gap[contest_id])

### 4. For those Contests with 3  Prizes

In [27]:
print('>>> In the list of contest with 3 positive USD prizes, what are the values of the prizes?')
tbl_contest_3_Usd_prize = tbl_contests[tbl_contests['Id'].isin(lst_contest_Usd)]
tbl_contest_3_Usd_prize = tbl_contest_3_Usd_prize[tbl_contest_3_Usd_prize['Id'].isin(lst_contest_3_prize)]
tbl_contest_3_Usd_prize = tbl_contest_3_Usd_prize.loc[tbl_contest_3_Usd_prize['RewardQuantity'] > 0]
pprint(tbl_contest_3_Usd_prize['Id'].values)

>>> In the list of contest with 3 positive USD prizes, what are the values of the prizes?
array([ 2509,  2518,  2551,  2602,  2606,  2609,  2667,  2732,  2748,
        2749,  2780,  2840,  2888,  2889,  2917,  2958,  2969,  2975,
        2984,  3043,  3046,  3064,  3080,  3084,  3175,  3288,  3316,
        3338,  3342,  3354,  3517,  3599,  3641,  3670,  3756,  3772,
        3774,  3800,  3887,  3926,  3929,  3934,  3951,  3960,  3966,
        3978,  4031,  4043,  4104,  4117,  4120,  4272,  4280,  4366,
        4407,  4438,  4453,  4467,  4471,  4477,  4481,  4488,  4521,
        4571,  4594,  4657,  4699,  4729,  4852,  4853,  4986,  5048,
        5056,  5144,  5174,  5229,  5260,  5261,  5340,  5357,  5390,
        5497,  5558,  5916,  6116,  6243,  6277,  6322,  6392,  6565,
        6644,  6649,  6768,  6841,  6927,  7042,  7043,  7082,  7115,
        7162,  7163,  7277,  7380,  7391,  7456,  7559,  7634,  7878,
        8011,  8076,  8078,  8219,  8220,  8310,  8311,  8396,  8540,


Note: 
 - 2509: $6,000 vs $3,000 vs $1,000
 - 2518: $6,000 vs $3,000 vs $1,000
 - 2551: $3,000 vs $1,500 vs $500
 - 2602: 5,000 vs 3000 vs 2000
 - 2606: $3,000 vs 1500 vs 500
 - 2609: $2,500 vs $1,500 vs $1,000
 - 2667: $60,000 vs $30,000 vs $10,000
 - 2732: $10,000 vs $5,000 vs $2,500
 - 2748: $5000, $2000, and $1000
 - 2749: $5000, $2000, and $1000
 - 2780: $10000 vs $6000 vs $4000
 - 2840: $15000, $5000, $2500
 - 2888: 5,000, 3000, 2000
 - 2889: complicated
 - 2917: $6,000 for first, $3,000 for second and $1,000 for third place submissions.
 - 2958: ???
 - 2969: 1st Place: $14,000, 2nd Place: $7,500, 3rd Place: $2,500
 - 2975: 1st Place: $22,000, 2nd Place: $10,000, 3rd Place: $6,000
 - 2984: First place: $7,000, Second place: $2,000, Third place: $1,000
 - 3043: 1st place: $11,000, 2nd place: $6,000, 3rd place: $2,000
 - 3046: 1st Place: $11,000, 2nd Place: $6,000, 3rd Place: $2,000
 - 3064: $500  1st place, $350  2nd place, $150  3rd place
 - 3080: $300  1st place, $210  2nd place, $  90  3rd place
 - 3084: First place       $12,000, Second place  $5,000, Third place      $3,000
 - 3175: 1st Place: $10,000 USD, 2nd Place: $4,000 USD, 3rd Place: $2,000 USD
 - 3288: 1st Place: $2,500, 2nd Place: $1,500, 3rd Place: $1,000
 - 3316: 1st place: $6,500, 2nd place: $2,500, 3rd place: $1,000
 - 3338: 1st place - $3000, 2nd place - $1500, 3rd place - $500
 - 3342: 1st place: $3000, 2nd place: $2000, 3rd place: $1000
 - 3354: First Place: $500, Second Place: $300, Third Place: $200
 - 3517: 1st place: $700, 2nd place: $600, 3rd place: $500
 - 3599: 1st Place - $2000, 2nd Place - $1000, 3rd Place - $500
 - 3641: 1st place - $3000, 2nd place - $1500, 3rd place - $500
 - 3670: complex
 - 3756: 1st place: $5,000, 2nd place: $3,000, 3rd place: $2,000
 - 3772: 1st place - $5000, 2nd place - $2500, 3rd place - $1000
 - 3774: First place: 500 USD and 1000 USD travel award (*) + Award certificate, Second place: 250 USD and 750 USD travel award (*) + Award certificate, Third place: 100 USD and 400 USD travel award (*) + Award certificate
 - 3800: 1st place - $5000, 2nd place - $2000, 3rd place - $1000
 - 3887: 1st Place - $7,000, 2nd Place - $4,000, 3rd Place - $2,000
 - 3926: 1st place - $1,000, 2nd place - $700, 3rd place - $300
 - 3929: 1st Place - $15,000, 2nd Place - $7,000, 3rd Place - $3,000
 - 3934: 1st Place - $10,000, 2nd Place - $4,000, 3rd Place - $2,000
 - 3951: 1st place - $12,000 (+ $25,000 if license option is exercised), 2nd place - $8,000 (+ $25,000 if license option is exercised), 3rd place - $5,000 (+ $25,000 if license option is exercised)
 - 3960: 1st place - $15000, 2nd place - $7000, 3rd place - $3000
 - 3966: 1st place - $5,000, 2nd place - $2,000, 3rd place - $1,000
 - 3978: 1st place - $100,000, 2nd place - $45,000, 3rd place - $15,000
 - 4031: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 4043: 1st place - $500, 2nd place - $300, 3rd place - $200
 - 4104: 1st place - $50,000, 2nd place - $30,000, 3rd place - $20,000
 - 4117: 1st place - $12000, 2nd place - $3000, 3rd place - $1000
 - 4120: 1st Place - $10,000, 2nd Place - $3,000, 3rd Place - $2,000
 - 4272: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 4280: 1st place - $5000, 2nd place - $3000, 3rd place - $2000
 - 4366: 1st place - $20,000, 2nd place - $12,000, 3rd place - $8,000
 - 4407: 1st place - $10,000, 2nd place - $6,000, 3rd place - $4,000
 - 4438: 1st place - $10,000, 2nd place - $6,000, 3rd place - $4,000
 - 4453: 1st place - $5000, 2nd place - $3000, 3rd place - $2000
 - 4467: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 4471: 1st place - $12,000 (+ $25,000 if license option is exercised), 2nd place - $8,000 (+ $25,000 if license option is exercised), 3rd place - $5,000 (+ $25,000 if license option is exercised)
 - 4477: 1st place - $5000, 2nd place - $3000, 3rd place - $2000
 - 4481: 1st place - $30,000, 2nd place - $15,000, 3rd place - $5,000
 - 4488: 1st Place - $7,000, 2nd Place - $5,000, 3rd Place - $3,000
 - 4521: 1st place - $5,000, 2nd place - $3,000, 3rd place - $2,000
 - 4571: 1st place - $50,000, 2nd place - $20,000, 3rd place - $10,000
 - 4594: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 4657: 1st place - $10,000, 2nd place - $6,000, 3rd place - $4,000
 - 4699: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 4729: 1st place - $125,000, 2nd place - $50,000, 3rd place - $25,000
 - 4852: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 4853: 1st place - $20,000, 2nd place - $12,000, 3rd place - $8,000
 - 4986: 1st place - $30,000, 2nd place - $20,000, 3rd place - $10,000
 - 5048: 1st place - $30,000, 2nd place - $20,000, 3rd place - $15,000
 - 5056: 1st place - $12,500, 2nd place - $7,500, 3rd place -  $5,000
 - 5144: 1st place - $50,000, 2nd place - $30,000, 3rd place - $20,000
 - 5174: 1st place - $10,000, 2nd place - $7,000, 3rd place - $3,000
 - 5229: 1st place - $40,000, 2nd place - $25,000, 3rd place - $10,000
 - 5260: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 5261: 1st place - $25,000, 2nd place - $15,000, 3rd place - $10,000
 - 5340: 1st place - $12,500, 2nd place - $7,500, 3rd place - $5,000
 - 5357: 1st place - $15,000, 2nd place - $10,000, 3rd place - $5,000
 - 5390: 1st place - $10,000, 2nd place - $6,000, 3rd place - $4,000
 - 5497: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 5558: 1st place - $30,000, 2nd place - $20,000, 3rd place - $10,000
 - 5916: 1st place - $50,000, 2nd place - $30,000, 3rd place - $20,000
 - 6116: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 6243: 1st place - $50,000, 2nd place - $20,000, 3rd place - $10,000
 - 6277: 1st place - $12,500, 2nd place - $8,500, 3rd place - $4,000
 - 6322: 1st place - $30,000, 2nd place - $20,000, 3rd place - $10,000
 - 6392: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 6565: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 6644: 1. $12,000 2. $8,000 3. $5,000
 - 6649: complicated <<<< A REALLY BIG PRIZE
 - 6768: $12,000, $8,000, $5,000
 - 6841: 1st place - $10,000, 2nd place - $3,000, 3rd place - $2,000
 - 6927: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 7042: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 7043: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 7082: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000
 - 7115: 1st place - $12,000, 2nd place - $8,000, 3rd place - $5,000

In [None]:
contest_3prize_gap = {
	2509: 3000,  # normal
	2518: 3000,  # normal
	2551: 1500,  # small
	2602: 2000,
	2606: 1500,
	2609: 1000,
	2667: 30000,  # small
	2732: 5000,
	2748: 3000,  # normal
	2749: 3000,  # small
	2780: 4000,
	2840: 10000,
	2888: 2000,
	2917: 3000,  # normal
	2969: 6500,
	2975: 12000,  # normal
	2984: 5000,  # normal
	3043: 5000,
	3046: 5000,  # normal
	3064: 200,  # small
	3080: 90,  # small
	3084: 7000,
	3175: 6000,
	3288: 1000,  # small
	3316: 4000,
	3338: 1500,  # small
	3342: 1000,
	3354: 200,
	3517: 100,  # small
	3599: 1000,
	3641: 1500,  # small
	3756: 2000,
	3772: 2500,
	3774: 500,  # small
	3800: 3000,  # small
	3887: 3000,  # normal
	3926: 300,   # small
	3929: 8000,  # small
	3934: 6000,
	3951: 4000,  # normal
	3960: 8000,  # small
	3966: 3000,
	3978: 55000,
	4031: 5000,  # small
	4043: 200,  # small
	4104: 20000,  # small
	4117: 9000,
	4120: 7000,
	4272: 5000,
	4280: 2000,
	4366: 8000,  # small
	4407: 4000,  # small
	4438: 4000,
	4453: 2000,  # small
	4467: 5000,
	4471: 4000,  # normal
	4477: 2000,  # small
	4481: 15000,  # normal
	4488: 2000,  # small
	4521: 2000,
	4571: 30000,
	4594: 5000,
	4657: 4000,  # small
	4699: 5000,  # small
	4729: 75000,
	4852: 5000,
	4853: 8000,
	4986: 10000,  # small
	5048: 10000,
	5056: 5000,  # small
	5144: 20000,  # small
	5174: 3000,  # small
	5229: 15000,  # small
	5260: 4000,
	5261: 10000,  # small
	5340: 5000,
	5357: 5000,  # small
	5390: 4000,  # small
	5497: 4000,  # small
	5558: 10000,
	5916: 20000,  # normal
	6116: 4000,
	6243: 30000,
	6277: 4000,
	6322: 10000,  # small
	6392: 4000,
	6565: 4000,
	6644: 4000,  # normal
	6768: 4000,
	6841: 7000,
	6927: 4000,  # small
	7042: 4000,
	7043: 4000,
	7082: 4000,  # normal
	7115: 4000,  # small
}

In [358]:
# # check prize info by:
contest_id = 7115

display(HTML(tbl_contest_3_Usd_prize.loc[tbl_contest_3_Usd_prize['Id']==contest_id, ['Overview']].values[0][0]))

In [359]:
# Contest settings
deadline, total_prize, max_daily_submit, percentage = contest_basic_setting(tbl_contests, contest_id)
print('>>> percentage =', percentage, '%')
print('>>> daily submit (max) =', max_daily_submit)
print('>>> deadline =', deadline)

# Display basic info
tbl_submissions_specific = contest_basic_submission_info(tbl_submissions, contest_id)

# Create leaderboard
leaderboard_pub, leaderboard_pri = leaderboard_fulfill(
		tbl_submissions_specific, deadline, 'Normal')
display(leaderboard_pri.display(-1, 10))
display(leaderboard_pub.display(-1, 10))

>>> percentage = 30 %
>>> daily submit (max) = 5
>>> deadline = 2017-12-15 00:00:00
>>> How many teams are there in this contest?
629
>>> How many submissions are there in total?
3185
>>> List the number of submissions for the most active 5 teams:
TeamId
917620    56
915631    48
939322    47
948083    38
915734    38
dtype: int64


(datetime.datetime(2017, 12, 14, 23, 58, 38),
       rank     score  submit_count  participate_days  last_submit_days_ago
 0   917022  0.793391            19                59                     0
 1   917620  0.792615            56                77                     0
 2   928461  0.788715            32                80                     0
 3   939322  0.786606            47                81                     0
 4   914725  0.782443            38                62                     0
 5   918309  0.777145             9                88                     0
 6   916474  0.776112            28                78                     0
 7   956406  0.774093            11                54                     0
 8   917910  0.772199            12                30                     0
 9  1047807  0.769721            23                63                     0)

(datetime.datetime(2017, 12, 14, 23, 58, 38),
      rank    score  submit_count  participate_days  last_submit_days_ago
 0  917022  0.79172            19                59                     0
 1  917620  0.79088            56                77                     0
 2  928461  0.78671            32                80                     0
 3  939322  0.78455            47                81                     0
 4  914725  0.78049            38                62                     0
 5  918309  0.77582             9                88                     0
 6  916474  0.77434            28                78                     0
 7  956406  0.77229            11                54                     0
 8  917910  0.77032            12                30                     0
 9  915131  0.76867            26                81                     0)

In [360]:
# save_to_device(contest_id, 'Percentage_Big', prize=contest_3prize_gap[contest_id])
save_to_device(contest_id, 'Percentage_Small', prize=contest_3prize_gap[contest_id])
# save_to_device(contest_id, 'Normal', prize=contest_3prize_gap[contest_id])