In [5]:
# import sys
# sys.path.append('../..')
from datamart import search, augment
from datamart.utilities.utils import Utils, SEARCH_URL
import pandas as pd
import json

In [6]:
# Read the supply dataset (the table that later cells search with and augment).
old_df = pd.read_csv("example/fifa_example/fifa.csv")

# Single print call: the banner already ends in "\n" and sep contributes the
# second newline, so the emitted text matches printing banner and preview
# separately.
print("- READ THE SUPPLY DATASET -\n", old_df.head(), sep="\n")

- READ THE SUPPLY DATASET -

   d3mIndex  GameID        Date          Team      Opponent  \
0         0       0  14-06-2018        Russia  Saudi Arabia   
1         1       1  14-06-2018  Saudi Arabia        Russia   
2         2       2  15-06-2018         Egypt       Uruguay   
3         3       3  15-06-2018       Uruguay         Egypt   
4         4       4  15-06-2018       Morocco          Iran   

   Ball Possession %  Off-Target  Blocked  Offsides  Saves  Pass Accuracy %  \
0                 40           3        3         3      0               78   
1                 60           3        3         1      2               86   
2                 43           3        2         1      3               78   
3                 57           6        4         1      3               86   
4                 64           6        4         0      2               86   

   Passes  Distance Covered (Kms)  Yellow & Red  Man of the Match  1st Goal  \
0     306                     118     

In [7]:
# Load the query description. JSON interchange text is UTF-8, so the encoding
# is pinned here instead of being left to the platform's locale default
# (which is not UTF-8 on every system).
with open("example/fifa_example/fifa_query.json", encoding="utf-8") as f_json:
    query_json = json.load(f_json)

# Send the query description together with the supply DataFrame to the
# datamart search endpoint.
results = search(SEARCH_URL, query_json, old_df)

print("- SEARCH DATAMART BY A DESCRIPTION JSON OBJECT -\n")
print("Returned %d Datasets" % len(results))

# Each hit exposes a printable `summary`; nothing is printed when there are no hits.
for res in results:
    print(res.summary)

- SEARCH DATAMART BY A DESCRIPTION JSON OBJECT -

Returned 1 Datasets
 - FIFA World Cup -
    * Datamart ID: 127860000
    * Score: 86.15055
    * Description: FIFA World Cup
    * URL: https://www.football-data.org
    * Columns: 
	[0] id 
	[1] season_id 
	[2] season_startDate 
	[3] season_endDate 
	[4] season_currentMatchday 
	 ... 
	[47] referees_7_name 
	[48] referees_7_nationality 
	[49] referees_8_id 
	[50] referees_8_name 
	[51] referees_8_nationality 
    * Recommend Join Columns: 
	    Original Columns <-> datamart.Dataset Columns
	                 [3] <-> [24]                
	                 [4] <-> [22]                
        


In [8]:
# Materialize the first search hit: its metadata is handed to Utils.get_dataset.
# NOTE(review): the result is used as a pandas DataFrame below (.head()) --
# confirm that get_dataset cannot return None for this hit.
first_hit_metadata = results[0].metadata
new_df = Utils.get_dataset(metadata=first_hit_metadata)

print("- MATERIALIZE SEARCH RESULTS -\n")
print(new_df.head())

- MATERIALIZE SEARCH RESULTS -

       id  season_id season_startDate season_endDate  season_currentMatchday  \
0  200000          1       2018-06-14     2018-07-15                       3   
1  200001          1       2018-06-14     2018-07-15                       3   
2  200006          1       2018-06-14     2018-07-15                       3   
3  200007          1       2018-06-14     2018-07-15                       3   
4  200012          1       2018-06-14     2018-07-15                       3   

                utcDate    status  matchday        stage    group  \
0  2018-06-14T15:00:00Z  FINISHED       1.0  GROUP_STAGE  Group A   
1  2018-06-15T12:00:00Z  FINISHED       1.0  GROUP_STAGE  Group A   
2  2018-06-15T15:00:00Z  FINISHED       1.0  GROUP_STAGE  Group B   
3  2018-06-15T18:00:00Z  FINISHED       1.0  GROUP_STAGE  Group B   
4  2018-06-16T10:00:00Z  FINISHED       1.0  GROUP_STAGE  Group C   

            ...           referees_5_nationality referees_6_id  \
0     

In [9]:
# Augment the supply table with the first dataset returned by the search.
target_dataset = results[0]

# The banner is printed before the call so it precedes anything augment() emits.
print("- AUGMENT BY THE SEARCHED RESULT -\n")

# joining_columns is a 2-tuple of column-group lists. Presumably the first list
# names columns of old_df and the second the columns of the datamart dataset,
# paired by position (Team <-> homeTeam_name, Opponent <-> awayTeam_name) --
# TODO confirm against the datamart augment() documentation.
result = augment(
    original_data=old_df,
    augment_data=target_dataset,
    joining_columns=(
        [['Team'], ['Opponent']],
        [['homeTeam_name'], ['awayTeam_name']],
    ),
)

print(result.df.head())

- AUGMENT BY THE SEARCHED RESULT -



   d3mIndex  GameID        Date          Team      Opponent  \
0         0       0  14-06-2018        Russia  Saudi Arabia   
1         1       1  14-06-2018  Saudi Arabia        Russia   
2         2       2  15-06-2018         Egypt       Uruguay   
3         3       3  15-06-2018       Uruguay         Egypt   
4         4       4  15-06-2018       Morocco          Iran   

   Ball Possession %  Off-Target  Blocked  Offsides  Saves  \
0                 40           3        3         3      0   
1                 60           3        3         1      2   
2                 43           3        2         1      3   
3                 57           6        4         1      3   
4                 64           6        4         0      2   

            ...            referees_5_nationality  referees_6_id  \
0           ...                               NaN        49527.0   
1           ...                               NaN            NaN   
2           ...                             

In [12]:
# Report the join diagnostics carried on the augment result, separated by one
# blank line (sep="\n\n" reproduces the bare print() between the two values).
# NOTE(review): matched_rows presumably holds, per supply row, the index of the
# matched datamart row (None = no match) and cover_ratio the matched fraction
# -- confirm against the datamart documentation.
print(result.matched_rows, result.cover_ratio, sep="\n\n")

[0, None, 1, None, 2, None, 3, None, 4, None, 5, None, 6, None, 7, None, 8, None, 9, None, 10, None, 11, None, 12, None, 13, None, 14, None, 15, None, 16, None, 17, None, 18, None, 19, None, 20, None, 21, None, 22, None, 23, None, 24, None, 25, None, 26, None, 27, None, 28, None, 29, None, 30, None, 31, None, 33, None, 32, None, 35, None, 34, None, 37, None, 36, None, 38, None, 39, None, 40, None, 41, None, 42, None, 43, None, 44, None, 45, None, 47, None, 46, 62, 48, None, 49, None, 50, None, 51, None, 52, None, 53, None, 54, None, 55, None, 56, None, 57, None, 58, None, 59, None, 60, None, 61, None, 62, 46, 63, None]

0.5078125
