In [9]:
import sys
sys.path.append('../..')
from datamart import search, augment
import json
import pandas as pd
from datamart.utilities.utils import SEARCH_URL

In [10]:
# Read the input table from 'hof.csv' (resolved against the kernel's current
# working directory) and show a five-row preview underneath a banner line.
# `hof_df` is reused by the search and augment cells further down.
hof_df = pd.read_csv('hof.csv')
print('----- ORIGINAL DATA ------', hof_df.head(5), sep='\n')

----- ORIGINAL DATA ------
   Year         Name Inducted members     Inducted by [11]
0  1986  Chuck Berry              NaN  Keith Richards [N2]
1  1986  James Brown              NaN   Steve Winwood [N2]
2  1986  Ray Charles              NaN    Quincy Jones [N2]
3  1986    Sam Cooke              NaN     Herb Alpert [N2]
4  1986  Fats Domino              NaN      Billy Joel [N2]


In [11]:
# Search request handed to datamart's `search`:
#   * "dataset" / "about": free-text keywords describing the topic we want.
#   * "required_variables": a "dataframe_columns" entry pointing at column
#     position 1 of `hof_df` (shown as `Name` in the preview of the data).
#     NOTE(review): presumably this asks for datasets that can be matched
#     against that column -- confirm against the datamart query schema.
query = dict(
    dataset=dict(
        about="rock and roll, music, rock music, rock artist, rock band, music award, artist award, hall of fame, singer",
    ),
    required_variables=[
        dict(type="dataframe_columns", index=[1]),
    ],
)

# Run the search with the query and the original dataframe. `candidates` is
# indexed by position in the augment cell below, so keep it as returned.
candidates = search(SEARCH_URL, query, hof_df)

# Report how many candidates came back, then print each candidate's summary.
print('----- SEARCH RESULTS (got %d results)-----' % (len(candidates),))
for cand in candidates:
    print(cand.summary)

----- SEARCH RESULTS (got 3 results)-----
 - SUMMARY OF THE DATAMART DATASET -
 * Datamart ID: 285020000
 * Score: 584.553
 * Title: WIKIDATA_PROP_DISCOGRAPHY
 * Description: link to discography in artist or band page
 * URL: https://www.wikidata.org/wiki/Property:P358
 * Columns: 
	[0] source 
	[1] category 
	[2] prop_value 
	[3] subject_label (http://www.wikidata.org/entity/Q163722, http://www.wikidata.org/entity/Q125886, http://www.wikidata.org/entity/Q126584 ...)
	[4] value_label (Karel Gott discography, Selena Gomez & the Scene discography, Eurythmics discography ...)
 * Recommend Join Columns: 
	    Original Columns <-> datamart.Dataset Columns
	                 [1] <-> [4]                 
        
 - SUMMARY OF THE DATAMART DATASET -
 * Datamart ID: 282040000
 * Score: 421.00208
 * Title: WIKIDATA_PROP_COMPOSER
 * Description: person(s) who wrote the music [for lyricist, use "lyrics by" (P676)]
 * URL: https://www.wikidata.org/wiki/Property:P86
 * Columns: 
	[0] source 
	[1] ca

In [13]:
# Announce the step (and warn about its run time) before the augment call
# starts, so the notice is visible while the call is still working.
print(
    '----- AUGMENT WITH THE 2ND RESULT -----',
    '(It can be very slow to materialize the results datasets, as each wikidata property can have tons of records)',
    sep='\n',
)

# Augment the original table with the second search candidate (index 1).
# `joining_columns` pairs the `Name` column of `hof_df` with the
# `value_label` column of the candidate dataset.
# NOTE(review): the nested-list shape presumably allows several columns per
# join key -- confirm against the datamart `augment` documentation.
res = augment(
    original_data=hof_df,
    augment_data=candidates[1],
    joining_columns=([['Name']], [['value_label']]),
)

# Preview the augmented table, then save it without the index column.
print(res.head())
res.to_csv('augmented.csv', index=False)


----- AUGMENT WITH THE 2ND RESULT -----
(It can be very slow to materialize the results datasets, as each wikidata property can have tons of records)
   Year         Name Inducted members     Inducted by [11]  \
0  1986  Chuck Berry              NaN  Keith Richards [N2]   
1  1986  James Brown              NaN   Steve Winwood [N2]   
2  1986  Ray Charles              NaN    Quincy Jones [N2]   
3  1986    Sam Cooke              NaN     Herb Alpert [N2]   
4  1986  Fats Domino              NaN      Billy Joel [N2]   

                                     source subject_label category  \
0                                       NaN           NaN      NaN   
1  http://www.wikidata.org/entity/Q18811649   Zero Degree     film   
2                                       NaN           NaN      NaN   
3                                       NaN           NaN      NaN   
4                                       NaN           NaN      NaN   

                                prop_value  
0          