# Wikidata Wikifier Service

In [43]:
import os
import requests
import pandas as pd
from io import StringIO

## Setup parameters

In [58]:
# CSV file that is uploaded to the service, and the column in it to wikify.
input_file = "/Users/amandeep/Downloads/companies.csv"
column_to_wikify = 'Company'

# Endpoint of the wikifier service. The commented-out line is the alternative
# for a service instance listening on localhost.
# wikifier_service_url = "http://localhost:1703/wikify"
wikifier_service_url = 'http://ckg07:1703/wikify'


## Peek at the input file

In [59]:
# Show the raw input table; missing values are rendered as empty strings.
pd.read_csv(input_file).fillna("")

Unnamed: 0,Rank,Company,Headquarters,Industry,Revenue (billion $),Profits (billion $),Assets (billion $),Market Value (billion $)
0,1,Citigroup,USA,Banking,146.56,21.54,1884.32,247.42
1,2,Bank of America,USA,Banking,116.57,21.13,1459.74,226.61
2,3,HSBC,UK,Banking,121.51,16.63,1860.76,202.29
3,4,General Electric,USA,Conglomerate,163.39,20.83,697.24,358.98
4,5,JPMorgan Chase & Co.,USA,Banking,99.3,14.44,1351.52,170.97
5,6,American International Group,USA,Insurance,113.19,14.01,979.41,174.47
6,7,ExxonMobil,USA,Oil and gas,335.09,39.5,223.95,410.65
7,8,Royal Dutch Shell,Netherlands,Oil and gas,318.85,25.44,232.31,208.25
8,9,UBS,Switzerland,Diversified Financials,105.59,9.78,1776.89,116.84
9,10,ING Group,Netherlands,Diversified Financials,153.44,9.65,1615.05,93.99


## Call via Python

In [60]:
def call_wikifier(url, k=1):
    """Upload the input CSV to the wikifier service and return its response as a table.

    The file at the notebook-level ``input_file`` path is sent as a multipart
    POST, and the notebook-level ``column_to_wikify`` is passed as the
    ``columns`` query parameter.

    Args:
        url: Endpoint of the wikifier service.
        k: Value sent as the ``k`` query parameter.

    Returns:
        pandas.DataFrame parsed from the UTF-8 CSV response body. The body is
        read with ``header=None``, so the first line of the response becomes
        row 0 of the frame rather than the column index.

    Raises:
        requests.HTTPError: If the service responds with a 4xx/5xx status.
    """
    file_name = os.path.basename(input_file)
    query = {'k': k, 'columns': column_to_wikify}

    # The context manager closes the upload handle even when the request fails;
    # previously the handle was opened inline and never closed.
    with open(input_file, mode='rb') as csv_file:
        files = {
            'file': (file_name, csv_file, 'application/octet-stream')
        }
        # Passing `params` lets requests URL-encode the values, so column
        # names containing '&', '#' or '=' reach the service intact.
        resp = requests.post(url, params=query, files=files)

    # Fail loudly on an error status instead of parsing an error page as CSV.
    resp.raise_for_status()

    body = resp.content.decode('utf-8')
    return pd.read_csv(StringIO(body), header=None)

In [63]:
# Send the input file to the configured service, passing k=3.
df = call_wikifier(wikifier_service_url, k=3)

In [64]:
# Display the response table with NaNs shown as empty strings; `df` is not reassigned.
df.fillna("")

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,Rank,Company,Headquarters,Industry,Revenue (billion $),Profits (billion $),Assets (billion $),Market Value (billion $),kg_id,kg_label,score
1,1,Citigroup,USA,Banking,146.56,21.54,1884.32,247.42,Q219508|Q1023765|Q856322,Citigroup|CIT Group|CITIC Group,0.99|-0.34|-0.7
2,2,Bank of America,USA,Banking,116.57,21.13,1459.74,226.61,Q487907|Q50316068|Q2882627,Bank of America|Bank of America|Bank of Africa,0.97|0.28|-0.01
3,3,HSBC,UK,Banking,121.51,16.63,1860.76,202.29,Q190464|Q29502311|Q29506693,HSBC|Hsbc|Hsbc,0.42|0.0|-0.01
4,4,General Electric,USA,Conglomerate,163.39,20.83,697.24,358.98,Q54173|Q844762|Q5531862,General Electric|General Electric|General Elec...,0.85|0.32|-0.24
5,5,JPMorgan Chase & Co.,USA,Banking,99.30,14.44,1351.52,170.97,Q192314|Q5087090|Q1676687,JPMorgan Chase|Chase|270 Park Avenue,0.63|-0.28|-0.32
6,6,American International Group,USA,Insurance,113.19,14.01,979.41,174.47,Q212235|Q4745572|Q4743779,American International Group|Americana Group|A...,0.97|-0.18|-0.21
7,7,ExxonMobil,USA,Oil and gas,335.09,39.50,223.95,410.65,Q156238|Q39946001|Q30265945,ExxonMobil|ExxonMobil Upstream Research Co|Exx...,1.0|-0.8|-0.83
8,8,Royal Dutch Shell,Netherlands,Oil and gas,318.85,25.44,232.31,208.25,Q154950|Q98551764|Q27612595,Royal Dutch Shell|Jeff Dutch|Gin Dutch,1.0|-0.56|-0.58
9,9,UBS,Switzerland,Diversified Financials,105.59,9.78,1776.89,116.84,Q446117|Q193199|Q45137700,UBS|UBS|UBS Optimus Foundation,0.15|-0.45|-0.71


## Call using `curl`

In [32]:
# Full request URL for the curl example: service endpoint plus the k and columns query parameters.
url = "{}?k=3&columns={}".format(wikifier_service_url, column_to_wikify)

In [35]:
!echo " curl -XPOST -F "file=@$input_file"  $url "

 curl -XPOST -F file=@/Users/amandeep/Github/wikidata-wikifier/wikifier/sample_files/cricketers.csv  https://dsbox02.isi.edu:8888/wikifier/wikify?k=3&columns=cricketers 
