In [2]:
!pip install datasets transformers pandasql dask -qqq

In [2]:
from transformers import T5TokenizerFast
model_checkpoint = "t5-small"
model_date = '2021-10-03'
prefix = "translate English to Sparql: "
model_path=f'../../../data/models/sparql-translator-{model_date}-{model_checkpoint}-fp16'
tokenizer = T5TokenizerFast.from_pretrained(model_checkpoint, src_lang="en", tgt_lang="sparql")

In [3]:
from transformers import pipeline
from transformers import AutoModelForSeq2SeqLM
model = AutoModelForSeq2SeqLM.from_pretrained(model_path).to("cuda")
translator = pipeline(
    "translation_xx_to_yy",
    model=model,
    tokenizer=tokenizer,
    device=0 #0 for cuda, -1 for cpu
)

In [4]:
import pandas as pd
import re
df_q = pd.read_csv('../../../data/kdwd/q_19174151.csv').set_index('item_id')
df_p = pd.read_csv('../../../data/kdwd/p_12267.csv').set_index('property_id')

In [5]:
print(df_p.shape, df_q.shape)

(12267, 1) (19174151, 1)


In [6]:
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())

In [8]:
pysqldf("SELECT * FROM df_p WHERE en_label LIKE '%map%' LIMIT 10;").head()

Unnamed: 0,property_id,en_label
0,15,route map
1,181,taxon range map image
2,242,locator map image
3,1621,detail map
4,1846,distribution map


In [9]:
query_result = pysqldf("SELECT * FROM df_p WHERE en_label LIKE '%web%science%' LIMIT 1;")
if ~query_result.empty:
    print(query_result.iloc[0]['property_id'])

1053


In [7]:
def encode_props(qry):
    # Q
    for m in re.finditer(":Q\d+", qry):
        x = m.group(0)[1:]
        newstring = df_q.loc[int(x[1:])]['en_label'].iloc[0].replace(" ", "_").replace("-", "_")
        qry = qry.replace(x, newstring)
    # P
    for m in re.finditer(":P\d+", qry):
        x = m.group(0)[1:]
        newstring = df_p.loc[int(x[1:])]['en_label'].iloc[0].replace(" ", "_").replace("-", "_")
        qry = qry.replace(x, newstring)
    return qry
# Test
encode_props('SELECT ?obj WHERE { wd:Q1045 p:P1082 ?s . ?s ps:P1082 ?obj . ?s pq:P585 ?x filter(contains(YEAR(?x),\'2009\')) }')
# "select ?obj where [ wd:somalia p:population ?s . ?s ps:population ?obj . ?s pq:point_in_time ?x filter(contains(YEAR(?x),'2009')) ]"

"SELECT ?obj WHERE { wd:somalia p:population ?s . ?s ps:population ?obj . ?s pq:point_in_time ?x filter(contains(YEAR(?x),'2009')) }"

In [8]:
def replace_all(text, dict):
    for i, j in dict.items():
        text = text.replace(i, j)
    return text


def decode_props(qry):
    rep_dict = {}
    qry = qry.replace("[", "{").replace("]", "}")
    for m in re.finditer(":\w+", qry):
        pref = qry[m.start(0) - 2 : m.start(0)]
        try:
            x = m.group(0)[1:].replace("_", " ")
            if "wd" not in pref:
                word = x.replace("-", "%").replace(" ", "%")
                query_result = pysqldf(
                    f"SELECT * FROM df_p WHERE en_label LIKE '%{word}%' LIMIT 1;"
                )
                if not query_result.empty:
                    rep_dict[x] = "P" + str(query_result.iloc[0]["property_id"])
                    continue
                # s = df_p[df_p["en_label"] == x.replace("_", " ")]
                # if len(s) > 0:
                # rep_dict[x] = "P" + str(s.iloc[0][0])
                # continue
            # Use else to do not replace missing Ps with Qs
            # s = df_q[df_q["en_label"] == x]
            s = df_q[df_q.en_label.str.contains(x, na=False)].sort_values(by='item_id')
            rep_dict[x] = "Q" + str(s.iloc[0].name)
        except:
            print(f"Could not find {x} in our database.")
    return (qry, replace_all(qry, rep_dict))


# Test
decode_props(
    "select ?obj where [ wd:somalia p:population ?s . ?s ps:population ?obj . ?s pq:point_in_time ?x filter(contains(YEAR(?x),'2009')) ]"
)
# rplc_props('SELECT ?obj WHERE { wd:Q1045 p:P1082 ?s . ?s ps:P1082 ?obj . ?s pq:P585 ?x filter(contains(YEAR(?x),\'2009\')) }')


("select ?obj where { wd:somalia p:population ?s . ?s ps:population ?obj . ?s pq:point_in_time ?x filter(contains(YEAR(?x),'2009')) }",
 "select ?obj where { wd:Q1045 p:P1082 ?s . ?s ps:P1082 ?obj . ?s pq:point_in_time ?x filter(contains(YEAR(?x),'2009')) }")

In [None]:
def decode_props(qry):
    rep_dict = {}
    qry = qry.replace("[", "{").replace("]", "}")
    for m in re.finditer(":\w+", qry):
        pref = qry[m.start(0) - 2 : m.start(0)]
        try:
            x = m.group(0)[1:]
            if "wd" not in pref:
                s = df_p[df_p.en_label.str.contains(x.replace("_", " "), na=False)].sort_values(by='property_id')
                if not s.empty:
                    rep_dict[x] = "P" + str(s.iloc[0].name)
                    continue
            s2 = df_q[df_q.en_label.str.contains(x.replace("_", " "), na=False)].sort_values(by='item_id')
            rep_dict[x] = "Q" + str(s2.iloc[0].name)
        except:
            print(f"Could not find {x} in our database.")
    return (qry, replace_all(qry, rep_dict))


In [9]:
translate= lambda q: decode_props(translator(prefix+q, max_length=100)[0]['translation_text'])

In [13]:
!pip install beautifultable -qqq

In [10]:
def pretty_translate(t,q): 
    ans = translate(q)
    # print('QUESTION:\n' + q + '\nTARGET:\n'+ t +'\nRESULT:\n' + ans[0]+ '\nRESULT-DECODED:\n' + ans[1])
    from beautifultable import BeautifulTable
    table = BeautifulTable(maxwidth=140)
    # table.column_headers = ["", "Man Utd","Man City","T Hotspur"]
    table.rows.append(['QUESTION', q])
    table.rows.append(['Target', t])
    table.rows.append(['RESULT-DECODED', ans[1]])
    table.rows.append(['RESULT-Raw', ans[0]])
    print(table)

# TESTING

In [86]:
translator(prefix+'Who is Marlin Manson?', max_length=100)

[{'translation_text': 'select ?answer where [ wd:marlin_manson wdt:instance_of wd:marlin_manson . ?answer wdt:instance_of wd:marlin_manson ]'}]

In [87]:
# <re.Match object; span=(52, 66), match=':marlin_manson'>
# ('select distinct ?sbj where { ?sbj wdt:instance_of wd:marlin_manson . ?sbj wdt:instance_of wd:human }',
#  'select distinct ?sbj where { ?sbj wdt:P31 wd:marlin_manson . ?sbj wdt:P31 wd:Q5 }')
translate('Who is Marlin Manson?')

Could not find marlin_manson in our database.
Could not find marlin_manson in our database.
Could not find marlin_manson in our database.


('select ?answer where { wd:marlin_manson wdt:instance_of wd:marlin_manson . ?answer wdt:instance_of wd:marlin_manson }',
 'select ?answer where { wd:marlin_manson wdt:P31 wd:marlin_manson . ?answer wdt:P31 wd:marlin_manson }')

In [88]:
translate('Who is Bill Gates?')

{'bill_gates': 'Q5284'}
Could not find law_gates in our database.
Could not find law_gates in our database.


('select ?answer where { wd:bill_gates wdt:instance_of wd:law_gates . ?answer wdt:instance_of wd:law_gates }',
 'select ?answer where { wd:Q5284 wdt:P31 wd:law_gates . ?answer wdt:P31 wd:law_gates }')

In [89]:
translate('What is the time zone of Salt Lake City?')

{'salt_lake_city': 'Q9668'}


('select ?answer where { wd:salt_lake_city wdt:time_zone ?answer}',
 'select ?answer where { wd:Q9668 wdt:P421 ?answer}')

In [91]:
pretty_translate('select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 }','Who is the country for head of state of Mahmoud Abbas?')

Could not find mamoud_abbas in our database.
Could not find country_of_state in our database.
+----------------+--------------------------------------------------------------------------------+
|    QUESTION    |             Who is the country for head of state of Mahmoud Abbas?             |
+----------------+--------------------------------------------------------------------------------+
|     Target     | select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 } |
+----------------+--------------------------------------------------------------------------------+
| RESULT-DECODED |      select ?answer where { wd:mamoud_abbas wdt:country_of_state ?answer}      |
+----------------+--------------------------------------------------------------------------------+
|   RESULT-Raw   |      select ?answer where { wd:mamoud_abbas wdt:country_of_state ?answer}      |
+----------------+--------------------------------------------------------------------------------+


In [21]:
pretty_translate('select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 }',"What country is Mahmoud Abbas the head of state of?")

+----------------+----------------------------------------------------------------------------------------------------------+
|    QUESTION    |                           What country is Mahmoud Abbas the head of state of?                            |
+----------------+----------------------------------------------------------------------------------------------------------+
|     Target     |              select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 }              |
+----------------+----------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |              select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 }              |
+----------------+----------------------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select distinct ?sbj where { ?sbj wdt:head_of_state wd:mahmoud_abbas . ?sbj wdt:instance_of wd:coun

In [22]:
pretty_translate('select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 }','Who is the country for head of state of Mahmoud Abbas?')

+----------------+----------------------------------------------------------------------------------------------------------+
|    QUESTION    |                          Who is the country for head of state of Mahmoud Abbas?                          |
+----------------+----------------------------------------------------------------------------------------------------------+
|     Target     |              select distinct ?sbj where { ?sbj wdt:P35 wd:Q127998 . ?sbj wdt:P31 wd:Q6256 }              |
+----------------+----------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |              select distinct ?obj where { wd:Q127998 wdt:P35 ?obj . ?obj wdt:P31 wd:Q6256 }              |
+----------------+----------------------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select distinct ?obj where { wd:mahmoud_abbas wdt:head_of_state ?obj . ?obj wdt:instance_of wd:coun

In [23]:
pretty_translate("SELECT ?answer WHERE { wd:Q16538 wdt:P725 ?answer . ?answer wdt:P106 wd:Q177220}","Which female actress is the voice over on South Park and is employed as a singer?".lower())

+----------------+-----------------------------------------------------------------------------------------------+
|    QUESTION    |       which female actress is the voice over on south park and is employed as a singer?       |
+----------------+-----------------------------------------------------------------------------------------------+
|     Target     |       SELECT ?answer WHERE { wd:Q16538 wdt:P725 ?answer . ?answer wdt:P106 wd:Q177220}        |
+----------------+-----------------------------------------------------------------------------------------------+
| RESULT-DECODED |     select ?answer where { wd:Q16538 wdt:Q12280274 ?answer . ?answer wdt:P108 wd:Q177220}     |
+----------------+-----------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select ?answer where { wd:south_park wdt:voice_over ?answer . ?answer wdt:employer wd:singer} |
+----------------+--------------------------------------------------------------

In [24]:
pretty_translate("SELECT ?answer WHERE { wd:Q16538 wdt:P725 ?answer . ?answer wdt:P106 wd:Q177220}","Which female actress on South Park is the voice over and is used as a singer?")

+----------------+-----------------------------------------------------------------------------------+
|    QUESTION    |   Which female actress on South Park is the voice over and is used as a singer?   |
+----------------+-----------------------------------------------------------------------------------+
|     Target     | SELECT ?answer WHERE { wd:Q16538 wdt:P725 ?answer . ?answer wdt:P106 wd:Q177220}  |
+----------------+-----------------------------------------------------------------------------------+
| RESULT-DECODED |   select ?answer where { wd:Q16538 wdt:Q12280274 ?answer . ?answer wdt:Q177220}   |
+----------------+-----------------------------------------------------------------------------------+
|   RESULT-Raw   | select ?answer where { wd:south_park wdt:voice_over ?answer . ?answer wdt:singer} |
+----------------+-----------------------------------------------------------------------------------+


In [25]:
# Paul Erdős (Q173746)
pretty_translate("select distinct ?answer where { wd:Q173746 wdt:P3973 ?answer}","Which is the PIM authority ID of Paul Erd?")

<re.Match object; span=(25, 34), match=':paul_erd'>
+----------------+-----------------------------------------------------------------------------------+
|    QUESTION    |                    Which is the PIM authority ID of Paul Erd?                     |
+----------------+-----------------------------------------------------------------------------------+
|     Target     |           select distinct ?answer where { wd:Q173746 wdt:P3973 ?answer}           |
+----------------+-----------------------------------------------------------------------------------+
| RESULT-DECODED |       select ?answer where { wd:paul_erd wdt:P797 ?X . ?X wdt:P31 ?answer}        |
+----------------+-----------------------------------------------------------------------------------+
|   RESULT-Raw   | select ?answer where { wd:paul_erd wdt:authority ?X . ?X wdt:instance_of ?answer} |
+----------------+-----------------------------------------------------------------------------------+


In [26]:
pretty_translate("SELECT ?obj WHERE { wd:Q1045 p:P1082 ?s . ?s ps:P1082 ?obj . ?s pq:P585 ?x filter(contains(YEAR(?x),'2009')) }", 
            "What was the population of Somalia in 2009-0-0?")

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                     What was the population of Somalia in 2009-0-0?                                     |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     |     SELECT ?obj WHERE { wd:Q1045 p:P1082 ?s . ?s ps:P1082 ?obj . ?s pq:P585 ?x filter(contains(YEAR(?x),'2009')) }      |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |     select ?obj where { wd:Q1045 p:P1082 ?s . ?s ps:P1082 ?obj . ?s pq:P585 ?x filter(contains(YEAR(?x),'2009')) }      |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|   RESULT-Ra

In [27]:
translate('Humans born in New York City') #random query - answer seems correct

('select distinct ?sbj where { ?sbj wdt:place_of_birth wd:new_york_city . ?sbj wdt:instance_of wd:human }',
 'select distinct ?sbj where { ?sbj wdt:P19 wd:Q60 . ?sbj wdt:P31 wd:Q5 }')

In [28]:
# From QALD
target = "ASK WHERE { <http://dbpedia.org/resource/Taiko> a <http://dbpedia.org/class/yago/WikicatJapaneseMusicalInstruments> }"
q =  "Are Taiko some kind of Japanese musical instrument?"
pretty_translate(target, q)

+----------------+-----------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                  Are Taiko some kind of Japanese musical instrument?                                  |
+----------------+-----------------------------------------------------------------------------------------------------------------------+
|     Target     | ASK WHERE { <http://dbpedia.org/resource/Taiko> a <http://dbpedia.org/class/yago/WikicatJapaneseMusicalInstruments> } |
+----------------+-----------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                                      ask where { wd:Q221769 wdt:P1303 wd:Q5287 }                                      |
+----------------+-----------------------------------------------------------------------------------------------------------------------+
|   RESULT-Raw   |         

In [29]:
# From QALD
target = "PREFIX dct: <http://purl.org/dc/terms/> PREFIX dbc: <http://dbpedia.org/resource/Category:> SELECT DISTINCT ?uri WHERE { ?uri dct:subject dbc:Assassins_of_Julius_Caesar }"
q =  "Who killed Caesar?"
pretty_translate(target, q)

<re.Match object; span=(37, 46), match=':death_of'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                                   Who killed Caesar?                                                    |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dct: <http://purl.org/dc/terms/> PREFIX dbc: <http://dbpedia.org/resource/Category:> SELECT DISTINCT ?uri WHERE  |
|                |                                   { ?uri dct:subject dbc:Assassins_of_Julius_Caesar }                                   |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                     select distinct ?sbj where { ?sbj wdt:death_of wd:Q29288 . ?sbj

In [30]:
# From QALD
q = 'What is the highest mountain in Germany?'
target = "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX onto: <http://dbpedia.org/ontology/>  \nSELECT ?uri WHERE { ?uri rdf:type onto:Mountain ; onto:elevation ?elevation ; onto:locatedInArea <http://dbpedia.org/resource/Germany> } ORDER BY DESC(?elevation) LIMIT 1"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                        What is the highest mountain in Germany?                                         |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX o |
|                |                                          nto: <http://dbpedia.org/ontology/>                                            |
|                | SELECT ?uri WHERE { ?uri rdf:type onto:Mountain ; onto:elevation ?elevation ; onto:locatedInArea <http://dbpedia.org/re |
|                |                                   source/Germany> } ORDER BY DESC(?elevation) LIMIT 1                                   |
+------------

In [31]:
# From QALD
q = 'Which American presidents were in office during the Vietnam War?'
target = "PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> PREFIX dct: <http://purl.org/dc/terms/> PREFIX dbc: <http://dbpedia.org/resource/Category:> SELECT ?uri WHERE { ?uri dct:subject dbc:Presidents_of_the_United_States . res:Vietnam_War dbo:commander ?uri }"
pretty_translate(target, q)

<re.Match object; span=(90, 110), match=':american_presidency'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                            Which American presidents were in office during the Vietnam War?                             |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX res: <http://dbpedia.org/resource/> PREFIX dct: <http://purl.org/dc/t |
|                | erms/> PREFIX dbc: <http://dbpedia.org/resource/Category:> SELECT ?uri WHERE { ?uri dct:subject dbc:Presidents_of_the_U |
|                |                                   nited_States . res:Vietnam_War dbo:commander ?uri }                                   |
+----------------+------------------------------------------------------------------------

In [32]:
# From QALD
q = 'How many gold medals did Michael Phelps win at the 2008 Olympics?'
target = "PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dbr: <http://dbpedia.org/resource/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT Count(?sub) as ?c WHERE { ?sub dbo:goldMedalist dbr:Michael_Phelps . filter (contains (str(?sub), \"2008\") && contains (str(?sub), \"Olympics\")) }"
pretty_translate(target, q)

<re.Match object; span=(42, 60), match=':gold_medal_winner'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                            How many gold medals did Michael Phelps win at the 2008 Olympics?                            |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dbr: <http://dbpedia.org/resource/> PREFIX rdf: <http://www.w3.org/19 |
|                | 99/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT Count(?sub) as ?c WHERE { ?sub dbo |
|                |     :goldMedalist dbr:Michael_Phelps . filter (contains (str(?sub), "2008") && contains (str(?sub), "Olympics")) }      |
+----------------+---------------------------------------------------------------------------

In [33]:
# From QALD
q = 'What is the profession of Frank Herbert?'
target = "PREFIX dbpedia2: <http://dbpedia.org/property/> PREFIX res: <http://dbpedia.org/resource/> SELECT DISTINCT ?string WHERE { res:Frank_Herbert dbpedia2:occupation ?string }"
pretty_translate(target, q)

<re.Match object; span=(31, 45), match=':franc_herbert'>
<re.Match object; span=(91, 108), match=':professional_job'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                        What is the profession of Frank Herbert?                                         |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dbpedia2: <http://dbpedia.org/property/> PREFIX res: <http://dbpedia.org/resource/> SELECT DISTINCT ?string WHER |
|                |                                   E { res:Frank_Herbert dbpedia2:occupation ?string }                                   |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |            select

In [34]:
# From QALD
q = 'How many seats does the home stadium of FC Porto have?'
target = "PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dbp: <http://dbpedia.org/property/> PREFIX dbr: <http://dbpedia.org/resource/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX db: <http://dbpedia.org/> SELECT ?capacity WHERE { { dbr:FC_Porto dbo:ground ?ground . ?ground dbo:capacity ?capacity } UNION { dbr:FC_Porto dbo:ground ?ground . ?ground dbp:capacity ?capacity } }"
pretty_translate(target, q)

<re.Match object; span=(42, 55), match=':home_stadium'>
<re.Match object; span=(58, 67), match=':fc_porto'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                 How many seats does the home stadium of FC Porto have?                                  |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dbp: <http://dbpedia.org/property/> PREFIX dbr: <http://dbpedia.org/r |
|                | esource/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema# |
|                | > PREFIX db: <http://dbpedia.org/> SELECT ?capacity WHERE { { dbr:FC_Porto dbo:ground ?ground . ?ground dbo:capacity ?c |
|                |                 apacity } U

In [35]:
# From QALD
q = 'Which frequent flyer program has the most airlines?'
target = "SELECT ?uri WHERE { ?airline <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Airline> . ?airline <http://dbpedia.org/property/frequentFlyer> ?uri. } GROUP BY ?uri ORDER BY DESC(COUNT(DISTINCT ?airline)) OFFSET 0 LIMIT 1"
pretty_translate(target, q)

<re.Match object; span=(77, 89), match=':flight_time'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                   Which frequent flyer program has the most airlines?                                   |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT ?uri WHERE { ?airline <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Airline> .  |
|                | ?airline <http://dbpedia.org/property/frequentFlyer> ?uri. } GROUP BY ?uri ORDER BY DESC(COUNT(DISTINCT ?airline)) OFFS |
|                |                                                      ET 0 LIMIT 1                                                       |
+----------------+---------------------------------------------------------------------------------

In [36]:
# From QALD
q = 'Which European countries have a constitutional monarchy?'
target = "PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dct: <http://purl.org/dc/terms/> PREFIX dbc: <http://dbpedia.org/resource/Category:> PREFIX dbr: <http://dbpedia.org/resource/> SELECT ?uri WHERE { ?uri dct:subject dbc:Countries_in_Europe ; dbo:governmentType dbr:Constitutional_monarchy }"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                Which European countries have a constitutional monarchy?                                 |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX dct: <http://purl.org/dc/terms/> PREFIX dbc: <http://dbpedia.org/reso |
|                | urce/Category:> PREFIX dbr: <http://dbpedia.org/resource/> SELECT ?uri WHERE { ?uri dct:subject dbc:Countries_in_Europe |
|                |                                    ; dbo:governmentType dbr:Constitutional_monarchy }                                   |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECO

In [37]:
# From QALD
q = 'Which countries have places with more than two caves?'
target = "PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT DISTINCT ?uri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave) > 2 )"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                  Which countries have places with more than two caves?                                  |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT DISTINCT ?u |
|                | ri WHERE { ?cave rdf:type dbo:Cave ; dbo:location ?uri . ?uri rdf:type dbo:Country } GROUP BY ?uri HAVING ( COUNT(?cave |
|                |                                                         ) > 2 )                                                         |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECO

In [38]:
# From QALD
q = 'Which airports are located in California, USA?'
target = "SELECT DISTINCT ?uri WHERE { ?uri a <http://dbpedia.org/ontology/Airport> { ?uri <http://dbpedia.org/ontology/location> <http://dbpedia.org/resource/California> } UNION { ?uri <http://dbpedia.org/ontology/city> <http://dbpedia.org/resource/California> } UNION { ?uri <http://dbpedia.org/ontology/city> ?city . ?city <http://dbpedia.org/ontology/isPartOf> <http://dbpedia.org/resource/California> } UNION { ?uri <http://dbpedia.org/ontology/operator> <http://dbpedia.org/resource/California> } }"
pretty_translate(target, q)

<re.Match object; span=(89, 101), match=':california_'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                     Which airports are located in California, USA?                                      |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?uri WHERE { ?uri a <http://dbpedia.org/ontology/Airport> { ?uri <http://dbpedia.org/ontology/location> |
|                |  <http://dbpedia.org/resource/California> } UNION { ?uri <http://dbpedia.org/ontology/city> <http://dbpedia.org/resourc |
|                | e/California> } UNION { ?uri <http://dbpedia.org/ontology/city> ?city . ?city <http://dbpedia.org/ontology/isPartOf> <h |
|                | ttp://dbpedia.org/resource/California> } UNION { ?uri <http://dbpedia.org/ontol

In [42]:
# From QALD
q = "What are the nicknames of San Francisco?"
target = "SELECT DISTINCT ?string WHERE { res:San_Francisco foaf:nick ?string }"
pretty_translate(target, q)

+----------------+-----------------------------------------------------------------------------------------------------+
|    QUESTION    |                              What are the nicknames of San Francisco?                               |
+----------------+-----------------------------------------------------------------------------------------------------+
|     Target     |                SELECT DISTINCT ?string WHERE { res:San_Francisco foaf:nick ?string }                |
+----------------+-----------------------------------------------------------------------------------------------------+
| RESULT-DECODED |            select distinct ?sbj where { ?sbj wdt:P138 wd:Q62 . ?sbj wdt:P31 wd:Q82799 }             |
+----------------+-----------------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select distinct ?sbj where { ?sbj wdt:named_after wd:san_francisco . ?sbj wdt:instance_of wd:name } |
+----------------+--------------

In [43]:
# From QALD
q = "What is Angela Merkel’s birth name?"
target = "SELECT DISTINCT ?string WHERE { res:Angela_Merkel dbp:birthName ?string }"
pretty_translate(target, q)

+----------------+------------------------------------------------------------------------------------------------+
|    QUESTION    |                              What is Angela Merkel’s birth name?                               |
+----------------+------------------------------------------------------------------------------------------------+
|     Target     |           SELECT DISTINCT ?string WHERE { res:Angela_Merkel dbp:birthName ?string }            |
+----------------+------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                select ?answer where { wd:Q567 wdt:P19 ?X . ?X wdt:P19 ?answer}                 |
+----------------+------------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select ?answer where { wd:angela_merkel wdt:place_of_birth ?X . ?X wdt:place_of_birth ?answer} |
+----------------+------------------------------------------------------

In [44]:
# From QALD
q = "Who is the mayor of Berlin?"
target = "SELECT DISTINCT ?uri WHERE { res:Berlin dbp:leader ?uri }"
pretty_translate(target, q)

+----------------+------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                     Who is the mayor of Berlin?                                      |
+----------------+------------------------------------------------------------------------------------------------------+
|     Target     |                      SELECT DISTINCT ?uri WHERE { res:Berlin dbp:leader ?uri }                       |
+----------------+------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                select distinct ?sbj where { ?sbj wdt:P6 wd:Q64 . ?sbj wdt:P31 wd:Q5 }                |
+----------------+------------------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select distinct ?sbj where { ?sbj wdt:head_of_government wd:berlin . ?sbj wdt:instance_of wd:human } |
+----------------+------

In [45]:
# From QALD
q = "Which software has been published by Mean Hamster Software?"
target = "SELECT DISTINCT ?uri WHERE { ?uri rdf:type onto:Software { ?uri prop:publisher \"Mean Hamster Software\"@en } UNION { ?uri onto:publisher res:Mean_Hamster_Software } }"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                               Which software has been published by Mean Hamster Software?                               |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?uri WHERE { ?uri rdf:type onto:Software { ?uri prop:publisher "Mean Hamster Software"@en } UNION { ?ur |
|                |                                     i onto:publisher res:Mean_Hamster_Software } }                                      |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                    select distinct ?sbj where { ?sbj wdt:P123 wd:Q15070395 . ?sbj wdt:P31 wd:Q7397 }                    |
+------------

In [46]:
# From QALD
q = "Which country was Bill Gates born in?"
target = "SELECT DISTINCT ?country WHERE { { dbr:Bill_Gates dbo:birthPlace ?birthPlace . ?birthPlace dbo:country ?country } UNION { dbr:Bill_Gates dbo:birthPlace ?birthPlace . ?birthPlace dbo:isPartOf ?place . ?place dbo:country ?country } }"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                          Which country was Bill Gates born in?                                          |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?country WHERE { { dbr:Bill_Gates dbo:birthPlace ?birthPlace . ?birthPlace dbo:country ?country } UNION |
|                |     { dbr:Bill_Gates dbo:birthPlace ?birthPlace . ?birthPlace dbo:isPartOf ?place . ?place dbo:country ?country } }     |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                      select distinct ?sbj where { ?sbj wdt:P19 wd:Q5284 . ?sbj wdt:P31 wd:Q6256 }                       |
+------------

In [47]:
# From QALD
q = "How many grand-children did Jacques Cousteau have?"
target = "SELECT COUNT(DISTINCT ?y AS ?y) WHERE { <http://dbpedia.org/resource/Jacques_Cousteau> <http://dbpedia.org/ontology/child> ?x . ?x <http://dbpedia.org/ontology/child> ?y . }"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                   How many grand-children did Jacques Cousteau have?                                    |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT COUNT(DISTINCT ?y AS ?y) WHERE { <http://dbpedia.org/resource/Jacques_Cousteau> <http://dbpedia.org/ontology/chi |
|                |                                 ld> ?x . ?x <http://dbpedia.org/ontology/child> ?y . }                                  |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                          select (COUNT(?sub) AS ?value ) { ?sub wdt:Q247153-child wd:Q83233 }                           |
+------------

In [48]:
# From QALD
q = "Give me all professional skateboarders from Sweden."
target = "SELECT DISTINCT ?uri WHERE { ?uri dbo:occupation dbr:Skateboarder { ?uri dbo:birthPlace dbr:Sweden } UNION { ?uri dbo:birthPlace ?place . ?place dbo:country dbr:Sweden } }"
pretty_translate(target, q)

<re.Match object; span=(36, 62), match=':professional_skateboarder'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                   Give me all professional skateboarders from Sweden.                                   |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?uri WHERE { ?uri dbo:occupation dbr:Skateboarder { ?uri dbo:birthPlace dbr:Sweden } UNION { ?uri dbo:b |
|                |                                  irthPlace ?place . ?place dbo:country dbr:Sweden } }                                   |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                  select ?answer where { wd:Q34 wdt:professional_sk

In [49]:
# From QALD
q = "Which monarchs of the United Kingdom were married to a German?"
target = "SELECT DISTINCT ?uri WHERE { ?uri rdf:type yago:WikicatMonarchsOfTheUnitedKingdom ; dbo:spouse ?spouse . ?spouse dbo:birthPlace res:Germany }"
pretty_translate(target, q)

<re.Match object; span=(80, 111), match=':monarchs_of_the_united_kingdom'>
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                             Which monarchs of the United Kingdom were married to a German?                              |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?uri WHERE { ?uri rdf:type yago:WikicatMonarchsOfTheUnitedKingdom ; dbo:spouse ?spouse . ?spouse dbo:bi |
|                |                                                 rthPlace res:Germany }                                                  |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |          select distinct ?sbj where { ?sbj wdt:P26 wd:Q188 .

In [50]:
# From QALD
q = "Give me all Argentine films."
target = "SELECT DISTINCT ?uri WHERE { { ?uri rdf:type yago:ArgentineFilms } UNION { ?uri rdf:type dbo:Film { ?uri dbo:country res:Argentina } UNION { ?uri dbp:country \"Argentina\"@en } } }"
pretty_translate(target, q)

+----------------+-------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                              Give me all Argentine films.                                               |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?uri WHERE { { ?uri rdf:type yago:ArgentineFilms } UNION { ?uri rdf:type dbo:Film { ?uri dbo:country re |
|                |                               s:Argentina } UNION { ?uri dbp:country "Argentina"@en } } }                               |
+----------------+-------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                          select ?answer where { wd:Q414 wdt:Q5449034 ?X . ?X wdt:P17 ?answer}                           |
+------------

In [51]:
# From QALD
q = "How did Michael Jackson die?"
target = "SELECT DISTINCT ?s WHERE { <http://dbpedia.org/resource/Michael_Jackson> <http://dbpedia.org/property/deathCause> ?s }"
pretty_translate(target, q)

+----------------+------------------------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                              How did Michael Jackson die?                                              |
+----------------+------------------------------------------------------------------------------------------------------------------------+
|     Target     | SELECT DISTINCT ?s WHERE { <http://dbpedia.org/resource/Michael_Jackson> <http://dbpedia.org/property/deathCause> ?s } |
+----------------+------------------------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                            select ?answer where { wd:Q2831 wdt:P20 ?X . ?X wdt:P20 ?answer}                            |
+----------------+------------------------------------------------------------------------------------------------------------------------+
|   RESULT-Raw   |  

In [54]:
# From QALD
q = "Where did Michael Jackson died?"
target = "."
pretty_translate(target, q)

+----------------+--------------------------------------------------------------------------------------------------+
|    QUESTION    |                                 Where did Michael Jackson died?                                  |
+----------------+--------------------------------------------------------------------------------------------------+
|     Target     |                                                .                                                 |
+----------------+--------------------------------------------------------------------------------------------------+
| RESULT-DECODED |                 select ?answer where { wd:Q2831 wdt:P20 ?X . ?X wdt:P20 ?answer}                 |
+----------------+--------------------------------------------------------------------------------------------------+
|   RESULT-Raw   | select ?answer where { wd:michael_jackson wdt:place_of_death ?X . ?X wdt:place_of_death ?answer} |
+----------------+--------------------------------------

In [55]:
# From QALD
q = "Which classes does the Millepede belong to?"
target = "SELECT DISTINCT ?String WHERE { res:Millipede dbp:taxon ?String }"
pretty_translate(target, q)

<re.Match object; span=(37, 58), match=':classes_of_millepede'>
<re.Match object; span=(84, 103), match=':classes_of_science'>
+----------------+-----------------------------------------------------------------------------------------------------------+
|    QUESTION    |                                Which classes does the Millepede belong to?                                |
+----------------+-----------------------------------------------------------------------------------------------------------+
|     Target     |                     SELECT DISTINCT ?String WHERE { res:Millipede dbp:taxon ?String }                     |
+----------------+-----------------------------------------------------------------------------------------------------------+
| RESULT-DECODED |     select distinct ?sbj where { ?sbj wdt:classes_of_millepede . ?sbj wdt:P31 wd:classes_of_science }     |
+----------------+---------------------------------------------------------------------------------------------

In [None]:
# From QALD
q = "Which classes does the Millepede belong to?"
target = "SELECT DISTINCT ?String WHERE { res:Millipede dbp:taxon ?String }"
pretty_translate(target, q)