In [2]:
import argparse
import glob
import os
import json
import time
import logging
import random
import re
from itertools import chain
from string import punctuation

import sys

# ROOT is the 'backend' directory located in the parent of the current
# working directory. Add it to the module search path exactly once so that
# re-running this cell does not pile up duplicate entries.
ROOT = os.path.join(os.path.split(os.getcwd())[0], 'backend')
if sys.path.count(ROOT) == 0:
    sys.path.append(ROOT)

import pandas as pd
import numpy as np

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch's default generator. When CUDA is available, the generators of
    all GPUs are seeded as well.

    Args:
        seed: Integer seed applied to all generators.
    """
    # torch is not imported by the notebook's import cell; import it here so
    # the function works on a fresh kernel instead of raising NameError.
    import torch

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Seed all random number generators once, up front, with a fixed value so
# that repeated runs of the notebook draw the same random numbers.
seed_number = 42
set_seed(seed_number)

In [3]:
from app.dataService import globalVariable as GV

In [7]:
# Table metadata: load tables.json from GV.SPIDER_FOLDER into a DataFrame.
# The file is decoded explicitly as UTF-8 (the standard JSON encoding) rather
# than with the platform's locale-dependent default.
with open(os.path.join(GV.SPIDER_FOLDER, "tables.json"), "r", encoding="utf-8") as f:
    table_data = json.load(f)
t_df = pd.DataFrame(table_data)
t_df

Unnamed: 0,column_names,column_names_original,column_types,db_id,foreign_keys,primary_keys,table_names,table_names_original
0,"[[-1, *], [0, perpetrator id], [0, people id],...","[[-1, *], [0, Perpetrator_ID], [0, People_ID],...","[text, number, number, text, number, text, tex...",perpetrator,"[[2, 9]]","[1, 9]","[perpetrator, people]","[perpetrator, people]"
1,"[[-1, *], [0, building], [0, room number], [0,...","[[-1, *], [0, building], [0, room_number], [0,...","[text, text, text, number, text, text, number,...",college_2,"[[9, 4], [13, 4], [19, 1], [20, 2], [15, 7], [...","[1, 4, 7, 11, 15, 22, 27, 31, 37, 39, 45]","[classroom, department, course, instructor, se...","[classroom, department, course, instructor, se..."
2,"[[-1, *], [0, id], [0, city], [0, country], [0...","[[-1, *], [0, id], [0, City], [0, Country], [0...","[text, number, text, text, text, text, text, n...",flight_company,"[[20, 7], [19, 1]]","[1, 7, 13]","[airport, operate company, flight]","[airport, operate_company, flight]"
3,"[[-1, *], [0, institution id], [0, name], [0, ...","[[-1, *], [0, instID], [0, name], [0, country]...","[text, number, text, text, number, text, text,...",icfp_1,"[[11, 7], [10, 1], [9, 4]]","[1, 4, 7, 9]","[institution, authors, papers, authorship count]","[Inst, Authors, Papers, Authorship]"
4,"[[-1, *], [0, body builder id], [0, people id]...","[[-1, *], [0, Body_Builder_ID], [0, People_ID]...","[text, number, number, number, number, number,...",body_builder,"[[2, 6]]","[1, 6]","[body builder, people]","[body_builder, people]"
...,...,...,...,...,...,...,...,...
161,"[[-1, *], [0, employee ssn], [0, project numbe...","[[-1, *], [0, Essn], [0, Pno], [0, Hours], [1,...","[text, number, number, number, text, text, tex...",company_1,[],"[1, 7, 15, 19, 22, 27]","[works on, employee, department, project, depe...","[works_on, employee, department, project, depe..."
162,"[[-1, *], [0, workshop id], [0, date], [0, ven...","[[-1, *], [0, Workshop_ID], [0, Date], [0, Ven...","[text, number, text, text, text, number, numbe...",workshop_paper,"[[10, 1], [9, 5]]","[1, 5, 9]","[workshop, submission, acceptance]","[workshop, submission, Acceptance]"
163,"[[-1, *], [0, item id], [0, title], [1, a id],...","[[-1, *], [0, i_id], [0, title], [1, a_id], [1...","[text, number, text, number, number, number, n...",epinions_1,"[[5, 1], [4, 8], [11, 8], [10, 8]]","[1, 3, 8]","[item, review, useracct, trust]","[item, review, useracct, trust]"
164,"[[-1, *], [0, party id], [0, party theme], [0,...","[[-1, *], [0, Party_ID], [0, Party_Theme], [0,...","[text, number, text, text, text, text, number,...",party_host,"[[11, 1], [12, 7]]","[1, 7, 11]","[party, host, party host]","[party, host, party_host]"


In [8]:
# Display the first row of t_df (selected by position) as a Series, so each
# column's value for that one record can be read without table truncation.
t_df.iloc[0]

column_names             [[-1, *], [0, perpetrator id], [0, people id],...
column_names_original    [[-1, *], [0, Perpetrator_ID], [0, People_ID],...
column_types             [text, number, number, text, number, text, tex...
db_id                                                          perpetrator
foreign_keys                                                      [[2, 9]]
primary_keys                                                        [1, 9]
table_names                                          [perpetrator, people]
table_names_original                                 [perpetrator, people]
Name: 0, dtype: object

In [9]:
# Query data (training split only): load train_spider.json from
# GV.SPIDER_FOLDER into a DataFrame. The file is decoded explicitly as UTF-8
# (the standard JSON encoding) rather than with the platform's
# locale-dependent default.
with open(os.path.join(GV.SPIDER_FOLDER, "train_spider.json"), "r", encoding="utf-8") as f:
    query_data = json.load(f)
q_df = pd.DataFrame(query_data)
q_df

Unnamed: 0,db_id,query,query_toks,query_toks_no_value,question,question_toks,sql
0,department_management,SELECT count(*) FROM head WHERE age > 56,"[SELECT, count, (, *, ), FROM, head, WHERE, ag...","[select, count, (, *, ), from, head, where, ag...",How many heads of the departments are older th...,"[How, many, heads, of, the, departments, are, ...","{'from': {'table_units': [['table_unit', 1]], ..."
1,department_management,"SELECT name , born_state , age FROM head ORD...","[SELECT, name, ,, born_state, ,, age, FROM, he...","[select, name, ,, born_state, ,, age, from, he...","List the name, born state and age of the heads...","[List, the, name, ,, born, state, and, age, of...","{'from': {'table_units': [['table_unit', 1]], ..."
2,department_management,"SELECT creation , name , budget_in_billions ...","[SELECT, creation, ,, name, ,, budget_in_billi...","[select, creation, ,, name, ,, budget_in_billi...","List the creation year, name and budget of eac...","[List, the, creation, year, ,, name, and, budg...","{'from': {'table_units': [['table_unit', 0]], ..."
3,department_management,"SELECT max(budget_in_billions) , min(budget_i...","[SELECT, max, (, budget_in_billions, ), ,, min...","[select, max, (, budget_in_billions, ), ,, min...",What are the maximum and minimum budget of the...,"[What, are, the, maximum, and, minimum, budget...","{'from': {'table_units': [['table_unit', 0]], ..."
4,department_management,SELECT avg(num_employees) FROM department WHER...,"[SELECT, avg, (, num_employees, ), FROM, depar...","[select, avg, (, num_employees, ), from, depar...",What is the average number of employees of the...,"[What, is, the, average, number, of, employees...","{'from': {'table_units': [['table_unit', 0]], ..."
...,...,...,...,...,...,...,...
6995,culture_company,SELECT T1.company_name FROM culture_company AS...,"[SELECT, T1.company_name, FROM, culture_compan...","[select, t1, ., company_name, from, culture_co...",What are all the company names that have a boo...,"[What, are, all, the, company, names, that, ha...","{'from': {'table_units': [['table_unit', 2], [..."
6996,culture_company,"SELECT T1.title , T3.book_title FROM movie AS...","[SELECT, T1.title, ,, T3.book_title, FROM, mov...","[select, t1, ., title, ,, t3, ., book_title, f...",Show the movie titles and book titles for all ...,"[Show, the, movie, titles, and, book, titles, ...","{'from': {'table_units': [['table_unit', 1], [..."
6997,culture_company,"SELECT T1.title , T3.book_title FROM movie AS...","[SELECT, T1.title, ,, T3.book_title, FROM, mov...","[select, t1, ., title, ,, t3, ., book_title, f...",What are the titles of movies and books corres...,"[What, are, the, titles, of, movies, and, book...","{'from': {'table_units': [['table_unit', 1], [..."
6998,culture_company,SELECT T2.company_name FROM movie AS T1 JOIN c...,"[SELECT, T2.company_name, FROM, movie, AS, T1,...","[select, t2, ., company_name, from, movie, as,...",Show all company names with a movie directed i...,"[Show, all, company, names, with, a, movie, di...","{'from': {'table_units': [['table_unit', 1], [..."
