Skip to content

Commit

Permalink
Merge pull request #1505 from ashishpriyadarshiCIC/test-script-sqlite
Browse files Browse the repository at this point in the history
Test function sqlite
  • Loading branch information
henrykironde committed Sep 2, 2020
2 parents dd453ee + e5ae583 commit 1515ade
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 4 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ sphinxcontrib-napoleon
sphinx_rtd_theme
tqdm==4.30.0
pandas
setuptools
setuptools
13 changes: 12 additions & 1 deletion retriever/lib/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
excel_csv,
)
from retriever.lib.engine_tools import geojson2csv
from retriever.lib.engine_tools import sqlite2csv
from retriever.lib.warning import Warning


Expand Down Expand Up @@ -583,7 +584,17 @@ def excel_to_csv(self, src_path, path_to_csv, excel_info=None, encoding=ENCODING
excel_csv(src_path, path_to_csv, excel_info, encoding)

def process_geojson2csv(self, src_path, path_to_csv, encoding=ENCODING):
geojson2csv(src_path, path_to_csv, encoding)
if self.find_file(src_path):
geojson2csv(src_path, path_to_csv, encoding)

def process_sqlite2csv(self, src_path, path_to_csv, table_name=None, encoding=ENCODING):
    """Export a table of a sqlite database to a csv file.

    The conversion is delegated to ``sqlite2csv`` and only happens when
    ``self.find_file(src_path)`` reports a truthy result; otherwise the
    call is a no-op.

    src_path: path of the sqlite database file
    path_to_csv: path of the csv file to write
    table_name: name of the table to export
    encoding: forwarded unchanged to ``sqlite2csv``
    """
    if not self.find_file(src_path):
        return
    sqlite2csv(src_path, path_to_csv, table_name, encoding)

def extract_gz(
self,
Expand Down
14 changes: 14 additions & 0 deletions retriever/lib/engine_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import shutil
import subprocess
import warnings
import pandas as pd
from sqlite3 import Error
import sqlite3 as sql

from hashlib import md5
from io import StringIO as NewFile
Expand Down Expand Up @@ -137,6 +140,17 @@ def json2csv(input_file, output_file=None, header_values=None, encoding=ENCODING
return output_file


def sqlite2csv(input_file, output_file, table_name=None, encoding=ENCODING):
    """Convert one table of a sqlite database file to CSV.

    input_file: path of the sqlite database file
    output_file: path of the csv file to write
    table_name: name of the table to export; required despite the
        ``None`` default, which is kept for interface compatibility
    encoding: accepted for parity with the other ``*2csv`` helpers.
        NOTE(review): it is not applied to the written file, which uses
        the pandas default encoding -- confirm whether it should be.

    Returns ``output_file``.
    Raises ``ValueError`` when no table name is given.
    """
    if not table_name:
        # Fail early with a clear message instead of running
        # "SELECT * from None" against the database.
        raise ValueError("sqlite2csv requires the name of the table to export")

    # Identifiers cannot be bound as SQL parameters, so quote the name
    # (doubling embedded quotes) rather than interpolating it verbatim.
    quoted_name = '"%s"' % str(table_name).replace('"', '""')

    conn = sql.connect(input_file)
    try:
        table = pd.read_sql_query("SELECT * FROM %s" % quoted_name, conn)
    finally:
        # Release the database handle even when the query fails.
        conn.close()

    table.to_csv(output_file, index=False)
    return output_file


def xml2csv(input_file, outputfile=None, header_values=None, row_tag="row"):
"""Convert xml to csv.
Expand Down
2 changes: 1 addition & 1 deletion retriever/lib/load_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def read_json(json_file):

if isinstance(json_object, dict) and "resources" in json_object.keys():
# Note::formats described by frictionless data may need to change
tabular_exts = {"csv", "tab", "geojson"}
tabular_exts = {"csv", "tab", "geojson", "sqlite", "db"}
vector_exts = {"shp", "kmz"}
raster_exts = {"tif", "tiff", "bil", "hdr", "h5", "hdf5", "hr", "image"}
for resource_item in json_object["resources"]:
Expand Down
7 changes: 7 additions & 0 deletions retriever/lib/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,13 @@ def process_tables(self, table_obj, url):
self.engine.download_file(url, table_obj.geojson_data)
self.engine.process_geojson2csv(src_path, path_to_csv)

if hasattr(table_obj, "sqlite_data"):
src_path = self.engine.format_filename(table_obj.sqlite_data[1])
path_to_csv = self.engine.format_filename(table_obj.path)
self.engine.download_file(url, table_obj.sqlite_data[1])
self.engine.process_sqlite2csv(src_path, path_to_csv,
table_obj.sqlite_data[0])

if hasattr(table_obj, "path"):
self.engine.auto_create_table(table_obj, url=url, filename=table_obj.path)
else:
Expand Down
3 changes: 2 additions & 1 deletion test/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
db_md5 = [
('flensburg_food_web', '89c8ae47fb419d0336b2c22219f23793'),
('bird_size', '98dcfdca19d729c90ee1c6db5221b775'),
('mammal_masses', '6fec0fc63007a4040d9bbc5cfcd9953e')
('mammal_masses', '6fec0fc63007a4040d9bbc5cfcd9953e'),
('portal-project-teaching', 'f81620d5f5550b81062e427542e96fa5')
]

spatial_db_md5 = [
Expand Down
17 changes: 17 additions & 0 deletions test/test_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
except ModuleNotFoundError:
pass

from retriever.lib.engine_tools import sqlite2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
Expand All @@ -38,6 +39,10 @@
("simple_geojson2csv", "lake_county.geojson", "http://data-lakecountyil.opendata.arcgis.com/datasets/cd63911cc52841f38b289aeeeff0f300_1.geojson", 'fid,zip,colorectal,lung_bronc,breast_can,prostate_c,urinary_sy,all_cancer,shape_length,shape_area,geometry')
]

# Each entry: (test_name, db_name, sqlite_data_url, table_name, expected)
sqlite2csv_dataset = [
    (
        "simple_sqlite2csv",
        "portal_project.sqlite",
        "https://ndownloader.figshare.com/files/11188550",
        "plots",
        ['plot_id,plot_type'],
    ),
]

# Main paths
HOMEDIR = os.path.expanduser('~')
file_location = os.path.dirname(os.path.realpath(__file__))
Expand Down Expand Up @@ -560,6 +565,18 @@ def test_geojson2csv(test_name, table_name, geojson_data_url, expected):
os.remove(table_name)
assert header_val == expected

@pytest.mark.parametrize("test_name, db_name, sqlite_data_url, table_name, expected", sqlite2csv_dataset)
def test_sqlite2csv(test_name, db_name, sqlite_data_url, table_name, expected):
    """Test sqlite2csv function.

    Download the sqlite database, export ``table_name`` to a csv file and
    compare the first line of that file with the expected header.
    """
    output_csv = "output_file_sqlite.csv"
    try:
        r = requests.get(sqlite_data_url, allow_redirects=True)
        # Do not write an HTTP error page to disk as if it were a database.
        r.raise_for_status()
        with open(db_name, 'wb') as db_file:
            db_file.write(r.content)
        output_sqlite = sqlite2csv(db_name, output_csv, table_name, encoding=test_engine.encoding)
        with open(output_sqlite, 'r') as fh:
            header_val = fh.readline().split()
    finally:
        # Clean up the temporary files even when a step above fails.
        for leftover in (output_csv, db_name):
            if os.path.exists(leftover):
                os.remove(leftover)
    assert header_val == expected

def test_xml2csv():
"""Test xml2csv function.
Expand Down

0 comments on commit 1515ade

Please sign in to comment.