Skip to content

Commit

Permalink
Merge pull request #477 from henrykironde/enginejson
Browse files Browse the repository at this point in the history
add json engine fixes #152
  • Loading branch information
ethanwhite committed Apr 25, 2016
2 parents 97181a7 + bc85be1 commit aa877dc
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 71 deletions.
3 changes: 2 additions & 1 deletion engines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"sqlite",
"msaccess",
"csv",
"download_only"
"download_only",
"jsonengine",
]

engine_module_list = [
Expand Down
107 changes: 38 additions & 69 deletions engines/jsonengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from retriever.lib.models import Engine
from retriever import DATA_DIR

from collections import OrderedDict

class DummyConnection:
def cursor(self):
Expand Down Expand Up @@ -43,6 +43,7 @@ class engine(Engine):
"Format of table name",
os.path.join(DATA_DIR, "{db}_{table}.json")),
]
table_names = []

def create_db(self):
"""Override create_db since there is no database just a JSON file"""
Expand All @@ -51,80 +52,48 @@ def create_db(self):
def create_table(self):
"""Create the table by creating an empty json file"""
self.output_file = open(self.table_name(), "w")
self.output_file.write('[')
self.output_file.write("[")
self.table_names.append((self.output_file, self.table_name()))

def disconnect(self):
"""Close out the JSON with a ] and close the file
Due to an extra comma after the last entry it is necessary to close the
current file, read it back in, and remove the extra comma, before adding
the closing bracket, and re-writing the file to disk. This will be
inefficient for large files and we may want to replace it with something
better.
"""Close out the JSON with a ('\n]') and close the file
Close all the file objects that have been created
Re-write the files stripping off the last comma and then close with a ('\n]')
"""
try:
self.output_file.close()
current_output_file = open(self.table_name(), "r")
file_contents = current_output_file.readlines()
current_output_file.close()
if (file_contents[-1] != ']'):
for output_file_i, file_name in self.table_names:

try:
output_file_i.close()
current_input_file = open(file_name, "r")
file_contents = current_input_file.readlines()
current_input_file.close()
file_contents[-1] = file_contents[-1].strip(',')
file_contents.append('\n]')
self.output_file = open(self.table_name(), "w")
self.output_file.writelines(file_contents)
self.output_file.close()
except:
# when disconnect is called by app.connect_wizard.ConfirmPage to
# confirm the connection, output_file doesn't exist yet, this is
# fine so just pass
pass
current_output_file = open(file_name, "w")
current_output_file.writelines(file_contents)
current_output_file.write('\n]')
current_output_file.close()
except:
# when disconnect is called by app.connect_wizard.ConfirmPage to
# confirm the connection, output_file doesn't exist yet, this is
# fine so just pass
pass

def execute(self, statement, commit=True):
"""Write a line to the output file"""
self.output_file.write('\n' + statement + ',')

def format_insert_value(self, value, datatype):
"""Formats a value for an insert statement
Overrides default behavior by:
1. Storing decimal numbers as floats rather than strings
2. Not escaping quotes (handled by the json module)
3. Replacing "null" with None which will convert to the 'null' keyword
in json
"""
datatype = datatype.split('-')[-1]
strvalue = str(value).strip()

# Remove any quotes already surrounding the string
quotes = ["'", '"']
if len(strvalue) > 1 and strvalue[0] == strvalue[-1] and strvalue[0] in quotes:
strvalue = strvalue[1:-1]
nulls = ("null", "none")

if strvalue.lower() in nulls:
return None
elif datatype in ("int", "bigint", "bool"):
if strvalue:
intvalue = strvalue.split('.')[0]
if intvalue:
return int(intvalue)
else:
return None
else:
return None
elif datatype in ("double", "decimal"):
if strvalue:
return float(strvalue)
else:
return None
elif datatype == "char":
if strvalue.lower() in nulls:
return None
else:
return strvalue
else:
return None
"""Formats a value for an insert statement"""
v = Engine.format_insert_value(self, value, datatype)
if v == 'null':
return ""
try:
if len(v) > 1 and v[0] == v[-1] == "'":
v = '"%s"' % v[1:-1]
except:
pass
return v

def insert_statement(self, values):
if not hasattr(self, 'auto_column_number'):
Expand All @@ -137,11 +106,11 @@ def insert_statement(self, values):
[self.auto_column_number] + values[i + offset:]
self.auto_column_number += 1
offset += 1
# FIXME: Should nulls be inserted here? I'm guessing they should be
# skipped. Find out.
datadict = {column[0]: value for column,
value in zip(self.table.columns, values)}
return json.dumps(datadict)

keys = [columnname[0] for columnname in self.table.columns]
tuples = (zip(keys, values))
write_data = OrderedDict(tuples)
return json.dumps(write_data)

def table_exists(self, dbname, tablename):
"""Check to see if the data file currently exists"""
Expand Down
2 changes: 1 addition & 1 deletion test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_script_module(script_name):
file, pathname, desc = imp.find_module(script_name, [os.path.join(HOME_DIR, "scripts")])
return imp.load_module(script_name, file, pathname, desc)

mysql_engine, postgres_engine, sqlite_engine, msaccess_engine, csv_engine, download_engine = ENGINE_LIST()
mysql_engine, postgres_engine, sqlite_engine, msaccess_engine, csv_engine, download_engine, json_engine = ENGINE_LIST()
csv_engine.opts = {'engine': 'csv', 'table_name': './{db}_{table}.txt'}

def test_csv_from_csv():
Expand Down

0 comments on commit aa877dc

Please sign in to comment.