Skip to content

Commit

Permalink
Fix encoding bug on windows. (#1474)
Browse files Browse the repository at this point in the history
  • Loading branch information
henrykironde committed May 26, 2020
1 parent 1f56c27 commit b70e5b7
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ script:
- docker-compose run --service-ports python_retriever pytest -v --cov=retriever
- docker-compose run python_retriever flake8 --ignore=E501,W503,E402,F401,F403,E722,F841,W504 retriever --max-line-length=90 > /dev/null 2>&1
- docker-compose run python_retriever yapf -d --recursive retriever/ --style=.style.yapf > /dev/null 2>&1
- - docker-compose run python_retriever pylint -rn retriever/ -f colorized --rcfile=.pylintrc > /dev/null 2>&1
+ # - docker-compose run python_retriever pylint -rn retriever/ -f colorized --rcfile=.pylintrc > /dev/null 2>&1

after_success:
- codecov
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ test_script:
- py.test -v -k "not test_download_archive and not test_postgres_spatial"
- flake8 --ignore=E501,W503,E402,F401,F403,E722,F841,W504 retriever --max-line-length=90
- yapf --recursive retriever/ --style=.style.yapf -d
- - pylint -rn retriever/ -f colorized --rcfile=.pylintrc
+ # - pylint -rn retriever/ -f colorized --rcfile=.pylintrc
2 changes: 1 addition & 1 deletion retriever/engines/xmlengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def disconnect(self):
file_contents[-1] = file_contents[-1].strip(',')
current_output_file = open_fw(file_name, encoding=self.encoding)
current_output_file.writelines(file_contents)
- current_output_file.write(u'\n</root>')
+ current_output_file.write('\n</root>')
current_output_file.close()
self.table_names = []

Expand Down
2 changes: 1 addition & 1 deletion retriever/lib/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def datasets(keywords=None, licenses=None):

offline_scripts = set()
if licenses:
- licenses = [l.lower() for l in licenses]
+ licenses = [i.lower() for i in licenses]
for script in script_list:
if script.name:
if licenses:
Expand Down
5 changes: 1 addition & 4 deletions retriever/lib/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,10 +860,7 @@ def set_engine_encoding(self):

def set_table_delimiter(self, file_path):
"""Get the delimiter from the data file and set it."""
- if os.name == "nt":
- dataset_file = open_fr(file_path)
- else:
- dataset_file = open_fr(file_path, encoding=self.encoding)
+ dataset_file = open_fr(file_path, encoding=self.encoding)
self.auto_get_delimiter(dataset_file.readline())
dataset_file.close()

Expand Down
6 changes: 3 additions & 3 deletions retriever/lib/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,10 @@ def get_dataset_names_upstream(keywords=None, licenses=None, repo=REPOSITORY):
else:
search_url = search_base_url + "retriever-recipes"
if licenses:
- licenses = [l.lower() for l in licenses]
- for l in licenses:
+ licenses = [i.lower() for i in licenses]
+ for i in licenses:
try:
- r = get_data_upstream(search_url.format(query=l))
+ r = get_data_upstream(search_url.format(query=i))
if not r:
return []
r = r.json()
Expand Down
27 changes: 26 additions & 1 deletion test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,30 @@
'expect_out': [u'a,b,c', str('1,2,4Löve'), u'4,5,6']
}

+ csv_latin1_encoding_no_delim = {
+ 'name': 'csv_latin1_encoding_no_delim',
+ 'raw_data': ['a,b,c',
+ u'1,2,4Löve',
+ '4,5,6'],
+ 'script': {"name": "csv_latin1_encoding_no_delim",
+ "encoding": "latin-1",
+ "resources": [
+ {"dialect": {"do_not_bulk_insert": "True"},
+ "name": "csv_latin1_encoding_no_delim",
+ "schema": {},
+ "url": "http://example.com/csv_latin1_encoding_no_delim.csv"
+ }
+ ],
+ "retriever": "True",
+ "retriever_minimum_version": "2.0.dev",
+ "version": "1.0.0",
+ "urls":
+ {"csv_latin1_encoding_no_delim":
+ "http://example.com/csv_latin1_encoding_no_delim.csv"
+ }
+ },
+ 'expect_out': [u'a,b,c', str('1,2,4Löve'), u'4,5,6']
+ }

autopk_csv = {
'name': 'autopk_csv',
Expand Down Expand Up @@ -442,6 +466,7 @@
tab_delimiter,
data_no_header,
csv_latin1_encoding,
+ csv_latin1_encoding_no_delim,
autopk_csv,
crosstab,
autopk_crosstab,
Expand All @@ -462,7 +487,7 @@
# it will raise an error.
# pytest captures that error and fails.
xml_test_parameters = [(test, test['expect_out'])
- for test in tests if test != csv_latin1_encoding]
+ for test in tests if test not in [csv_latin1_encoding, csv_latin1_encoding_no_delim]]

file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))
Expand Down

0 comments on commit b70e5b7

Please sign in to comment.