Run "autopep8 -r --in-place --ignore E501 ."
James McKinney committed Jan 25, 2016
1 parent 39f4252 · commit 7f1484f
Showing 62 changed files with 453 additions and 326 deletions.
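
For context on the diff below: autopep8 automatically fixes PEP 8 violations reported by the pep8 checker; -r recurses into the directory tree, --in-place edits files on disk, and --ignore E501 skips the line-length check, which is why long lines in this commit are re-indented but never wrapped. A minimal before-and-after sketch of the kinds of fixes involved (a hypothetical file, not from this repository):

    # before: E302 (blank lines), E225/E231 (whitespace), E261 (inline comment)
    import os
    def f(a,b):
        x=a+b # sum
        return x

    # after running "autopep8 --in-place --ignore E501 example.py":
    import os


    def f(a, b):
        x = a + b  # sum
        return x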
32 changes: 18 additions & 14 deletions csvkit/cleanup.py
@@ -2,6 +2,7 @@
 
 from csvkit.exceptions import CSVTestException, LengthMismatchError
 
+
 def join_rows(rows, joiner=' '):
     """
     Given a series of rows, return them as a single row where the inner edge cells are merged. By default joins with a single space character, but you can specify new-line, empty string, or anything else with the 'joiner' kwarg.
@@ -12,12 +13,13 @@ def join_rows(rows, joiner=' '):
     for row in rows[1:]:
         if len(row) == 0:
             row = ['']
 
         fixed_row[-1] += "%s%s" % (joiner, row[0])
         fixed_row.extend(row[1:])
 
     return fixed_row
 
+
 def fix_length_errors(errs, target_line_length, joiner=' '):
     """
     If possible, transform the rows backed up in the list of errors into rows of the correct length.
@@ -28,20 +30,21 @@ def fix_length_errors(errs, target_line_length, joiner=' '):
 
     fixed_rows = []
     backlog = []
 
     for err in errs:
         if type(err) is not LengthMismatchError:
-            return [] # give up if any are not length errors
+            return []  # give up if any are not length errors
 
         backlog.append(err)
         fixed_row = join_rows([err.row for err in backlog])
 
         if len(fixed_row) == target_line_length:
             fixed_rows.append(fixed_row)
-            backlog = [] # reset
+            backlog = []  # reset
 
     return fixed_rows
 
+
 def extract_joinable_row_errors(errs):
     joinable = []
 
@@ -54,14 +57,16 @@ def extract_joinable_row_errors(errs):
 
         joinable.append(err)
 
-    joinable.reverse()
+    joinable.reverse()
 
     return joinable
 
+
 class RowChecker(object):
     """
     Iterate over rows of a CSV producing cleaned rows and storing error rows.
     """
+
     def __init__(self, reader):
         self.reader = reader
         self.column_names = next(reader)
@@ -75,7 +80,7 @@ def checked_rows(self):
         A generator which yields rows which are ready to write to output.
         """
         line_number = self.reader.line_num
-
+
         for row in self.reader:
             try:
                 if len(row) != len(self.column_names):
@@ -86,7 +91,7 @@ def checked_rows(self):
                 self.errors.append(e)
 
                 joinable_row_errors = extract_joinable_row_errors(self.errors)
-
+
                 while joinable_row_errors:
                     fixed_row = join_rows([err.row for err in joinable_row_errors], joiner=' ')
 
@@ -98,16 +103,15 @@ def checked_rows(self):
                         self.joins += 1
 
                         yield fixed_row
 
                         for fixed in joinable_row_errors:
                             self.errors.remove(fixed)
 
                         break
 
-                    joinable_row_errors = joinable_row_errors[1:] # keep trying in case we're too long because of a straggler
+                    joinable_row_errors = joinable_row_errors[1:]  # keep trying in case we're too long because of a straggler
 
             except CSVTestException as e:
                 self.errors.append(e)
 
             line_number = self.reader.line_num
-
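The repair logic above works in two layers: join_rows fuses rows that were split across physical lines by merging the inner edge cells, and RowChecker.checked_rows keeps joining consecutive error rows until the result matches the header width. A small sketch of join_rows, traced by hand from the function body above (hypothetical data):

    from csvkit.cleanup import join_rows

    # Two physical rows that should have been one 4-column row:
    print(join_rows([['a', 'b', 'c'], ['d', 'e']]))
    # ['a', 'b', 'c d', 'e']  (the edge cells 'c' and 'd' merge with a space)

    print(join_rows([['a', 'b', 'c'], ['d', 'e']], joiner=''))
    # ['a', 'b', 'cd', 'e']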
55 changes: 30 additions & 25 deletions csvkit/cli.py
@@ -12,12 +12,14 @@
 
 from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError
 
+
 def lazy_opener(fn):
     def wrapped(self, *args, **kwargs):
         self._lazy_open()
         fn(*args, **kwargs)
     return wrapped
 
+
 class LazyFile(six.Iterator):
     """
     A proxy for a File object that delays opening it until
@@ -26,6 +28,7 @@ class LazyFile(six.Iterator):
     Currently this implements only the minimum methods to be useful,
     but it could easily be expanded.
     """
+
     def __init__(self, init, *args, **kwargs):
         self.init = init
         self.f = None
@@ -56,6 +59,7 @@ def __next__(self):
 
         return next(self.f)
 
+
 class CSVKitUtility(object):
     description = ''
     epilog = ''
@@ -94,7 +98,7 @@ def __init__(self, args=None, output_file=None):
             import signal
             signal.signal(signal.SIGPIPE, signal.SIG_DFL)
         except (ImportError, AttributeError):
-            #Do nothing on platforms that don't have signals or don't have SIGPIPE
+            # Do nothing on platforms that don't have signals or don't have SIGPIPE
             pass
 
     def add_arguments(self):
@@ -125,50 +129,50 @@ def _init_common_parser(self):
         # Input
         if 'f' not in self.override_flags:
             self.argparser.add_argument(metavar="FILE", nargs='?', dest='input_path',
-                help='The CSV file to operate on. If omitted, will accept input on STDIN.')
+                                        help='The CSV file to operate on. If omitted, will accept input on STDIN.')
         if 'd' not in self.override_flags:
             self.argparser.add_argument('-d', '--delimiter', dest='delimiter',
-                help='Delimiting character of the input CSV file.')
+                                        help='Delimiting character of the input CSV file.')
         if 't' not in self.override_flags:
             self.argparser.add_argument('-t', '--tabs', dest='tabs', action='store_true',
-                help='Specifies that the input CSV file is delimited with tabs. Overrides "-d".')
+                                        help='Specifies that the input CSV file is delimited with tabs. Overrides "-d".')
         if 'q' not in self.override_flags:
             self.argparser.add_argument('-q', '--quotechar', dest='quotechar',
-                help='Character used to quote strings in the input CSV file.')
+                                        help='Character used to quote strings in the input CSV file.')
         if 'u' not in self.override_flags:
-            self.argparser.add_argument('-u', '--quoting', dest='quoting', type=int, choices=[0,1,2,3],
-                help='Quoting style used in the input CSV file. 0 = Quote Minimal, 1 = Quote All, 2 = Quote Non-numeric, 3 = Quote None.')
+            self.argparser.add_argument('-u', '--quoting', dest='quoting', type=int, choices=[0, 1, 2, 3],
+                                        help='Quoting style used in the input CSV file. 0 = Quote Minimal, 1 = Quote All, 2 = Quote Non-numeric, 3 = Quote None.')
         if 'b' not in self.override_flags:
             self.argparser.add_argument('-b', '--doublequote', dest='doublequote', action='store_true',
-                help='Whether or not double quotes are doubled in the input CSV file.')
+                                        help='Whether or not double quotes are doubled in the input CSV file.')
         if 'p' not in self.override_flags:
             self.argparser.add_argument('-p', '--escapechar', dest='escapechar',
-                help='Character used to escape the delimiter if --quoting 3 ("Quote None") is specified and to escape the QUOTECHAR if --doublequote is not specified.')
+                                        help='Character used to escape the delimiter if --quoting 3 ("Quote None") is specified and to escape the QUOTECHAR if --doublequote is not specified.')
         if 'z' not in self.override_flags:
             self.argparser.add_argument('-z', '--maxfieldsize', dest='maxfieldsize', type=int,
-                help='Maximum length of a single field in the input CSV file.')
+                                        help='Maximum length of a single field in the input CSV file.')
         if 'e' not in self.override_flags:
             self.argparser.add_argument('-e', '--encoding', dest='encoding', default='utf-8',
-                help='Specify the encoding the input CSV file.')
+                                        help='Specify the encoding the input CSV file.')
         if 'S' not in self.override_flags:
             self.argparser.add_argument('-S', '--skipinitialspace', dest='skipinitialspace', default=False, action='store_true',
-                help='Ignore whitespace immediately following the delimiter.')
+                                        help='Ignore whitespace immediately following the delimiter.')
         if 'H' not in self.override_flags:
             self.argparser.add_argument('-H', '--no-header-row', dest='no_header_row', action='store_true',
-                help='Specifies that the input CSV file has no header row. Will create default headers (A,B,C,...).')
+                                        help='Specifies that the input CSV file has no header row. Will create default headers (A,B,C,...).')
         if 'v' not in self.override_flags:
             self.argparser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
-                help='Print detailed tracebacks when errors occur.')
+                                        help='Print detailed tracebacks when errors occur.')
 
         # Output
         if 'l' not in self.override_flags:
             self.argparser.add_argument('-l', '--linenumbers', dest='line_numbers', action='store_true',
-                help='Insert a column of line numbers at the front of the output. Useful when piping to grep or as a simple primary key.')
+                                        help='Insert a column of line numbers at the front of the output. Useful when piping to grep or as a simple primary key.')
 
         # Input/Output
         if 'zero' not in self.override_flags:
             self.argparser.add_argument('--zero', dest='zero_based', action='store_true',
-                help='When interpreting or displaying column numbers, use zero-based numbering instead of the default 1-based numbering.')
+                                        help='When interpreting or displaying column numbers, use zero-based numbering instead of the default 1-based numbering.')
 
     def _open_input_file(self, path):
         """
@@ -179,7 +183,7 @@ def _open_input_file(self, path):
             kwargs = {}
         else:
             mode = 'rt'
-            kwargs = { 'encoding': self.args.encoding }
+            kwargs = {'encoding': self.args.encoding}
 
         if not path or path == '-':
             f = sys.stdin
@@ -274,9 +278,9 @@ def print_column_names(self):
         output = self.output_file
 
         try:
-            zero_based=self.args.zero_based
+            zero_based = self.args.zero_based
         except:
-            zero_based=False
+            zero_based = False
 
         rows = agate.reader(f, **self.reader_kwargs)
         column_names = next(rows)
@@ -314,6 +318,7 @@ def match_column_identifier(column_names, c, zero_based=False):
 
     return c
 
+
 def parse_column_identifiers(ids, column_names, zero_based=False, excluded_columns=None):
     """
     Parse a comma-separated list of column indices AND/OR names into a list of integer indices.
@@ -338,9 +343,9 @@ def parse_column_identifiers(ids, column_names, zero_based=False, excluded_columns=None):
                 columns.append(match_column_identifier(column_names, c, zero_based))
             except ColumnIdentifierError:
                 if ':' in c:
-                    a,b = c.split(':',1)
+                    a, b = c.split(':', 1)
                 elif '-' in c:
-                    a,b = c.split('-',1)
+                    a, b = c.split('-', 1)
                 else:
                     raise
 
@@ -357,7 +362,7 @@ def parse_column_identifiers(ids, column_names, zero_based=False, excluded_columns=None):
             except ValueError:
                 raise ColumnIdentifierError("Invalid range %s. Ranges must be two integers separated by a - or : character.")
 
-            for x in range(a,b):
+            for x in range(a, b):
                 columns.append(match_column_identifier(column_names, x, zero_based))
 
     excludes = []
@@ -370,9 +375,9 @@ def parse_column_identifiers(ids, column_names, zero_based=False, excluded_columns=None):
                 excludes.append(match_column_identifier(column_names, c, zero_based))
             except ColumnIdentifierError:
                 if ':' in c:
-                    a,b = c.split(':',1)
+                    a, b = c.split(':', 1)
                 elif '-' in c:
-                    a,b = c.split('-',1)
+                    a, b = c.split('-', 1)
                 else:
                     raise
 
@@ -389,7 +394,7 @@ def parse_column_identifiers(ids, column_names, zero_based=False, excluded_columns=None):
             except ValueError:
                 raise ColumnIdentifierError("Invalid range %s. Ranges must be two integers separated by a - or : character.")
 
-            for x in range(a,b):
+            for x in range(a, b):
                 excludes.append(match_column_identifier(column_names, x, zero_based))
 
     return [c for c in columns if c not in excludes]
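The tail of cli.py above is the column-selection grammar shared by csvcut and the other utilities: identifiers may be column names, numbers (1-based unless --zero), or ranges written a:b or a-b. A usage sketch with hypothetical column names; it assumes the range-normalization lines collapsed out of the diff above treat range endpoints as inclusive, and that indices come back zero-based for internal use:

    from csvkit.cli import parse_column_identifiers

    names = ['id', 'name', 'dob', 'state', 'zip']

    # A name mixed with a 1-based range; '3-5' covers dob, state and zip:
    print(parse_column_identifiers('id,3-5', names))
    # [0, 2, 3, 4]

    # excluded_columns takes the same syntax:
    print(parse_column_identifiers('1-5', names, excluded_columns='zip'))
    # [0, 1, 2, 3]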
2 changes: 2 additions & 0 deletions csvkit/convert/__init__.py
@@ -18,6 +18,7 @@
 
     SUPPORTED_FORMATS.append('dbf')
 
+
 def convert(f, format, schema=None, key=None, **kwargs):
     """
     Convert a file of a specified format to CSV.
@@ -52,6 +53,7 @@ def convert(f, format, schema=None, key=None, **kwargs):
     else:
         raise ValueError('format "%s" is not supported' % format)
 
+
 def guess_format(filename):
     """
     Try to guess a file's format based on its extension (or lack thereof).
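guess_format and convert are the pair behind the in2csv utility: when no format is given explicitly, it is inferred from the filename extension. A short sketch; it assumes convert returns the CSV output as a string when no output stream is supplied, as the fixed-width converter below does:

    from csvkit.convert import convert, guess_format

    print(guess_format('leads.xlsx'))  # 'xlsx'
    print(guess_format('leads.csv'))   # 'csv'

    with open('leads.xls', 'rb') as f:
        csv_text = convert(f, guess_format('leads.xls'))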
1 change: 1 addition & 0 deletions csvkit/convert/csvitself.py
@@ -4,6 +4,7 @@
 
 import agate
 
+
 def csv2csv(f, **kwargs):
     """
     "Convert" a CSV into a new CSV by normalizing types and correcting for other anomalies.
1 change: 1 addition & 0 deletions csvkit/convert/dbase.py
@@ -8,6 +8,7 @@
 import dbf
 import six
 
+
 def dbf2csv(f, **kwargs):
     """
     Convert a dBASE .dbf file to csv.
15 changes: 10 additions & 5 deletions csvkit/convert/fixed.py
@@ -6,6 +6,7 @@
 import agate
 import six
 
+
 def fixed2csv(f, schema, output=None, **kwargs):
     """
     Convert a fixed-width file to csv using a CSV-formatted schema description.
@@ -45,6 +46,7 @@ def fixed2csv(f, schema, output=None, **kwargs):
         # Return empty string when streaming
         return ''
 
+
 class FixedWidthReader(six.Iterator):
     """
     Given a fixed-width file and a schema file, produce an analog to a csv
@@ -58,6 +60,7 @@ class FixedWidthReader(six.Iterator):
     in the 'start' column are assumed to be "zero-based" unless the first value
     is "1" in which case all values are assumed to be "one-based."
     """
+
     def __init__(self, f, schema, encoding=None):
         if encoding is not None:
             f = iterdecode(f, encoding)
@@ -78,24 +81,26 @@ def __next__(self):
 
 FixedWidthField = namedtuple('FixedWidthField', ['name', 'start', 'length'])
 
+
 class FixedWidthRowParser(object):
     """
     Instantiated with a schema, able to return a sequence of trimmed strings
     representing fields given a fixed-length line. Flexible about where the
     columns are, as long as they are headed with the literal names 'column',
     'start', and 'length'.
     """
+
     def __init__(self, schema):
-        self.fields = [] # A list of FixedWidthFields
+        self.fields = []  # A list of FixedWidthFields
 
         schema_reader = agate.reader(schema)
         schema_decoder = SchemaDecoder(next(schema_reader))
 
-        for i,row in enumerate(schema_reader):
+        for i, row in enumerate(schema_reader):
             try:
                 self.fields.append(schema_decoder(row))
             except Exception as e:
-                raise ValueError("Error reading schema at line %i: %s" % (i + 2,e))
+                raise ValueError("Error reading schema at line %i: %s" % (i + 2, e))
 
     def parse(self, line):
         values = []
@@ -105,18 +110,18 @@ def parse(self, line):
 
         return values
 
-
     def parse_dict(self, line):
         """
         Convenience method returns a dict. Equivalent to
        ``dict(zip(self.headers,self.parse(line)))``.
         """
-        return dict(zip(self.headers,self.parse(line)))
+        return dict(zip(self.headers, self.parse(line)))
 
     @property
     def headers(self):
         return [field.name for field in self.fields]
 
+
 class SchemaDecoder(object):
     """
     Extracts column, start, and length columns from schema rows. Once
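fixed2csv ties the classes above together: the schema is itself a small CSV with literal 'column', 'start', and 'length' headers, and each line of the data file is sliced accordingly. A sketch with hypothetical data; it assumes, per the docstrings above, that fields are stripped of surrounding whitespace and that the CSV text is returned when output is omitted:

    import io

    from csvkit.convert.fixed import fixed2csv

    # First 'start' is 0, so offsets are read as zero-based:
    schema = io.StringIO('column,start,length\nname,0,5\nage,5,3\n')
    data = io.StringIO('alice 42\nbob    7\n')

    print(fixed2csv(data, schema))
    # name,age
    # alice,42
    # bob,7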
1 change: 1 addition & 0 deletions csvkit/convert/geojs.py
@@ -10,6 +10,7 @@
 import agate
 import six
 
+
 def geojson2csv(f, key=None, **kwargs):
     """
     Convert a GeoJSON document into CSV format.
