
Commit

Merge ea00709 into fcab173
jpmckinney committed Dec 20, 2022
2 parents fcab173 + ea00709 commit 3777225
Showing 33 changed files with 187 additions and 340 deletions.
2 changes: 1 addition & 1 deletion csvkit/cleanup.py
@@ -21,7 +21,7 @@ def join_rows(rows, joiner=' '):
return fixed_row


class RowChecker(object):
class RowChecker:
"""
Iterate over rows of a CSV producing cleaned rows and storing error rows.
"""
84 changes: 28 additions & 56 deletions csvkit/cli.py
@@ -2,29 +2,19 @@

import argparse
import bz2
import codecs
import gzip
import itertools
import lzma
import sys
import warnings
from os.path import splitext

import agate
import six

if six.PY3:
import lzma
elif six.PY2:
# Try import backports.lzma if available
try:
from backports import lzma
except ImportError:
lzma = None

from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError


class LazyFile(six.Iterator):
class LazyFile:
"""
A proxy for a File object that delays opening it until
a read method is called.
@@ -65,7 +55,7 @@ def __next__(self):
return next(self.f)


class CSVKitUtility(object):
class CSVKitUtility:
description = ''
epilog = ''
override_flags = ''
@@ -241,32 +231,21 @@ def _open_input_file(self, path):
"""
Open the input file specified on the command line.
"""
if six.PY2:
mode = 'Urb'
kwargs = {}
else:
mode = 'rt' # default
kwargs = {'encoding': self.args.encoding}

if not path or path == '-':
f = sys.stdin
else:
extension = splitext(path)[1]

if extension == '.gz':
f = LazyFile(gzip.open, path, mode, **kwargs)
func = gzip.open
elif extension == '.bz2':
if six.PY2:
f = LazyFile(bz2.BZ2File, path, mode, **kwargs)
else:
f = LazyFile(bz2.open, path, mode, **kwargs)
func = bz2.open
elif extension == ".xz":
if lzma is not None:
f = LazyFile(lzma.open, path, mode, **kwargs)
else:
raise RuntimeError("backports.lzma is needed for .xz support with Python 2")
func = lzma.open
else:
f = LazyFile(open, path, mode, **kwargs)
func = open

f = LazyFile(func, path, mode='rt', encoding=self.args.encoding)

return f
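
With the Python 2 branches gone, opener selection in _open_input_file reduces to a small extension-to-function mapping. A minimal, self-contained sketch of that behaviour follows; choose_opener and the example path are illustrative, not csvkit API.

import bz2
import gzip
import lzma
from os.path import splitext


def choose_opener(path):
    # One opener per compressed extension; plain files fall back to the built-in open().
    openers = {'.gz': gzip.open, '.bz2': bz2.open, '.xz': lzma.open}
    return openers.get(splitext(path)[1], open)


opener = choose_opener('example.csv.gz')  # gzip.open
# Every opener is then called the same way, in text mode:
# f = LazyFile(opener, 'example.csv.gz', mode='rt', encoding='utf-8')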

@@ -286,9 +265,6 @@ def _extract_csv_reader_kwargs(self):
if value is not None:
kwargs[arg] = value

if six.PY2 and self.args.encoding:
kwargs['encoding'] = self.args.encoding

if getattr(self.args, 'no_header_row', None):
kwargs['header'] = not self.args.no_header_row

@@ -309,9 +285,6 @@ def _install_exception_handler(self):
"""
Installs a replacement for sys.excepthook, which handles pretty-printing uncaught exceptions.
"""
if six.PY2:
sys.stderr = codecs.getwriter('utf-8')(sys.stderr)

def handler(t, value, traceback):
if self.args.verbose:
sys.__excepthook__(t, value, traceback)
@@ -323,7 +296,7 @@ def handler(t, value, traceback):
'flag or with the PYTHONIOENCODING environment variable. Use the -v flag to see '
'the complete error.\n' % self.args.encoding)
else:
sys.stderr.write('%s: %s\n' % (t.__name__, six.text_type(value)))
sys.stderr.write('%s: %s\n' % (t.__name__, str(value)))

sys.excepthook = handler

@@ -360,8 +333,7 @@ def get_column_types(self):
def get_column_offset(self):
if self.args.zero_based:
return 0
else:
return 1
return 1

def skip_lines(self):
if isinstance(self.args.skip_lines, int):
@@ -444,24 +416,24 @@ def match_column_identifier(column_names, c, column_offset=1):
Note that integer values are *always* treated as positional identifiers. If you happen to have
column names which are also integers, you must specify them using a positional index.
"""
if isinstance(c, six.string_types) and not c.isdigit() and c in column_names:
if isinstance(c, str) and not c.isdigit() and c in column_names:
return column_names.index(c)
else:
try:
c = int(c) - column_offset
# Fail out if neither a column name nor an integer
except ValueError:
raise ColumnIdentifierError("Column '%s' is invalid. It is neither an integer nor a column name. "
"Column names are: %s" % (c, repr(column_names)[1:-1]))

# Fail out if index is 0-based
if c < 0:
raise ColumnIdentifierError("Column %i is invalid. Columns are 1-based." % (c + column_offset))

# Fail out if index is out of range
if c >= len(column_names):
raise ColumnIdentifierError("Column %i is invalid. The last column is '%s' at index %i." % (
c + column_offset, column_names[-1], len(column_names) - 1 + column_offset))

try:
c = int(c) - column_offset
# Fail out if neither a column name nor an integer
except ValueError:
raise ColumnIdentifierError("Column '%s' is invalid. It is neither an integer nor a column name. "
"Column names are: %s" % (c, repr(column_names)[1:-1]))

# Fail out if index is 0-based
if c < 0:
raise ColumnIdentifierError("Column %i is invalid. Columns are 1-based." % (c + column_offset))

# Fail out if index is out of range
if c >= len(column_names):
raise ColumnIdentifierError("Column %i is invalid. The last column is '%s' at index %i." % (
c + column_offset, column_names[-1], len(column_names) - 1 + column_offset))

return c

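The match_column_identifier contract is unchanged by this refactor; a short usage sketch against a made-up header row (names and values below are hypothetical):

from csvkit.cli import match_column_identifier
from csvkit.exceptions import ColumnIdentifierError

names = ['id', 'name', '3']

match_column_identifier(names, 'name')  # 1: matched by column name
match_column_identifier(names, 2)       # 1: integers are 1-based positions by default
match_column_identifier(names, '3')     # 2: digit strings are treated as positions, not names
try:
    match_column_identifier(names, 0)
except ColumnIdentifierError:
    pass  # columns are 1-based, so 0 is rejected
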
2 changes: 1 addition & 1 deletion csvkit/convert/__init__.py
@@ -15,7 +15,7 @@ def guess_format(filename):

if extension in ('csv', 'dbf', 'fixed', 'xls', 'xlsx'):
return extension
elif extension in ['json', 'js']:
if extension in ('json', 'js'):
return 'json'

return None
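
A quick sketch of guess_format with hypothetical filenames, assuming (as in the full function, not shown here) that the format is guessed from the filename's final suffix:

from csvkit.convert import guess_format

guess_format('report.xlsx')  # 'xlsx'
guess_format('points.js')    # 'json' (.json and .js both map to JSON)
guess_format('notes.txt')    # None (unrecognized extension)
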
12 changes: 6 additions & 6 deletions csvkit/convert/fixed.py
@@ -2,9 +2,9 @@

from codecs import iterdecode
from collections import namedtuple
from io import StringIO

import agate
import six


def fixed2csv(f, schema, output=None, skip_lines=0, **kwargs):
@@ -27,10 +27,10 @@ def fixed2csv(f, schema, output=None, skip_lines=0, **kwargs):
:param skip_lines:
The number of lines to skip from the top of the file.
"""
streaming = True if output else False
streaming = bool(output)

if not streaming:
output = six.StringIO()
output = StringIO()

try:
encoding = kwargs['encoding']
@@ -59,7 +59,7 @@ def fixed2csv(f, schema, output=None, skip_lines=0, **kwargs):
return ''


class FixedWidthReader(six.Iterator):
class FixedWidthReader:
"""
Given a fixed-width file and a schema file, produce an analog to a csv
reader that yields a row of strings for each line in the fixed-width file,
@@ -95,7 +95,7 @@ def __next__(self):
FixedWidthField = namedtuple('FixedWidthField', ['name', 'start', 'length'])


class FixedWidthRowParser(object):
class FixedWidthRowParser:
"""
Instantiated with a schema, able to return a sequence of trimmed strings
representing fields given a fixed-length line. Flexible about where the
@@ -135,7 +135,7 @@ def headers(self):
return [field.name for field in self.fields]


class SchemaDecoder(object):
class SchemaDecoder:
"""
Extracts column, start, and length columns from schema rows. Once
instantiated, each time the instance is called with a row, a
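The schema-driven splitting that FixedWidthRowParser performs can be sketched with the FixedWidthField namedtuple defined above. The layout and sample line are made up, and this is a simplification rather than csvkit's implementation:

from collections import namedtuple

FixedWidthField = namedtuple('FixedWidthField', ['name', 'start', 'length'])

# Two hypothetical columns: "name" occupies characters 0-4, "age" characters 6-8.
fields = [FixedWidthField('name', 0, 5), FixedWidthField('age', 6, 3)]
line = 'alice 30 '

headers = [field.name for field in fields]                        # ['name', 'age']
row = [line[f.start:f.start + f.length].strip() for f in fields]  # ['alice', '30']
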
12 changes: 4 additions & 8 deletions csvkit/convert/geojs.py
@@ -1,14 +1,10 @@
#!/usr/bin/env python

try:
import json
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict
import simplejson as json
import json
from collections import OrderedDict
from io import StringIO

import agate
import six


def geojson2csv(f, key=None, **kwargs):
@@ -58,7 +54,7 @@ def geojson2csv(f, key=None, **kwargs):
header.extend(property_fields)
header.extend(('geojson', 'type', 'longitude', 'latitude'))

o = six.StringIO()
o = StringIO()
writer = agate.csv.writer(o)

writer.writerow(header)
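geojson2csv still takes an open file-like object and, per the header built above, emits the feature properties plus geojson, type, longitude and latitude columns. A minimal sketch with a made-up single-feature FeatureCollection, assuming (as before this change) that the function returns the CSV as text:

import json
from io import StringIO

from csvkit.convert.geojs import geojson2csv

collection = {
    'type': 'FeatureCollection',
    'features': [
        {
            'type': 'Feature',
            'properties': {'name': 'City Hall'},
            'geometry': {'type': 'Point', 'coordinates': [-73.99, 40.73]},
        },
    ],
}

print(geojson2csv(StringIO(json.dumps(collection))))
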
6 changes: 3 additions & 3 deletions csvkit/exceptions.py
@@ -30,7 +30,7 @@ class CSVTestException(CustomException):
"""

def __init__(self, line_number, row, msg):
super(CSVTestException, self).__init__(msg)
super().__init__(msg)
self.line_number = line_number
self.row = row

@@ -42,7 +42,7 @@ class LengthMismatchError(CSVTestException):

def __init__(self, line_number, row, expected_length):
msg = 'Expected %i columns, found %i columns' % (expected_length, len(row))
super(LengthMismatchError, self).__init__(line_number, row, msg)
super().__init__(line_number, row, msg)

@property
def length(self):
@@ -59,7 +59,7 @@ def __init__(self, index, value, normal_type):
self.value = value
self.normal_type = normal_type
msg = 'Unable to convert "%s" to type %s (at index %i)' % (value, normal_type, index)
super(InvalidValueForTypeException, self).__init__(msg)
super().__init__(msg)


class RequiredHeaderError(CustomException):
10 changes: 4 additions & 6 deletions csvkit/grep.py
@@ -1,11 +1,10 @@
#!/usr/bin/env python

import six

from csvkit.exceptions import ColumnIdentifierError


class FilteringCSVReader(six.Iterator):
class FilteringCSVReader:
r"""
Given any row iterator, only return rows which pass the filter.
If 'header' is False, then all rows must pass the filter; by default, the first row will be passed
@@ -34,7 +33,7 @@ class FilteringCSVReader(six.Iterator):
column_names = None

def __init__(self, reader, patterns, header=True, any_match=False, inverse=False):
super(FilteringCSVReader, self).__init__()
super().__init__()

self.reader = reader
self.header = header
@@ -78,8 +77,7 @@ def test_row(self, row):

if self.any_match:
return self.inverse # False
else:
return not self.inverse # True
return not self.inverse # True


def standardize_patterns(column_names, patterns):
@@ -122,7 +120,7 @@ def pattern_as_function(obj):
return lambda x: obj in x


class regex_callable(object):
class regex_callable:

def __init__(self, pattern):
self.pattern = pattern
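FilteringCSVReader keeps its constructor signature; a small sketch with an in-memory row iterator and a dict of per-column patterns (the rows and the 'ali' pattern are made up; a plain string pattern matches by substring, as pattern_as_function shows above):

from csvkit.grep import FilteringCSVReader

rows = iter([
    ['id', 'name'],
    ['1', 'alice'],
    ['2', 'bob'],
])

filtered = FilteringCSVReader(rows, patterns={'name': 'ali'}, header=True)
print(list(filtered))  # [['id', 'name'], ['1', 'alice']]: the header row passes through untested
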
12 changes: 2 additions & 10 deletions csvkit/utilities/csvgrep.py
@@ -5,7 +5,6 @@
from argparse import FileType

import agate
import six

from csvkit.cli import CSVKitUtility
from csvkit.grep import FilteringCSVReader
@@ -16,24 +15,17 @@ class CSVGrep(CSVKitUtility):
override_flags = ['L', 'blanks', 'date-format', 'datetime-format']

def add_arguments(self):
# I feel that there ought to be a better way to do this across Python 2 and 3.
def option_parser(bytestring):
if six.PY2:
return bytestring.decode(sys.getfilesystemencoding())
else:
return bytestring

self.argparser.add_argument(
'-n', '--names', dest='names_only', action='store_true',
help='Display column names and indices from the input CSV and exit.')
self.argparser.add_argument(
'-c', '--columns', dest='columns',
help='A comma-separated list of column indices, names or ranges to be searched, e.g. "1,id,3-5".')
self.argparser.add_argument(
'-m', '--match', dest="pattern", action='store', type=option_parser,
'-m', '--match', dest="pattern", action='store',
help='A string to search for.')
self.argparser.add_argument(
'-r', '--regex', dest='regex', action='store', type=option_parser,
'-r', '--regex', dest='regex', action='store',
help='A regular expression to match.')
self.argparser.add_argument(
'-f', '--file', dest='matchfile', type=FileType('r'), action='store',
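The removed option_parser shim existed only to decode byte strings on Python 2; on Python 3 the -m and -r arguments already arrive as text, so they are passed straight through. Typical invocations, with data.csv as a placeholder filename:

csvgrep -c name -m alice data.csv
csvgrep -c 1,3-5 -r '^[0-9]+$' data.csv

The first searches the name column for the literal string alice; the second applies a regular expression to columns 1 and 3 through 5.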
