Skip to content

Commit

Permalink
Merge 23b8789 into ffbc152
Browse files Browse the repository at this point in the history
  • Loading branch information
slhck committed Sep 8, 2022
2 parents ffbc152 + 23b8789 commit 14cbe14
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 8 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Expand Up @@ -36,10 +36,11 @@ jobs:
- run: flake8 .
- run: isort . --check-only
- run: pip install .[test] coveralls
- run: pytest --cov csvkit
env:
- env:
LANG: en_US.UTF-8
PYTHONIOENCODING: utf-8
PYTHONUTF8: 1
run: pytest --cov csvkit
# CoverallsException: Not on TravisCI. You have to provide either repo_token in .coveralls.yml or set the COVERALLS_REPO_TOKEN env var.
- if: matrix.python-version != '2.7'
env:
Expand Down
1 change: 1 addition & 0 deletions AUTHORS.rst
Expand Up @@ -105,3 +105,4 @@ The following individuals have contributed code to csvkit:
* Bonifacio de Oliveira
* Ryan Grout
* badbunnyyy
* Werner Robitza
1 change: 1 addition & 0 deletions CHANGELOG.rst
Expand Up @@ -2,6 +2,7 @@ Unreleased
----------

* feat: :doc:`/scripts/csvsql` accepts multiple :code:`--query` command-line arguments.
* feat: :doc:`/scripts/csvstat` adds :code:`--no-grouping-separator` and :code:`--decimal-format` options.

1.0.7 - March 6, 2022
---------------------
Expand Down
17 changes: 12 additions & 5 deletions csvkit/utilities/csvstat.py
Expand Up @@ -114,6 +114,13 @@ def add_arguments(self):
self.argparser.add_argument(
'--count', dest='count_only', action='store_true',
help='Only output total row count.')
self.argparser.add_argument(
'--decimal-format', dest='decimal_format', type=str, default='%.3f',
help='%%-format specification for printing decimal numbers. '
'Defaults to locale-specific formatting with "%%.3f".')
self.argparser.add_argument(
'-G', '--no-grouping-separator', dest='no_grouping_separator', action='store_true',
help='Do not use grouping separators in decimal numbers.')
self.argparser.add_argument(
'-y', '--snifflimit', dest='sniff_limit', type=int, default=1024,
help='Limit CSV dialect sniffing to the specified number of bytes. '
Expand Down Expand Up @@ -215,7 +222,7 @@ def print_one(self, table, column_id, operation, label=True, **kwargs):
stat = table.aggregate(op(column_id))

if self.is_finite_decimal(stat):
stat = format_decimal(stat)
stat = format_decimal(stat, self.args.decimal_format, self.args.no_grouping_separator)
except Exception:
stat = None

Expand Down Expand Up @@ -249,7 +256,7 @@ def calculate_stats(self, table, column_id, **kwargs):
v = table.aggregate(op(column_id))

if self.is_finite_decimal(v):
v = format_decimal(v)
v = format_decimal(v, self.args.decimal_format, self.args.no_grouping_separator)

stats[op_name] = v
except Exception:
Expand Down Expand Up @@ -293,7 +300,7 @@ def print_stats(self, table, column_ids, stats):
v = row['value']

if self.is_finite_decimal(v):
v = format_decimal(v)
v = format_decimal(v, self.args.decimal_format, self.args.no_grouping_separator)
else:
v = six.text_type(row['value'])

Expand Down Expand Up @@ -345,8 +352,8 @@ def print_csv(self, table, column_ids, stats):
writer.writerow(output_row)


def format_decimal(d):
return locale.format_string('%.3f', d, grouping=True).rstrip('0').rstrip('.')
def format_decimal(d, f='%.3f', no_grouping_separator=False):
    """
    Format a number using the current locale and drop insignificant trailing zeros.

    :param d:
        The number to format.
    :param f:
        A %-format specification, passed to :func:`locale.format_string`.
    :param no_grouping_separator:
        If true, no locale grouping (thousands) separators are inserted.
    :returns:
        The formatted string. Trailing zeros of the fractional part are removed,
        and the decimal point is removed too if no fractional digits remain.
    """
    formatted = locale.format_string(f, d, grouping=not no_grouping_separator)

    # Use the locale's own decimal point: it is not always ".", and "." may
    # instead be the grouping separator (e.g. "1.000" for one thousand).
    decimal_point = locale.localeconv()['decimal_point'] or '.'
    integer_part, separator, fraction = formatted.rpartition(decimal_point)

    # Only zeros that follow a decimal point are insignificant. Stripping
    # unconditionally would turn "100" (from a format such as "%.0f" or "%d")
    # into "1", or mangle an exponent such as "e+10".
    if separator and (not fraction or fraction.isdigit()):
        fraction = fraction.rstrip('0')
        if fraction:
            return integer_part + separator + fraction
        return integer_part

    return formatted


def get_type(table, column_id, **kwargs):
Expand Down
8 changes: 7 additions & 1 deletion docs/scripts/csvstat.rst
Expand Up @@ -12,7 +12,8 @@ Prints descriptive statistics for all columns in a CSV file. Will intelligently
[-K SKIP_LINES] [-v] [-l] [--zero] [-V] [--csv] [-n]
[-c COLUMNS] [--type] [--nulls] [--unique] [--min] [--max]
[--sum] [--mean] [--median] [--stdev] [--len] [--freq]
[--freq-count FREQ_COUNT] [--count] [-y SNIFF_LIMIT]
[--freq-count FREQ_COUNT] [--count] [--decimal-format DECIMAL_FORMAT]
[-G] [-y SNIFF_LIMIT]
[FILE]

Print descriptive statistics for each column in a CSV file.
Expand Down Expand Up @@ -44,6 +45,11 @@ Prints descriptive statistics for all columns in a CSV file. Will intelligently
--freq-count FREQ_COUNT
The maximum number of frequent values to display.
--count Only output total row count.
--decimal-format DECIMAL_FORMAT
%-format specification for printing decimal numbers.
Defaults to locale-specific formatting with "%.3f".
-G, --no-grouping-separator
Do not use grouping separators in decimal numbers.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Limit CSV dialect sniffing to the specified number of
bytes. Specify "0" to disable sniffing.
Expand Down
17 changes: 17 additions & 0 deletions tests/test_utilities/test_csvstat.py
Expand Up @@ -108,3 +108,20 @@ def test_csv_columns(self):
self.assertEqual(row[2], 'Text')
self.assertEqual(row[5], '')
self.assertEqual(row[11], '16')

# Checks the csvstat number-formatting options against the mean of the TOTAL column.
def test_decimal_format(self):
# Default options: the expected output has a grouping separator and three decimal places.
output = self.get_output(['-c', 'TOTAL', '--mean', 'examples/realdata/FY09_EDU_Recipients_by_State.csv'])

self.assertEqual(output, '9,748.346\n')

# --no-grouping-separator: same digits, without the "," grouping separator.
output = self.get_output([
'-c', 'TOTAL', '--mean', '--no-grouping-separator', 'examples/realdata/FY09_EDU_Recipients_by_State.csv'
])

self.assertEqual(output, '9748.346\n')

# --decimal-format '%.2f': the value is rounded to two decimal places; grouping stays on.
output = self.get_output([
'-c', 'TOTAL', '--mean', '--decimal-format', '%.2f', 'examples/realdata/FY09_EDU_Recipients_by_State.csv'
])

self.assertEqual(output, '9,748.35\n')

0 comments on commit 14cbe14

Please sign in to comment.