Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions csvkit/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import datetime

import six
import normality
from unidecode import unidecode

from sqlalchemy import Column, MetaData, Table, create_engine
from sqlalchemy import BigInteger, Boolean, Date, DateTime, Float, Integer, String, Time
Expand All @@ -27,7 +29,12 @@
SQL_INTEGER_MAX = 2147483647
SQL_INTEGER_MIN = -2147483647

def make_column(column, no_constraints=False):

def normalize_name(name):
    """
    Converts a raw header name into an underscore-separated slug.

    The name is first passed through ``unidecode`` and the result is then
    handed to ``normality.slugify`` with ``'_'`` as the separator.
    """
    transliterated = unidecode(name)
    return normality.slugify(transliterated, sep='_')


def make_column(column, no_constraints=False, normalize_columns=False):
"""
Creates a sqlalchemy column from a csvkit Column.
"""
Expand Down Expand Up @@ -66,15 +73,19 @@ def make_column(column, no_constraints=False):

sql_column_kwargs['nullable'] = column.has_nulls()

return Column(column.name, sql_column_type(**sql_type_kwargs), **sql_column_kwargs)
column_name = column.name
if normalize_columns:
column_name = normalize_name(column.name)

return Column(column_name, sql_column_type(**sql_type_kwargs), **sql_column_kwargs)

def get_connection(connection_string):
    """
    Creates a sqlalchemy engine for a connection string, together with a
    MetaData object constructed from that engine.

    Returns an ``(engine, metadata)`` tuple.
    """
    db_engine = create_engine(connection_string)
    return db_engine, MetaData(db_engine)

def make_table(csv_table, name='table_name', no_constraints=False, db_schema=None, metadata=None):
def make_table(csv_table, name='table_name', no_constraints=False, db_schema=None, normalize_columns=False, metadata=None):
"""
Creates a sqlalchemy table from a csvkit Table.
"""
Expand All @@ -84,7 +95,7 @@ def make_table(csv_table, name='table_name', no_constraints=False, db_schema=Non
sql_table = Table(csv_table.name, metadata, schema=db_schema)

for column in csv_table:
sql_table.append_column(make_column(column, no_constraints))
sql_table.append_column(make_column(column, no_constraints, normalize_columns))

return sql_table

Expand Down
5 changes: 4 additions & 1 deletion csvkit/utilities/csvsql.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def add_arguments(self):
help='In addition to creating the table, also insert the data into the table. Only valid when --db is specified.')
self.argparser.add_argument('--tables', dest='table_names',
help='Specify one or more names for the tables to be created. If omitted, the filename (minus extension) or "stdin" will be used.')
self.argparser.add_argument('-n', '--normalize-columns', dest='normalize_columns', action='store_true',
help='Normalize the headers before generating column names.')
self.argparser.add_argument('--no-constraints', dest='no_constraints', action='store_true',
help='Generate a schema without length limits or null checks. Useful when sampling big tables.')
self.argparser.add_argument('--no-create', dest='no_create', action='store_true',
Expand Down Expand Up @@ -115,6 +117,7 @@ def main(self):
table_name,
self.args.no_constraints,
self.args.db_schema,
self.args.normalize_columns,
metadata
)

Expand All @@ -125,7 +128,7 @@ def main(self):
# Insert data
if do_insert and csv_table.count_rows() > 0:
insert = sql_table.insert()
headers = csv_table.headers()
headers = [sql.normalize_name(h) for h in csv_table.headers()]
conn.execute(insert, [dict(zip(headers, row)) for row in csv_table.to_rows()])

# Output SQL statements
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
'openpyxl==2.2.6',
'six>=1.6.1',
'python-dateutil==2.2',
'dbf>=0.96.005'
'dbf>=0.96.005',
'Unidecode>=0.04.19',
'normality>=0.2.4'
]

if sys.version_info < (2, 7):
Expand Down
14 changes: 14 additions & 0 deletions tests/test_sql.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# coding: utf-8

try:
import unittest2 as unittest
Expand Down Expand Up @@ -101,6 +102,19 @@ def test_make_create_table_statement_no_constraints(self):
\tempty_column VARCHAR
);""")

def test_make_create_table_statement_normalize_names(self):
    # A single column whose header mixes non-ASCII letters, a space and
    # punctuation, so the normalized column name differs from the header.
    values = [u'Chicago Reader', u'Chicago Sun-Times', u'Chicago Tribune', u'Row with blanks']
    messy_column = table.Column(0, u'äää H!HU', values)
    csv_table = table.Table([messy_column], name='test_table')

    # Constraints are disabled and column-name normalization is enabled.
    sql_table = sql.make_table(csv_table, 'csvsql', no_constraints=True, db_schema=None, normalize_columns=True)
    expected = u"""CREATE TABLE test_table (
\taaa_h_hu VARCHAR
);"""

    self.assertEqual(sql.make_create_table_statement(sql_table), expected)


def test_make_create_table_statement_with_schema(self):
sql_table = sql.make_table(self.csv_table, 'csvsql', db_schema='test_schema')
statement = sql.make_create_table_statement(sql_table)
Expand Down