From f2c16080715c20aff19cba9f9c64c389ff875a20 Mon Sep 17 00:00:00 2001 From: Jeff Larson Date: Mon, 9 Jan 2012 14:43:58 -0500 Subject: [PATCH 1/2] implement a loosey option for cases in which the schema doesn't need to be strict --- csvkit/sql.py | 19 ++++++++++--------- csvkit/utilities/csvsql.py | 11 ++++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/csvkit/sql.py b/csvkit/sql.py index 4203d54f4..0d89c9f3d 100644 --- a/csvkit/sql.py +++ b/csvkit/sql.py @@ -22,7 +22,7 @@ NULL_COLUMN_MAX_LENGTH = 32 -def make_column(column): +def make_column(column, loosey=False): """ Creates a sqlalchemy column from a csvkit Column. """ @@ -45,12 +45,13 @@ def make_column(column): else: raise ValueError('Unexpected normalized column type: %s' % column.type) - if column.type is NoneType: - sql_type_kwargs['length'] = NULL_COLUMN_MAX_LENGTH - elif column.type is unicode: - sql_type_kwargs['length'] = column.max_length() + if loosey is False: + if column.type is NoneType: + sql_type_kwargs['length'] = NULL_COLUMN_MAX_LENGTH + elif column.type is unicode: + sql_type_kwargs['length'] = column.max_length() - sql_column_kwargs['nullable'] = column.has_nulls() + sql_column_kwargs['nullable'] = column.has_nulls() return Column(column.name, sql_column_type(**sql_type_kwargs), **sql_column_kwargs) @@ -60,7 +61,7 @@ def get_connection(connection_string): return engine, metadata -def make_table(csv_table, name='table_name', metadata=None): +def make_table(csv_table, name='table_name', loosey=False, metadata=None): """ Creates a sqlalchemy table from a csvkit Table. """ @@ -70,7 +71,7 @@ def make_table(csv_table, name='table_name', metadata=None): sql_table = Table(csv_table.name, metadata) for column in csv_table: - sql_table.append_column(make_column(column)) + sql_table.append_column(make_column(column, loosey)) return sql_table @@ -82,7 +83,7 @@ def make_create_table_statement(sql_table, dialect=None): module = __import__('sqlalchemy.dialects.%s' % DIALECTS[dialect], fromlist=['dialect']) sql_dialect = module.dialect() else: - sql_dialect = None + sql_dialect = None return unicode(CreateTable(sql_table).compile(dialect=sql_dialect)).strip() + ';' diff --git a/csvkit/utilities/csvsql.py b/csvkit/utilities/csvsql.py index 801a9a369..e2ba41079 100644 --- a/csvkit/utilities/csvsql.py +++ b/csvkit/utilities/csvsql.py @@ -22,6 +22,8 @@ def add_arguments(self): help='In addition to creating the table, also insert the data into the table. Only valid when --db is specified.') self.argparser.add_argument('--table', dest='table_name', help='Specify a name for the table to be created. If omitted, the filename (minus extension) will be used.') + self.argparser.add_argument('--loosey', dest='loosey', action='store_true', + help='Generate a schema without limits or null checks. Useful for big tables.') def main(self): if self.args.table_name: @@ -32,6 +34,9 @@ def main(self): else: self.argparser.error('The --table argument is required when providing data over STDIN.') + if self.args.loosey: + loosey = True + if self.args.dialect and self.args.connection_string: self.argparser.error('The --dialect option is only valid when --db is not specified.') @@ -47,7 +52,7 @@ def main(self): except ImportError: raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n') - sql_table = sql.make_table(csv_table, table_name, metadata) + sql_table = sql.make_table(csv_table, table_name, loosey, metadata) sql_table.create() if self.args.insert: @@ -55,11 +60,11 @@ def main(self): headers = csv_table.headers() for row in csv_table.to_rows(serialize_dates=True): - engine.execute(insert, [dict(zip(headers, row)), ]) + engine.execute(insert, [dict(zip(headers, row)), ]) # Writing to file else: - sql_table = sql.make_table(csv_table, table_name) + sql_table = sql.make_table(csv_table, table_name, loosey) self.output_file.write((u'%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect)).encode('utf-8')) if __name__ == '__main__': From 628ffbe03f6a99997290701a262685a36e4dd532 Mon Sep 17 00:00:00 2001 From: Jeff Larson Date: Mon, 9 Jan 2012 14:46:46 -0500 Subject: [PATCH 2/2] docs for --loosey option --- docs/scripts/csvsql.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/scripts/csvsql.rst b/docs/scripts/csvsql.rst index 07443757a..290675c9a 100644 --- a/docs/scripts/csvsql.rst +++ b/docs/scripts/csvsql.rst @@ -39,6 +39,8 @@ Generate SQL statements for a CSV file or create execute those statements direct specified. --table TABLE_NAME Specify a name for the table to be created. If omitted, the filename (minus extension) will be used. + --loosey Generate a schema without limits or null checks. + Useful for big tables. Also see: :doc:`common_arguments`.