diff --git a/AUTHORS b/AUTHORS index 425298eef..b4ffc2e69 100644 --- a/AUTHORS +++ b/AUTHORS @@ -11,4 +11,5 @@ The following individuals have contributed code to csvkit: * Matt Bone * Ryan Pitts * Hari Dara +* Jeff Larson diff --git a/csvkit/sql.py b/csvkit/sql.py index 0d89c9f3d..071a12823 100644 --- a/csvkit/sql.py +++ b/csvkit/sql.py @@ -22,7 +22,7 @@ NULL_COLUMN_MAX_LENGTH = 32 -def make_column(column, loosey=False): +def make_column(column, no_constraints=False): """ Creates a sqlalchemy column from a csvkit Column. """ @@ -45,7 +45,7 @@ def make_column(column, loosey=False): else: raise ValueError('Unexpected normalized column type: %s' % column.type) - if loosey is False: + if no_constraints is False: if column.type is NoneType: sql_type_kwargs['length'] = NULL_COLUMN_MAX_LENGTH elif column.type is unicode: @@ -61,7 +61,7 @@ def get_connection(connection_string): return engine, metadata -def make_table(csv_table, name='table_name', loosey=False, metadata=None): +def make_table(csv_table, name='table_name', no_constraints=False, metadata=None): """ Creates a sqlalchemy table from a csvkit Table. """ @@ -71,7 +71,7 @@ def make_table(csv_table, name='table_name', loosey=False, metadata=None): sql_table = Table(csv_table.name, metadata) for column in csv_table: - sql_table.append_column(make_column(column, loosey)) + sql_table.append_column(make_column(column, no_constraints)) return sql_table diff --git a/csvkit/utilities/csvsql.py b/csvkit/utilities/csvsql.py index e2ba41079..11c8f824c 100644 --- a/csvkit/utilities/csvsql.py +++ b/csvkit/utilities/csvsql.py @@ -22,8 +22,8 @@ def add_arguments(self): help='In addition to creating the table, also insert the data into the table. Only valid when --db is specified.') self.argparser.add_argument('--table', dest='table_name', help='Specify a name for the table to be created. 
If omitted, the filename (minus extension) will be used.') - self.argparser.add_argument('--loosey', dest='loosey', action='store_true', - help='Generate a schema without limits or null checks. Useful for big tables.') + self.argparser.add_argument('--no-constraints', dest='no_constraints', action='store_true', + help='Generate a schema without length limits or null checks. Useful when sampling big tables.') def main(self): if self.args.table_name: @@ -34,8 +34,8 @@ def main(self): else: self.argparser.error('The --table argument is required when providing data over STDIN.') - if self.args.loosey: - loosey = True + # Always bind the flag (store_true defaults to False) so it is defined when --no-constraints is omitted. + no_constraints = self.args.no_constraints if self.args.dialect and self.args.connection_string: self.argparser.error('The --dialect option is only valid when --db is not specified.') @@ -52,7 +52,7 @@ def main(self): except ImportError: raise ImportError('You don\'t appear to have the necessary database backend installed for connection string you\'re trying to use.. 
Available backends include:\n\nPostgresql:\tpip install psycopg2\nMySQL:\t\tpip install MySQL-python\n\nFor details on connection strings and other backends, please see the SQLAlchemy documentation on dialects at: \n\nhttp://www.sqlalchemy.org/docs/dialects/\n\n') - sql_table = sql.make_table(csv_table, table_name, loosey, metadata) + sql_table = sql.make_table(csv_table, table_name, no_constraints, metadata) sql_table.create() if self.args.insert: @@ -64,7 +64,7 @@ def main(self): # Writing to file else: - sql_table = sql.make_table(csv_table, table_name, loosey) + sql_table = sql.make_table(csv_table, table_name, no_constraints) self.output_file.write((u'%s\n' % sql.make_create_table_statement(sql_table, dialect=self.args.dialect)).encode('utf-8')) if __name__ == '__main__': diff --git a/docs/scripts/csvsql.rst b/docs/scripts/csvsql.rst index 290675c9a..e009fd7c0 100644 --- a/docs/scripts/csvsql.rst +++ b/docs/scripts/csvsql.rst @@ -39,8 +39,8 @@ Generate SQL statements for a CSV file or create execute those statements direct specified. --table TABLE_NAME Specify a name for the table to be created. If omitted, the filename (minus extension) will be used. - --loosey Generate a schema without limits or null checks. - Useful for big tables. + --no-constraints Generate a schema without length limits or null + checks. Useful when sampling big tables. Also see: :doc:`common_arguments`. @@ -56,5 +56,9 @@ Generate a statement in the PostgreSQL dialect:: Create a table and import data from the CSV directly into Postgres:: $ createdb test - $ csvsql --db postgresql:///test --name fy09 --insert examples/realdata/FY09_EDU_Recipients_by_State.csv + $ csvsql --db postgresql:///test --table fy09 --insert examples/realdata/FY09_EDU_Recipients_by_State.csv + +For large tables it may not be practical to process the entire table. One solution to this is to analyze a sample of the table. 
In this case it can be useful to turn off length limits and null checks with the ``--no-constraints`` option:: + + $ head -n 20 examples/realdata/FY09_EDU_Recipients_by_State.csv | csvsql --no-constraints --table fy09