In [None]:
import duckdb
import pandas as pd

In [None]:
# Open an in-memory database. ":memory:" is the value duckdb.connect() uses
# when no path is given, so nothing is persisted to disk.
con = duckdb.connect(database=":memory:")

# Install and then load the httpfs extension on this connection; the cells
# below read a CSV over HTTPS.
con.install_extension("httpfs")
con.load_extension("httpfs")

In [None]:
# Plain SQL route: the remote CSV's URL is named directly in the FROM clause.
con.sql(
    "SELECT * FROM 'https://open.gishub.org/data/duckdb/cities.csv';"
)

In [None]:
# Method-call route to the same remote CSV, with no SQL text involved.
con.read_csv(
    "https://open.gishub.org/data/duckdb/cities.csv",
)

In [None]:
# Small DataFrame with integer columns "a" and "b"; the next cell queries it
# from SQL by its variable name.
pandas_df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
pandas_df

In [None]:
# The table name in this query is the Python variable `pandas_df` defined in
# the previous cell, so the DataFrame is queried in place.
con.sql(
    "SELECT * FROM pandas_df;"
)

In [None]:
# Read the remote CSV through DuckDB, then convert the result with .df()
# so it can be previewed with head().
df = (
    con.read_csv("https://open.gishub.org/data/duckdb/cities.csv")
    .df()
)
df.head(n=5)

In [None]:
# Load the same CSV with pandas alone, for comparison with the DuckDB route.
df2 = pd.read_csv(
    "https://open.gishub.org/data/duckdb/cities.csv"
)
df2.head(n=5)

In [None]:
# fetchall() hands the rows back as Python objects; [0][0] then picks the
# first column of the first row.
(
    con.sql("SELECT 1")
    .fetchall()[0][0]
)

In [None]:
# Same query, fetched through fetchnumpy() instead of as Python objects.
(
    con.sql("SELECT 1")
    .fetchnumpy()
)

# Write output to a file

In [None]:
# Export through the result's own writer methods: one Parquet file, one CSV.
con.sql("SELECT 42").write_parquet(
    "out.parquet"
)
con.sql("SELECT 42").write_csv(
    "out.csv"
)

# Export through SQL instead. Note that this COPY targets 'out.parquet'
# again, i.e. the same path the first statement in this cell wrote to.
con.sql(
    "COPY (SELECT 42) TO 'out.parquet'"
)

In [None]:
# Rebind `con` to a database stored in the file 'file.db'; from here on the
# name no longer refers to the in-memory connection opened earlier.
con = duckdb.connect(database="file.db")

# Build the `cities` table from the remote CSV. IF NOT EXISTS means that
# re-running this cell does not fail when the table is already there.
con.sql(
    "CREATE TABLE IF NOT EXISTS cities AS "
    'FROM read_csv_auto("https://open.gishub.org/data/duckdb/cities.csv")'
)

# Print the contents of the table.
con.table("cities").show()

# Note: a connection is also closed implicitly once it goes out of scope.

In [None]:
# Unlike the IF NOT EXISTS form above, CREATE OR REPLACE rebuilds `cities`
# from the remote CSV even when the table already exists.
con.sql(
    "CREATE OR REPLACE TABLE cities AS "
    'FROM read_csv_auto("https://open.gishub.org/data/duckdb/cities.csv")'
)

In [None]:
# Explicitly close the connection currently bound to `con` (the one opened on
# "file.db" above) rather than waiting for it to go out of scope.
con.close()

In [None]:
# Context-manager form: reopen 'file.db' for the duration of the block only.
with duckdb.connect(database="file.db") as con:
    # Create `cities` from the remote CSV unless the table already exists.
    con.sql(
        "CREATE TABLE IF NOT EXISTS cities AS "
        'FROM read_csv_auto("https://open.gishub.org/data/duckdb/cities.csv")'
    )
    # Print the contents of the table.
    con.table("cities").show()
# Leaving the `with` block closes the connection automatically.