In [2]:
import duckdb
import pandas as pd

In [3]:
# No database path is passed, so DuckDB opens an in-memory database.
con = duckdb.connect()
# Install and then load the httpfs extension on this connection; later cells
# read a CSV file over HTTPS.
con.install_extension('httpfs')
con.load_extension('httpfs')

In [4]:
# Plain SQL: the remote CSV's URL is used directly as the table in the FROM clause.
con.sql(
    "SELECT * FROM 'https://open.gishub.org/data/duckdb/cities.csv';"
)

┌───────┬──────────────────┬─────────┬───────────┬───────────┬────────────┐
│  id   │       name       │ country │ latitude  │ longitude │ population │
│ int64 │     varchar      │ varchar │  double   │  double   │   int64    │
├───────┼──────────────────┼─────────┼───────────┼───────────┼────────────┤
│     1 │ Bombo            │ UGA     │    0.5833 │   32.5333 │      75000 │
│     2 │ Fort Portal      │ UGA     │     0.671 │    30.275 │      42670 │
│     3 │ Potenza          │ ITA     │    40.642 │    15.799 │      69060 │
│     4 │ Campobasso       │ ITA     │    41.563 │    14.656 │      50762 │
│     5 │ Aosta            │ ITA     │    45.737 │     7.315 │      34062 │
│     6 │ Mariehamn        │ ALD     │    60.097 │    19.949 │      10682 │
│     7 │ Ramallah         │ PSE     │  31.90294 │  35.20621 │      24599 │
│     8 │ Vatican City     │ VAT     │  41.90001 │  12.44781 │        832 │
│     9 │ Poitier          │ FRA     │  46.58329 │   0.33328 │      85960 │
│    10 │ Cl

In [5]:
# Same remote CSV, read through the connection's read_csv method instead of SQL.
con.read_csv(
    'https://open.gishub.org/data/duckdb/cities.csv'
)

┌───────┬──────────────────┬─────────┬───────────┬───────────┬────────────┐
│  id   │       name       │ country │ latitude  │ longitude │ population │
│ int64 │     varchar      │ varchar │  double   │  double   │   int64    │
├───────┼──────────────────┼─────────┼───────────┼───────────┼────────────┤
│     1 │ Bombo            │ UGA     │    0.5833 │   32.5333 │      75000 │
│     2 │ Fort Portal      │ UGA     │     0.671 │    30.275 │      42670 │
│     3 │ Potenza          │ ITA     │    40.642 │    15.799 │      69060 │
│     4 │ Campobasso       │ ITA     │    41.563 │    14.656 │      50762 │
│     5 │ Aosta            │ ITA     │    45.737 │     7.315 │      34062 │
│     6 │ Mariehamn        │ ALD     │    60.097 │    19.949 │      10682 │
│     7 │ Ramallah         │ PSE     │  31.90294 │  35.20621 │      24599 │
│     8 │ Vatican City     │ VAT     │  41.90001 │  12.44781 │        832 │
│     9 │ Poitier          │ FRA     │  46.58329 │   0.33328 │      85960 │
│    10 │ Cl

In [6]:
# Small two-column frame used to show DuckDB querying a pandas object.
pandas_df = pd.DataFrame(
    {
        'a': [1, 2, 3],
        'b': [4, 5, 6],
    }
)
pandas_df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [7]:
# The table name in this query is the Python variable `pandas_df`; the displayed
# result has that frame's `a` and `b` columns.
con.sql(
    'SELECT * FROM pandas_df;'
)

┌───────┬───────┐
│   a   │   b   │
│ int64 │ int64 │
├───────┼───────┤
│     1 │     4 │
│     2 │     5 │
│     3 │     6 │
└───────┴───────┘

In [8]:
# Read the remote CSV with DuckDB, then call .df() on the result to get the
# pandas DataFrame whose first rows are shown below.
df = (
    con.read_csv('https://open.gishub.org/data/duckdb/cities.csv')
    .df()
)
df.head()

Unnamed: 0,id,name,country,latitude,longitude,population
0,1,Bombo,UGA,0.5833,32.5333,75000
1,2,Fort Portal,UGA,0.671,30.275,42670
2,3,Potenza,ITA,40.642,15.799,69060
3,4,Campobasso,ITA,41.563,14.656,50762
4,5,Aosta,ITA,45.737,7.315,34062


In [9]:
# For comparison: load the same CSV with pandas alone, without going through DuckDB.
df2 = pd.read_csv(
    'https://open.gishub.org/data/duckdb/cities.csv'
)
df2.head()

Unnamed: 0,id,name,country,latitude,longitude,population
0,1,Bombo,UGA,0.5833,32.5333,75000
1,2,Fort Portal,UGA,0.671,30.275,42670
2,3,Potenza,ITA,40.642,15.799,69060
3,4,Campobasso,ITA,41.563,14.656,50762
4,5,Aosta,ITA,45.737,7.315,34062


In [10]:
# Fetch the result as plain Python objects and pick out the single value.
(
    con.sql('SELECT 1')
    .fetchall()  # all result rows as Python objects
    [0][0]       # first column of the first row
)

1

In [11]:
# fetchnumpy() returns the result as a dict mapping each column name to an array
# (here the single column is named '1').
(
    con.sql('SELECT 1')
    .fetchnumpy()
)

{'1': array([1])}

# Write output to a file

In [12]:
# Relation methods: write the query result straight to a file.
con.sql('SELECT 42').write_parquet('out.parquet')  # Parquet output
con.sql('SELECT 42').write_csv('out.csv')  # CSV output

# SQL alternative: a COPY ... TO statement. It targets the same 'out.parquet'
# path as the write_parquet call above.
con.sql(
    "COPY (SELECT 42) TO 'out.parquet'"
)

In [16]:
# Open (or create) a persistent database stored in the file 'file.db'.
# NOTE: this rebinds `con`, which until now referred to the in-memory
# connection opened earlier; every later use of `con` talks to 'file.db'.
con = duckdb.connect('file.db')
# Create the table from the remote CSV unless it already exists, so re-running
# this cell does not fail. The URL is a SQL string literal and therefore uses
# single quotes; double quotes denote identifiers in SQL.
con.sql(
    "CREATE TABLE IF NOT EXISTS cities AS "
    "FROM read_csv_auto('https://open.gishub.org/data/duckdb/cities.csv')"
)
# Query the table and print it.
con.table('cities').show()
# Note: connections are also closed implicitly when they go out of scope.

┌───────┬──────────────────┬─────────┬───────────┬───────────┬────────────┐
│  id   │       name       │ country │ latitude  │ longitude │ population │
│ int64 │     varchar      │ varchar │  double   │  double   │   int64    │
├───────┼──────────────────┼─────────┼───────────┼───────────┼────────────┤
│     1 │ Bombo            │ UGA     │    0.5833 │   32.5333 │      75000 │
│     2 │ Fort Portal      │ UGA     │     0.671 │    30.275 │      42670 │
│     3 │ Potenza          │ ITA     │    40.642 │    15.799 │      69060 │
│     4 │ Campobasso       │ ITA     │    41.563 │    14.656 │      50762 │
│     5 │ Aosta            │ ITA     │    45.737 │     7.315 │      34062 │
│     6 │ Mariehamn        │ ALD     │    60.097 │    19.949 │      10682 │
│     7 │ Ramallah         │ PSE     │  31.90294 │  35.20621 │      24599 │
│     8 │ Vatican City     │ VAT     │  41.90001 │  12.44781 │        832 │
│     9 │ Poitier          │ FRA     │  46.58329 │   0.33328 │      85960 │
│    10 │ Cl

In [17]:
# Rebuild the cities table from the remote CSV, replacing any existing copy.
# The URL is written as a single-quoted SQL string literal; double quotes
# denote identifiers in SQL.
con.sql(
    "CREATE OR REPLACE TABLE cities AS "
    "FROM read_csv_auto('https://open.gishub.org/data/duckdb/cities.csv')"
)

In [18]:
# Explicitly close the connection rather than relying on implicit cleanup.
con.close()