In [1]:
import duckdb

# Open a fresh in-memory DuckDB database; nothing is persisted to disk until
# an explicit export is run against this connection.
con = duckdb.connect(database=":memory:")

In [2]:
# Create the warehouse schema: five lookup tables (environment, location,
# twilight, weather, wind) and the `accident` table, which references each of
# them through a foreign key.
#
# The leading DROP statements make this cell safe to re-run on a live kernel:
# without them a second execution fails because the tables already exist.
# `accident` is dropped first because it holds the foreign keys; the lookup
# tables cannot be dropped while it still references them.
# NOTE: re-running this cell discards any rows already loaded into these
# tables, so the load cell must be run again afterwards.
con.execute("""
DROP TABLE IF EXISTS accident;
DROP TABLE IF EXISTS environment;
DROP TABLE IF EXISTS location;
DROP TABLE IF EXISTS twilight;
DROP TABLE IF EXISTS weather;
DROP TABLE IF EXISTS wind;

CREATE TABLE environment (
    Amenity BOOLEAN,
    Bump BOOLEAN,
    Crossing BOOLEAN,
    Give_Way BOOLEAN,
    Junction BOOLEAN,
    No_Exit BOOLEAN,
    Railway BOOLEAN,
    Roundabout BOOLEAN,
    Station BOOLEAN,
    Stop BOOLEAN,
    Traffic_Calming BOOLEAN,
    Traffic_Signal BOOLEAN,
    Turning_Loop BOOLEAN,
    Environment_ID INTEGER PRIMARY KEY
);

CREATE TABLE location (
    Street VARCHAR,
    City VARCHAR,
    County VARCHAR,
    State CHAR(2),
    Location_ID INTEGER PRIMARY KEY
);

CREATE TABLE twilight (
    Sunrise_Sunset VARCHAR,
    Civil_Twilight VARCHAR,
    Nautical_Twilight VARCHAR,
    Astronomical_Twilight VARCHAR,
    Twilight_ID INTEGER PRIMARY KEY
);


CREATE TABLE weather (
    Weather_Condition VARCHAR,
    Weather_Condition_ID INTEGER PRIMARY KEY
);

CREATE TABLE wind (
    Wind_Direction VARCHAR,
    Wind_Direction_ID INTEGER PRIMARY KEY
);

CREATE TABLE accident (
    Accident_ID BIGINT PRIMARY KEY,        -- Use BIGINT for large IDs
    Severity INTEGER,
    Start_Time TIMESTAMP,
    End_Time TIMESTAMP,
    Start_Lat DECIMAL(10, 7),              -- High precision for latitude/longitude
    Start_Lng DECIMAL(10, 7),
    End_Lat DECIMAL(10, 7),
    End_Lng DECIMAL(10, 7),
    Distance_mi DECIMAL(5, 2),            -- Distance in miles, up to 999.99
    Weather_Timestamp TIMESTAMP,
    Temperature_F DECIMAL(8, 2),          -- Temperature in Fahrenheit
    Humidity_percent DECIMAL(8, 2),       -- Percentage values
    Wind_Speed_mph DECIMAL(8, 2),         -- Wind speed in mph
    Precipitation_in DECIMAL(8, 2),       -- Precipitation in inches
    Visibility_mi DECIMAL(8, 2),          -- Visibility in miles
    Location_ID INTEGER REFERENCES location(Location_ID),
    Weather_Condition_ID INTEGER REFERENCES weather(Weather_Condition_ID),
    Wind_Direction_ID INTEGER REFERENCES wind(Wind_Direction_ID),
    Environment_ID INTEGER REFERENCES environment(Environment_ID),
    Twilight_ID INTEGER REFERENCES twilight(Twilight_ID)
);
""")

<duckdb.duckdb.DuckDBPyConnection at 0x17f3bb0e970>

In [3]:
# Bulk-load every CSV into its matching table. The five lookup tables are
# loaded before `accident`, whose foreign keys reference their primary keys.
load_sql = """
COPY environment FROM '../data/environment_dim.csv' (AUTO_DETECT TRUE);
COPY location FROM '../data/location_dim.csv' (AUTO_DETECT TRUE);
COPY twilight FROM '../data/twilight_dim.csv' (AUTO_DETECT TRUE);
COPY weather FROM '../data/weather_condition_dim.csv' (AUTO_DETECT TRUE);
COPY wind FROM '../data/wind_direction_dim.csv' (AUTO_DETECT TRUE);
COPY accident FROM '../data/accident_fact.csv' (AUTO_DETECT TRUE);
"""
con.execute(load_sql)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<duckdb.duckdb.DuckDBPyConnection at 0x17f3bb0e970>

In [4]:
# Sanity check: pull the first five rows of `accident` and display them.
preview_sql = "SELECT * FROM accident LIMIT 5;"
result = con.execute(preview_sql).fetchdf()
result

Unnamed: 0,Accident_ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance_mi,Weather_Timestamp,Temperature_F,Humidity_percent,Wind_Speed_mph,Precipitation_in,Visibility_mi,Location_ID,Weather_Condition_ID,Wind_Direction_ID,Environment_ID,Twilight_ID
0,5337731,2,2021-11-27 05:42:00,2021-11-27 07:00:22,40.301935,-109.810405,40.301952,-109.806051,0.23,2021-11-27 05:53:00,21.0,71.0,7.0,0.0,10.0,19,2,11,1,10
1,6271420,2,2020-10-11 05:56:25,2020-10-11 07:11:33,45.76376,-93.618744,45.763824,-93.618561,0.01,2020-10-11 05:55:00,46.0,81.0,10.0,0.0,10.0,21,2,3,1,10
2,6384165,2,2020-10-11 05:56:25,2020-10-11 08:56:37,45.76376,-93.618744,45.765732,-93.613197,0.3,2020-10-11 05:55:00,46.0,81.0,10.0,0.0,10.0,21,2,3,1,10
3,3006827,2,2022-04-07 20:56:51,2022-04-07 22:16:03,27.863388,-82.638733,27.864005,-82.638726,0.04,2022-04-07 20:53:00,71.0,93.0,6.0,0.0,10.0,25,3,11,1,10
4,5592800,2,2021-04-13 21:34:11,2021-04-14 00:55:20,44.949114,-95.391342,44.949061,-95.38431,0.34,2021-04-13 21:35:00,30.0,86.0,15.0,0.0,10.0,28,3,11,1,10


In [5]:
# Dump the database (schema plus table contents) to the ../data/warehouse
# directory so it can be re-imported later.
export_sql = """
EXPORT DATABASE '../data/warehouse';
"""
con.execute(export_sql)

<duckdb.duckdb.DuckDBPyConnection at 0x17f3bb0e970>