In [1]:
# Import the dependencies
import pandas as pd
import numpy as np
import datetime

# ORM Stuff
from sqlalchemy import create_engine, inspect


In [2]:
# Read the meteorite CSV (path is relative to the notebook's working directory)
csv_path = "cleaned_meteorite_data.csv"
df = pd.read_csv(csv_path)

# Preview the first five rows
df.head()

Unnamed: 0.1,Unnamed: 0,name,id,name_type,rec_class,mass,fall,year,rec_lat,rec_long,geo_location,location_type
0,0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,,Exact
1,1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,,Exact
2,2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.0,,Exact
3,3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,,Exact
4,4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95,,Exact


In [3]:
# Summarize column names, non-null counts, and dtypes to spot missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45716 entries, 0 to 45715
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     45716 non-null  int64  
 1   name           45716 non-null  object 
 2   id             45716 non-null  int64  
 3   name_type      45716 non-null  object 
 4   rec_class      45716 non-null  object 
 5   mass           45585 non-null  float64
 6   fall           45716 non-null  object 
 7   year           45428 non-null  float64
 8   rec_lat        44149 non-null  float64
 9   rec_long       44149 non-null  float64
 10  geo_location   12183 non-null  object 
 11  location_type  45716 non-null  object 
dtypes: float64(4), int64(2), object(6)
memory usage: 4.2+ MB


In [4]:
# Remove geo_location, then store year as an integer column.
# Missing years are replaced with 0 first, because NaN cannot be cast to int.
df = (
    df.drop(columns=["geo_location"])
      .assign(year=lambda frame: frame["year"].fillna(0).astype(int))
)


In [5]:
# Columns that are not written to the database
unused_columns = ['name_type', 'location_type', 'fall', 'Unnamed: 0', 'name']
df = df.drop(columns=unused_columns)

In [6]:
# Keep only the rows in which every remaining column has a value,
# then re-check the row count and dtypes.
df = df.dropna(axis="index", how="any")
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 44029 entries, 0 to 45715
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   id         44029 non-null  int64  
 1   rec_class  44029 non-null  object 
 2   mass       44029 non-null  float64
 3   year       44029 non-null  int64  
 4   rec_lat    44029 non-null  float64
 5   rec_long   44029 non-null  float64
dtypes: float64(3), int64(2), object(1)
memory usage: 2.4+ MB


In [7]:
# SQLite database file, addressed by a relative-path SQLAlchemy URL
database_url = "sqlite:///meteorites.sqlite"
engine = create_engine(database_url)

In [8]:
# Random sample for SPEED
# A fixed random_state makes the sample (and therefore the database contents)
# identical on every run. n is capped at the number of available rows because
# sampling without replacement raises ValueError when n exceeds len(df).
df2 = df.sample(n=min(40000, len(df)), random_state=42)
df2.head()

Unnamed: 0,id,rec_class,mass,year,rec_lat,rec_long
17882,34930,L5,5.2,2003,-86.36667,-70.0
1705,520,H5,7.3,1982,-77.04713,157.28227
39193,25154,L6,156.8,1975,-71.5,35.66667
38589,24549,LL3,4.65,1974,-71.5,35.66667
42760,29511,L3,39.07,1986,-71.5,35.66667


In [9]:
# Write the sampled rows to the "meteorites" table of the SQLite database.
# - if_exists="replace": rebuild the table on every run, so re-running the
#   notebook does not append a second copy of the rows.
# - chunksize: method="multi" binds one SQL parameter per cell, and SQLite caps
#   the number of parameters per statement (999 in older default builds), so
#   the insert is split into batches that stay under that limit.
rows_per_batch = max(1, 999 // max(1, len(df2.columns)))
df2.to_sql(
    name="meteorites",
    con=engine,
    index=False,
    if_exists="replace",
    method="multi",
    chunksize=rows_per_batch,
)

40000

In [10]:
# Build a SQLAlchemy inspector bound to the engine so the schema can be examined
inspector = inspect(engine)

# Names of every table currently present in the database
tables = inspector.get_table_names()

# For each table: print its name, a separator line, one "name type" line per
# column, and a blank line to separate it from the next table
for table_name in tables:
    print(table_name, "--------", sep="\n")
    for col_info in inspector.get_columns(table_name):
        print(col_info["name"], col_info["type"])
    print()

meteorites
--------
id BIGINT
rec_class TEXT
mass FLOAT
year BIGINT
rec_lat FLOAT
rec_long FLOAT



In [11]:
# Close the engine's pooled database connections now that the load is finished
engine.dispose()