In [1]:
from shared import *

In [2]:
db_params = {
    'host': config["IN3_HOST"],
    'database': config["IN3_DB"],
    'user': config["IN3_USER"],
    'password': config["IN3_PWD"],
    'port': config["IN3_PORT"]
}

engine = get_engine(db_params)
engine

postgresql+psycopg2://root:root@127.0.0.1:5432/db


Engine(postgresql+psycopg2://root:***@127.0.0.1:5432/db)

In [3]:
sql_query = f"""
select 
    t.latitude, t.longitude, t.geom as geometry, t.adm_prov as province, t.area, t.updated_at, t.date_start, t.date_end, t.percentuale_vegetazione_spontanea as svp_manual, t.validità as validity,
    a.type_name as water_source, 
    -- b.*, 
    c.element_name as environmental_element, c.element_category as environmental_category, 
    d.colture_name as crop_type,
    ft.*
from test_dim_trap t,
    (select DISTINCT ON (i.gid) i.gid, a.* from test_bridge_trap_acque_interne i, test_dim_acque_interne a where i.acque_interne_id = a.acque_interne_id order by i.gid, acque_interne_id) a,
    -- (select DISTINCT ON (i.gid) i.gid, a.* from test_bridge_trap_rete_bonifica i, test_dim_rete_bonifica a where i.rete_bonifica_id = a.rete_bonifica_id order by i.gid, rete_bonifica_id) b,
    (select DISTINCT ON (i.gid) i.gid, a.* from test_bridge_trap_case i, test_dim_case a where i.cid = a.cid order by i.gid, cid) c,
    (select DISTINCT ON (i.gid) i.gid, a.* from test_bridge_trap_uso_suolo i, test_dim_uso_suolo a where i.cid = a.cid order by i.gid, cid) d,
    test_fact_passive_monitoring_normalized ft
    -- , test_veg_sp_mtci14_ndvi07 sp, "percentage_GROUND_TRUTH_2021" spgt
where t.gid = a.gid and t.gid = c.gid and t.gid = d.gid and ft.gid = t.gid; -- and t.gid = b.gid and t.gid = sp.gid and t.gid = spgt.gid;
"""
dt = pd.read_sql(sql_query, engine)
dt.columns = [x.replace("day_", "").replace("rad_", "radiations_").replace("u_", "humidity_").replace("prec_", "precipitations_").replace("t_", "temperature_") for x in dt.columns]
dt = dt.rename({
    'Giorni monitoraggio': 'days_since_last_monitoring',
    'grado_giorno': 'degree_days',
    'Adulti': 'adults',
    'Giovani II - III (small)': 'small_instars',
    'Giovani IV - V (large)': 'large_instars',
    'evapo_trans': 'evapo_transpiration'
}, axis=1)
dt["total_captures"] = dt["adults"] + dt["small_instars"] + dt["large_instars"]
dt = dt.replace("", np.nan).convert_dtypes()
for c in [x for x in dt.columns if "_min" in x or "_max" in x or "_avg" in x or "_day" in x]:
    dt[c] = dt[c].astype(float)
dt['timestamp'] = pd.to_datetime(dt['timestamp'], unit='s')
dt['date'] = dt['timestamp'].dt.date
dt['year'] = dt['timestamp'].dt.year
dt['month'] = dt['timestamp'].dt.month
dt['month'] = dt.apply(lambda x: '{}-{}'.format(x["year"], x["month"]), axis=1)
dt['week'] = dt['timestamp'].dt.isocalendar().week

dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10949 entries, 0 to 10948
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   latitude                    10949 non-null  Float64       
 1   longitude                   10949 non-null  Float64       
 2   geometry                    10949 non-null  string        
 3   province                    10949 non-null  string        
 4   area                        10949 non-null  string        
 5   updated_at                  10949 non-null  datetime64[ns]
 6   date_start                  10949 non-null  object        
 7   date_end                    10949 non-null  object        
 8   svp_manual                  10198 non-null  Int64         
 9   validity                    10949 non-null  string        
 10  water_source                10949 non-null  string        
 11  environmental_element       10949 non-null  string    

In [4]:
dt.columns

Index(['latitude', 'longitude', 'geometry', 'province', 'area', 'updated_at',
       'date_start', 'date_end', 'svp_manual', 'validity', 'water_source',
       'environmental_element', 'environmental_category', 'crop_type',
       'timestamp', 'gid', 'adults', 'small_instars', 'large_instars',
       'temperature_avg', 'temperature_max', 'temperature_min', 'humidity_avg',
       'humidity_max', 'humidity_min', 'precipitations_day', 'radiations_day',
       'evapo_transpiration', 'wind_direction_day', 'wind_speed_avg',
       'wind_speed_max', 'Ore utili', 'days_since_last_monitoring',
       'degree_days', 'total_captures', 'date', 'year', 'month', 'week'],
      dtype='object')

![image](./imgs/cimice-dfm.JPG)


In [5]:
tables = {
    "cimice_ft_captures": {"col": ['gid', 'timestamp', 'adults', 'small_instars', 'large_instars', 'temperature_avg', 'temperature_max', 'temperature_min', 'humidity_avg', 'humidity_max', 'humidity_min', 'precipitations_day', 'radiations_day', 'evapo_transpiration', 'wind_direction_day', 'wind_speed_avg', 'wind_speed_max', 'days_since_last_monitoring', 'degree_days', 'total_captures']},
    "cimice_dt_trap": {"col": ['gid', 'latitude', 'longitude', 'geometry', 'province', 'area', 'validity', 'water_source', 'environmental_element', 'environmental_category', 'crop_type' ]},
    "cimice_dt_time": {"col": ['timestamp', 'date', 'month', 'year', 'week' ]}
}
tables

{'cimice_ft_captures': {'col': ['gid',
   'timestamp',
   'adults',
   'small_instars',
   'large_instars',
   'temperature_avg',
   'temperature_max',
   'temperature_min',
   'humidity_avg',
   'humidity_max',
   'humidity_min',
   'precipitations_day',
   'radiations_day',
   'evapo_transpiration',
   'wind_direction_day',
   'wind_speed_avg',
   'wind_speed_max',
   'days_since_last_monitoring',
   'degree_days',
   'total_captures']},
 'cimice_dt_trap': {'col': ['gid',
   'latitude',
   'longitude',
   'geometry',
   'province',
   'area',
   'validity',
   'water_source',
   'environmental_element',
   'environmental_category',
   'crop_type']},
 'cimice_dt_time': {'col': ['timestamp', 'date', 'month', 'year', 'week']}}

In [6]:
ft = dt[tables["cimice_ft_captures"]["col"]].drop_duplicates()
ft.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10949 entries, 0 to 10948
Data columns (total 20 columns):
 #   Column                      Non-Null Count  Dtype        
---  ------                      --------------  -----        
 0   gid                         10949 non-null  Int64        
 1   timestamp                   10949 non-null  datetime64[s]
 2   adults                      10949 non-null  Float64      
 3   small_instars               10949 non-null  Float64      
 4   large_instars               10949 non-null  Float64      
 5   temperature_avg             10949 non-null  float64      
 6   temperature_max             10949 non-null  float64      
 7   temperature_min             10949 non-null  float64      
 8   humidity_avg                10949 non-null  float64      
 9   humidity_max                8180 non-null   float64      
 10  humidity_min                8180 non-null   float64      
 11  precipitations_day          10949 non-null  float64      
 12  radi

In [7]:
dt_t = dt[tables["cimice_dt_trap"]["col"]].drop_duplicates()
dt_t

Unnamed: 0,gid,latitude,longitude,geometry,province,area,validity,water_source,environmental_element,environmental_category,crop_type
0,862,44.251545,12.016743,0101000020E6100000B5DFDA8992082840753C66A03220...,FC,FC-RA-RN,si,Nessuno,"Fagacee, Ulmacee, Malvales (quercia, olmo, tig...",Giardini e boschetti,CEREALE INVERNALE
1,471,44.297194,11.837763,0101000020E6100000B59F3138EFAC2740F54652770A26...,RA,FC-RA-RN,si,lago artificiale,Susino,Colture arboree,Nessuno
2,40,44.365355,11.606561,0101000020E6100000E00ED4298F3627400D37E0F3C32E...,BO,FC-RA-RN,si,Nessuno,"Bacche (Cornus, Rhamnus frangula, Viburnum)",Siepi e bordure,CEREALE INVERNALE
3,826,44.775181,10.990222,0101000020E6100000368FC360FEFA254084BD89213963...,MO,MO-RE,si,Nessuno,Fabbricato ad uso abitativo,Fabbricati ad uso abitativo,CEREALE INVERNALE
4,467,44.374143,11.845897,0101000020E6100000528E137019B12740A902A4ECE32F...,RA,FC-RA-RN,si,Nessuno,Susino,Colture arboree,Nessuno
...,...,...,...,...,...,...,...,...,...,...,...
1623,87,44.823247,11.789847,0101000020E6100000A2AC29DA669427401C85DA256069...,FE,BO-FE,si,Nessuno,Melo,Colture arboree,CEREALE INVERNALE
1739,104,44.251545,12.016743,0101000020E6100000B5DFDA8992082840753C66A03220...,FC,FC-RA-RN,si,Nessuno,"Fagacee, Ulmacee, Malvales (quercia, olmo, tig...",Giardini e boschetti,PICCOLI FRUTTI
1767,174,44.758544,10.741033,0101000020E6100000CA81C3B0687B25401D0BC0F51761...,RE,MO-RE,no,Nessuno,"Bacche (Cornus, Rhamnus frangula, Viburnum)",Siepi e bordure,CEREALE INVERNALE
1886,78,44.968972,12.002842,0101000020E6100000EE18688874012840D9DDF247077C...,FE,BO-FE,si,Nessuno,Pero,Colture arboree,CEREALE INVERNALE


In [8]:
from psycopg2 import sql

engine = get_engine(out_db_params)
conn = get_connection(config)
cursor = conn.cursor()

for tablename, columns in tables.items():
    try: 
        create_db_query = sql.SQL("DROP TABLE {} CASCADE;").format(sql.Identifier(tablename))
        cursor.execute(create_db_query)
        conn.commit()
    except:
        print("Table {} does not exist".format(tablename))
    
    try: 
        dt[columns["col"]].drop_duplicates().to_sql(tablename, engine, if_exists='replace', index=False)
    except Exception as e:
        print(e) 
        print("FAIL to create {}".format(tablename))


for statement in [
    "ALTER TABLE cimice_dt_time ADD PRIMARY KEY (timestamp);",
    "ALTER TABLE cimice_dt_trap ADD PRIMARY KEY (gid);",
    "ALTER TABLE cimice_ft_captures ADD PRIMARY KEY (gid, timestamp);",
    "ALTER TABLE cimice_ft_captures ADD FOREIGN KEY (timestamp) REFERENCES cimice_dt_time(timestamp);",
    "ALTER TABLE cimice_ft_captures ADD FOREIGN KEY (gid) REFERENCES cimice_dt_trap(gid);",
    ]:
    try: 
        print(statement)
        create_db_query = sql.SQL(statement)
        cursor.execute(create_db_query)
        conn.commit()
    except Exception as e:
        print(e) 

cursor.close()
conn.close()

postgresql+psycopg2://root:root@127.0.0.1:5432/db
Table cimice_ft_captures does not exist
Table cimice_dt_trap does not exist
Table cimice_dt_time does not exist
ALTER TABLE cimice_dt_time ADD PRIMARY KEY (timestamp);
ALTER TABLE cimice_dt_trap ADD PRIMARY KEY (gid);
ALTER TABLE cimice_ft_captures ADD PRIMARY KEY (gid, timestamp);
ALTER TABLE cimice_ft_captures ADD FOREIGN KEY (timestamp) REFERENCES cimice_dt_time(timestamp);
ALTER TABLE cimice_ft_captures ADD FOREIGN KEY (gid) REFERENCES cimice_dt_trap(gid);
