In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from netCDF4 import Dataset
import xarray as xr
import glob
import os
import py7zr

## Build list of COMIDs (by HUC10) from retrospective datasets (temp and covariates)

In [8]:
temp_dir = '/Volumes/Elements/URycki/temp_data/'

In [13]:
# Loop through HUC 10s
retro_comids = []
temp_dir_retro = os.path.join(temp_dir, 'preds_retro/')
huc10_retro_files = glob.glob(os.path.join(temp_dir_retro, 'temp_files', '*/*.csv'))
huc10_retro_files.sort()
len(huc10_retro_files)

1541

In [140]:
# Caution: This cell takes ~3 hours to run
# Temp files
retro_comids = {}
for h in huc10_retro_files:
    cur_huc = h[-14:-4] 
    print('\t', f'{cur_huc}')

    stdf = pd.read_csv(h, usecols = ['COMID'])
    retro_comids[cur_huc] = stdf.COMID.unique().tolist()

	 1701010106


In [None]:
#Make dataframe with COMID as index [from retrospective temp predictions dataset]
frames = []
for k in retro_comids.keys():
    curdf = pd.DataFrame(retro_comids[k], columns = ['COMID'])
    curdf['huc10'] = k
    frames.append(curdf)
comids = pd.concat(frames).set_index('COMID')
comids.to_csv(os.path.join(temp_dir_retro, 'comids_by_huc_retro.csv'))

In [128]:
cov_csvs = glob.glob(os.path.join(temp_dir, 'covariates_retro', 'cov_csvs', '*'))
cov_csvs.sort()

In [None]:
%%time
# Covariate files
retro_cov_comids = {}
for h in cov_csvs:
    cur_huc = h[-19:-9]
    print('\t', f'{cur_huc}')

    stdf = pd.read_csv(h, usecols = ['COMID'])
    retro_cov_comids[cur_huc] = stdf.COMID.unique().tolist()

In [None]:
frames = []
for k in retro_cov_comids.keys():
    curdf = pd.DataFrame(retro_cov_comids[k], columns = ['COMID'])
    curdf['huc10'] = k
    frames.append(curdf)
comids = pd.concat(frames).set_index('COMID')
comids.to_csv(os.path.join(temp_dir_retro, 'comids_by_huc_cov_retro.csv'))

### Import lists of COMIDs

In [None]:
comids_temp = pd.read_csv(os.path.join(temp_dir_retro, 'comids_by_huc_retro.csv'))
comids_cov = pd.read_csv(os.path.join(temp_dir_retro, 'comids_by_huc_cov_retro.csv'))

comids_all = pd.concat([comids_temp.set_index('COMID'), comids_cov.set_index('COMID')])
#df3[~df3.index.duplicated(keep='first')]
comids_all = comids_all[~comids_all.index.duplicated(keep = 'first')]
comids_all.sort_index(inplace = True)

comids_all

## Layer 1: ComID geometry table
### Import flowlines

In [2]:
dbfhydro_files = glob.glob(r'../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/*/*.dbf') 
dbfhydro_files

['../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDFlowline.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDLineEventFC.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDArea.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDPoint.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDAreaEventFC.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDPointEventFC.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDLine.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDSnapshot/Hydrography/NHDWaterbody.dbf']

In [3]:
flowlines = gpd.read_file(dbfhydro_files[0])
flowlines[:3]

  return ogr_read(


Unnamed: 0,COMID,FDATE,RESOLUTION,GNIS_ID,GNIS_NAME,LENGTHKM,REACHCODE,FLOWDIR,WBAREACOMI,FTYPE,FCODE,SHAPE_LENG,ENABLED,GNIS_NBR,geometry
0,9301535,1999-10-08,Medium,,,0.999,9040002011583,Uninitialized,0,StreamRiver,46006,0.011661,True,0,"LINESTRING Z (-114.05855 48.99999 0, -114.0583..."
1,22968128,2001-03-02,Medium,,,0.134,10030104009812,Uninitialized,0,StreamRiver,46003,0.001671,True,0,"LINESTRING Z (-113.00089 47.46274 0, -113.0020..."
2,22968138,2001-03-02,Medium,,,0.217,10030104009813,Uninitialized,0,StreamRiver,46003,0.002145,True,0,"LINESTRING Z (-113.0013 47.45837 0, -113.00137..."


In [190]:
flowlines.columns

Index(['COMID', 'FDATE', 'RESOLUTION', 'GNIS_ID', 'GNIS_NAME', 'LENGTHKM',
       'REACHCODE', 'FLOWDIR', 'WBAREACOMI', 'FTYPE', 'FCODE', 'SHAPE_LENG',
       'ENABLED', 'GNIS_NBR', 'geometry'],
      dtype='object')

In [249]:
comids_all

Unnamed: 0_level_0,huc10
COMID,Unnamed: 1_level_1
-81754,1712000410
-81753,1712000810
-81752,1712000907
-81751,1704021405
-81750,1704021506
...,...
947120094,1712000603
947120095,1712000603
947120098,1712000603
947120100,1712000603


In [250]:
# concat Fcode and geometry from flowlines df
df_geom = pd.concat([comids_all, flowlines[['COMID', 'FCODE', 'geometry']].set_index('COMID')], axis = 1, join = 'inner')
df_geom

Unnamed: 0_level_0,huc10,FCODE,geometry
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
22877591,1701010117,55800,"LINESTRING Z (-115.97665 48.55862 0, -115.9780..."
22877593,1701010117,55800,"LINESTRING Z (-115.97724 48.55831 0, -115.9766..."
22877595,1701010117,46006,"LINESTRING Z (-116.00101 48.55763 0, -116.0006..."
22877597,1701010117,46006,"LINESTRING Z (-116.01247 48.55595 0, -116.0086..."
22877599,1701010117,46006,"LINESTRING Z (-116.02837 48.55589 0, -116.0243..."
...,...,...,...
947120094,1712000603,46006,"LINESTRING Z (-120.32129 42.38282 0, -120.3207..."
947120095,1712000603,46006,"LINESTRING Z (-120.31769 42.38113 0, -120.3175..."
947120098,1712000603,46006,"LINESTRING Z (-120.33013 42.3911 0, -120.32983..."
947120100,1712000603,46003,"LINESTRING Z (-120.30641 42.35695 0, -120.3063..."


In [194]:
# concat spatial covariates
spatial_covs = pd.read_csv(r'../DATA/Stream_Temp/spatial_data.csv', index_col = 'COMID')
spatial_covs.rename(columns = {'cov.extru_vol_cat': 'cov.pct_extru_vol_cat'}, inplace = True)
spatial_covs

Unnamed: 0_level_0,cov.length_km,cov.stream_order,cov.path_length,cov.tot.da.sqkm,cov.elev_max_smo,cov.elev_min_smo,cov.slope,cov.sinuosity,cov.lat_v,cov.elev_mean_smo,...,cov.pct_urb_all_ws,cov.canopy_rca,cov.canopy_line,cov.area_sqkm,cov.canopy_ws,cov.dam_name,cov.proportion_dam_influenced,cov.distance_below_dam,cov.dam_hgt_m,cov.dam_main_use
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9301535,0.999,,,,,,,1.47,,,...,,,,,,,,,,
22226592,0.733,,,,,,,1.13,,,...,,,,,,,,,,
22226600,6.117,,,,,,,1.62,,,...,,,,,,,,,,
22877591,0.247,6.0,1290.195,28030.5369,56102.0,55932.0,0.006883,1.00,48.558399,560.170,...,0.946698,89.000000,90.0,1.9161,88.592748,Libby,0.831332,71.294,129.0,Hydroelectricity
22877593,0.055,3.0,1290.442,58.4136,56129.0,56102.0,0.004909,1.00,48.556999,561.155,...,0.000000,90.000000,90.0,0.4284,90.227047,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947120097,0.161,,,,,,,1.00,,,...,,,,,,,,,,
947120098,1.218,2.0,40.722,10.9971,145858.0,139672.0,0.050788,1.04,42.388000,1427.650,...,0.000000,82.333333,82.0,0.4815,82.684671,,,,,
947120099,0.366,,,,,,,1.01,,,...,,,,,,,,,,
947120100,2.893,3.0,39.940,44.0622,140135.0,138629.0,0.005206,1.12,42.368198,1393.820,...,0.449365,81.000000,78.0,0.2196,80.865589,,,,,


In [195]:
spatial_covs.loc[~spatial_covs.index.isin(df_geom.index)]

Unnamed: 0_level_0,cov.length_km,cov.stream_order,cov.path_length,cov.tot.da.sqkm,cov.elev_max_smo,cov.elev_min_smo,cov.slope,cov.sinuosity,cov.lat_v,cov.elev_mean_smo,...,cov.pct_urb_all_ws,cov.canopy_rca,cov.canopy_line,cov.area_sqkm,cov.canopy_ws,cov.dam_name,cov.proportion_dam_influenced,cov.distance_below_dam,cov.dam_hgt_m,cov.dam_main_use
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9301535,0.999,,,,,,,1.47,,,...,,,,,,,,,,
22226592,0.733,,,,,,,1.13,,,...,,,,,,,,,,
22226600,6.117,,,,,,,1.62,,,...,,,,,,,,,,
22877719,0.036,3.0,1306.120,125.0667,57383.0,57383.0,0.00001,1.00,,573.830,...,,,,,,,,,,
22877871,0.024,2.0,1314.134,2.0835,58878.0,58695.0,0.07625,1.00,,587.865,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947120083,0.058,3.0,7.035,1785.2382,124996.0,124996.0,0.00001,1.00,43.350601,1249.960,...,2.423727,0.0,0.0,57.8727,18.626561,,,,,
947120092,0.869,6.0,0.000,3182.1678,124996.0,124996.0,0.00001,1.00,43.354499,1249.960,...,1.356886,7.5,7.5,0.0333,48.597820,,,,,
947120096,0.626,,,,,,,1.09,,,...,,,,,,,,,,
947120097,0.161,,,,,,,1.00,,,...,,,,,,,,,,


In [196]:
df_all = pd.concat([df_geom, spatial_covs], axis = 1, join = 'inner')

In [200]:
spatial_gpd = gpd.GeoDataFrame(df_all)
spatial_gpd.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands. British Virgin Islands

In [252]:
#spatial_gpd[:25].explore()

### Import subcatchments (aka "wings")

In [254]:
subcatchment_files = glob.glob(r'../DATA/NHDPlusPN/NHDPlus17/NHDPlusCatchment/*.dbf') 
subcatchment_files

['../DATA/NHDPlusPN/NHDPlus17/NHDPlusCatchment/Catchment.dbf',
 '../DATA/NHDPlusPN/NHDPlus17/NHDPlusCatchment/featureidgridcode.dbf']

In [255]:
subcs = gpd.read_file(subcatchment_files[0])
subcs[:3]

Unnamed: 0,GRIDCODE,FEATUREID,SOURCEFC,AreaSqKM,geometry
0,287844,24324012,NHDFlowline,1.7658,"POLYGON ((-114.02984 45.94511, -114.03022 45.9..."
1,287845,24324160,NHDFlowline,2.9475,"POLYGON ((-113.77339 45.90888, -113.77354 45.9..."
2,287846,24324144,NHDFlowline,4.9176,"POLYGON ((-113.9427 45.90121, -113.94291 45.90..."


In [256]:
subcs['catchment_ID'] = subcs['FEATUREID']
subcs

Unnamed: 0,GRIDCODE,FEATUREID,SOURCEFC,AreaSqKM,geometry,catchment_ID
0,287844,24324012,NHDFlowline,1.7658,"POLYGON ((-114.02984 45.94511, -114.03022 45.9...",24324012
1,287845,24324160,NHDFlowline,2.9475,"POLYGON ((-113.77339 45.90888, -113.77354 45.9...",24324160
2,287846,24324144,NHDFlowline,4.9176,"POLYGON ((-113.9427 45.90121, -113.94291 45.90...",24324144
3,287847,24324280,NHDFlowline,0.0423,"POLYGON ((-114.21125 45.86419, -114.21132 45.8...",24324280
4,287848,24324608,NHDFlowline,2.5938,"POLYGON ((-114.09189 45.74334, -114.09226 45.7...",24324608
...,...,...,...,...,...,...
231693,522958,23989721,NHDFlowline,9.3834,"POLYGON ((-122.50254 47.40671, -122.50292 47.4...",23989721
231694,522959,23989715,NHDFlowline,16.4007,"POLYGON ((-122.9147 47.22345, -122.91356 47.22...",23989715
231695,522960,23989745,NHDFlowline,0.4815,"MULTIPOLYGON (((-122.63292 48.27784, -122.6330...",23989745
231696,522961,24255303,NHDFlowline,0.2196,"MULTIPOLYGON (((-120.9911 48.70464, -120.99099...",24255303


In [257]:
if spatial_gpd.crs == subcs.crs: 
    df = pd.concat([spatial_gpd, subcs.set_index('FEATUREID')['catchment_ID']], join = 'outer', axis = 1).reindex(spatial_gpd.index)
else: print('Error: conflicting CRS')

In [258]:
#df[:50].explore()

In [262]:
df_cols = df.columns.to_list()
new_cols = df_cols[:2] + ['catchment_ID'] + df_cols[3:-1] + ['geometry']
#new_cols

In [246]:
layer1 = df[new_cols]
layer1

Unnamed: 0_level_0,huc10,FCODE,catchment_ID,cov.length_km,cov.stream_order,cov.path_length,cov.tot.da.sqkm,cov.elev_max_smo,cov.elev_min_smo,cov.slope,...,cov.canopy_rca,cov.canopy_line,cov.area_sqkm,cov.canopy_ws,cov.dam_name,cov.proportion_dam_influenced,cov.distance_below_dam,cov.dam_hgt_m,cov.dam_main_use,geometry
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22877591,1.701010e+09,55800.0,22877591,0.247,6.0,1290.195,28030.5369,56102.0,55932.0,0.006883,...,89.000000,90.0,1.9161,88.592748,Libby,0.831332,71.294,129.0,Hydroelectricity,"LINESTRING Z (-115.97665 48.55862 0, -115.9780..."
22877593,1.701010e+09,55800.0,22877593,0.055,3.0,1290.442,58.4136,56129.0,56102.0,0.004909,...,90.000000,90.0,0.4284,90.227047,,,,,,"LINESTRING Z (-115.97724 48.55831 0, -115.9766..."
22877595,1.701010e+09,46006.0,22877595,1.999,3.0,1290.497,58.3101,73226.0,56129.0,0.085528,...,90.500000,90.0,1.0629,90.227450,,,,,,"LINESTRING Z (-116.00101 48.55763 0, -116.0006..."
22877597,1.701010e+09,46006.0,22877597,0.947,3.0,1292.496,54.7596,76725.0,73226.0,0.036948,...,90.500000,90.0,2.2950,90.209778,,,,,,"LINESTRING Z (-116.01247 48.55595 0, -116.0086..."
22877599,1.701010e+09,46006.0,22877599,1.194,3.0,1293.443,48.1401,81497.0,76725.0,0.039966,...,90.333333,90.0,1.2087,90.149263,,,,,,"LINESTRING Z (-116.02837 48.55589 0, -116.0243..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947120094,1.712001e+09,46006.0,947120094,0.364,2.0,40.358,11.2266,139672.0,139048.0,0.017143,...,81.750000,80.5,9.3834,82.665564,,,,,,"LINESTRING Z (-120.32129 42.38282 0, -120.3207..."
947120095,1.712001e+09,46006.0,947120095,0.418,2.0,39.940,11.3337,139048.0,138629.0,0.010024,...,80.500000,80.0,16.4007,82.645100,,,,,,"LINESTRING Z (-120.31769 42.38113 0, -120.3175..."
947120098,1.712001e+09,46006.0,947120098,1.218,2.0,40.722,10.9971,145858.0,139672.0,0.050788,...,82.333333,82.0,0.4815,82.684671,,,,,,"LINESTRING Z (-120.33013 42.3911 0, -120.32983..."
947120100,1.712001e+09,46003.0,947120100,2.893,3.0,39.940,44.0622,140135.0,138629.0,0.005206,...,81.000000,78.0,0.2196,80.865589,,,,,,"LINESTRING Z (-120.30641 42.35695 0, -120.3063..."


In [264]:
layer1.catchment_ID.isnull().unique()

array([False])

## Layer 2: Subcatchment geometry table (aka "wings")

In [223]:
#subcs[:50].explore()

In [267]:
df2 = comids_all.copy()

In [275]:
layer2 = subcs.loc[comids_all.index][['FEATUREID', 'geometry']]
layer2



KeyError: "None of [Index([   -81754,    -81753,    -81752,    -81751,    -81750,    -81749,\n          -81748,    -81747,    -81746,    -81745,\n       ...\n       947120087, 947120088, 947120089, 947120090, 947120091, 947120094,\n       947120095, 947120098, 947120100, 947120101],\n      dtype='int64', name='COMID', length=228673)] are in the [index]"

In [5]:
len(spatial_covs.columns)

76

In [280]:
dbfwbd_files = glob.glob(r'../DATA/NHDPlusPN/NHDPlus17/WBDSnapshot/*/*.dbf') 
dbfwbd_files

['../DATA/NHDPlusPN/NHDPlus17/WBDSnapshot/WBD/WBD_Subwatershed.dbf']

In [281]:
wbds = gpd.read_file(dbfwbd_files[0])
wbds[:3]

Unnamed: 0,OBJECTID,HUC_8,HUC_10,HUC_12,ACRES,NCONTRB_A,HU_10_GNIS,HU_12_GNIS,HU_10_NAME,HU_10_MOD,...,HU_12_NAME,HU_12_MOD,HU_12_TYPE,META_ID,STATES,GlobalID,SHAPE_Leng,SHAPE_Area,GAZ_ID,geometry
0,29788,17010206,1701020601,170102060103,31609.155982,0.0,,,North Fork Flathead River-Kishenehn Creek,NM,...,North Fork Flathead River-Kishenehn Creek,NM,S,MT08,"MT,CAN",{33FBF763-C7B5-4239-AC93-30FCDAF6560C},1.119923,0.015712,-70260,"POLYGON ((-114.22732 48.99958, -114.22856 48.9..."
1,29792,17010106,1701010601,170101060101,26139.003487,0.0,,,Wigwam River,NM,...,Wigwam River,NM,S,MT07,"MT,CAN",{FE944240-98C7-4BDC-BC4E-BC95EA8A69F3},0.706215,0.012991,-70261,"POLYGON ((-114.72947 49.00001, -114.72966 48.9..."
2,29794,17010101,1701010104,170101010404,17616.666849,0.0,,,Young Creek-Pinkham Creek,NM,...,Phillips Creek,NM,S,MT01,MT,{2E15B098-D388-4F7B-BE46-DD86DBDAA4E7},0.879199,0.008756,-70262,"POLYGON ((-114.91357 49.00007, -114.91414 48.9..."


In [282]:
wbds.columns

Index(['OBJECTID', 'HUC_8', 'HUC_10', 'HUC_12', 'ACRES', 'NCONTRB_A',
       'HU_10_GNIS', 'HU_12_GNIS', 'HU_10_NAME', 'HU_10_MOD', 'HU_10_TYPE',
       'HU_12_DS', 'HU_12_NAME', 'HU_12_MOD', 'HU_12_TYPE', 'META_ID',
       'STATES', 'GlobalID', 'SHAPE_Leng', 'SHAPE_Area', 'GAZ_ID', 'geometry'],
      dtype='object')

In [283]:
wbds['HUC_12'].value_counts()

HUC_12
171003120302    335
171002010105    121
171003060401    115
171003060106    108
171003040308     84
               ... 
170703030904      1
170501161104      1
170703040801      1
171200020204      1
170200010203      1
Name: count, Length: 8240, dtype: int64

In [271]:
df

Unnamed: 0_level_0,huc10,FCODE,geometry,cov.length_km,cov.stream_order,cov.path_length,cov.tot.da.sqkm,cov.elev_max_smo,cov.elev_min_smo,cov.slope,...,cov.canopy_rca,cov.canopy_line,cov.area_sqkm,cov.canopy_ws,cov.dam_name,cov.proportion_dam_influenced,cov.distance_below_dam,cov.dam_hgt_m,cov.dam_main_use,catchment_ID
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22877591,1.701010e+09,55800.0,"LINESTRING Z (-115.97665 48.55862 0, -115.9780...",0.247,6.0,1290.195,28030.5369,56102.0,55932.0,0.006883,...,89.000000,90.0,1.9161,88.592748,Libby,0.831332,71.294,129.0,Hydroelectricity,22877591
22877593,1.701010e+09,55800.0,"LINESTRING Z (-115.97724 48.55831 0, -115.9766...",0.055,3.0,1290.442,58.4136,56129.0,56102.0,0.004909,...,90.000000,90.0,0.4284,90.227047,,,,,,22877593
22877595,1.701010e+09,46006.0,"LINESTRING Z (-116.00101 48.55763 0, -116.0006...",1.999,3.0,1290.497,58.3101,73226.0,56129.0,0.085528,...,90.500000,90.0,1.0629,90.227450,,,,,,22877595
22877597,1.701010e+09,46006.0,"LINESTRING Z (-116.01247 48.55595 0, -116.0086...",0.947,3.0,1292.496,54.7596,76725.0,73226.0,0.036948,...,90.500000,90.0,2.2950,90.209778,,,,,,22877597
22877599,1.701010e+09,46006.0,"LINESTRING Z (-116.02837 48.55589 0, -116.0243...",1.194,3.0,1293.443,48.1401,81497.0,76725.0,0.039966,...,90.333333,90.0,1.2087,90.149263,,,,,,22877599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947120094,1.712001e+09,46006.0,"LINESTRING Z (-120.32129 42.38282 0, -120.3207...",0.364,2.0,40.358,11.2266,139672.0,139048.0,0.017143,...,81.750000,80.5,9.3834,82.665564,,,,,,947120094
947120095,1.712001e+09,46006.0,"LINESTRING Z (-120.31769 42.38113 0, -120.3175...",0.418,2.0,39.940,11.3337,139048.0,138629.0,0.010024,...,80.500000,80.0,16.4007,82.645100,,,,,,947120095
947120098,1.712001e+09,46006.0,"LINESTRING Z (-120.33013 42.3911 0, -120.32983...",1.218,2.0,40.722,10.9971,145858.0,139672.0,0.050788,...,82.333333,82.0,0.4815,82.684671,,,,,,947120098
947120100,1.712001e+09,46003.0,"LINESTRING Z (-120.30641 42.35695 0, -120.3063...",2.893,3.0,39.940,44.0622,140135.0,138629.0,0.005206,...,81.000000,78.0,0.2196,80.865589,,,,,,947120100


In [276]:
for h10 in [df.huc10.unique()[0]]:
    print(h10)

1701010117.0


In [277]:
    cur_comids = df.loc[df.huc10 == h10]
    cur_comids

Unnamed: 0_level_0,huc10,FCODE,geometry,cov.length_km,cov.stream_order,cov.path_length,cov.tot.da.sqkm,cov.elev_max_smo,cov.elev_min_smo,cov.slope,...,cov.canopy_rca,cov.canopy_line,cov.area_sqkm,cov.canopy_ws,cov.dam_name,cov.proportion_dam_influenced,cov.distance_below_dam,cov.dam_hgt_m,cov.dam_main_use,catchment_ID
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22877591,1.701010e+09,55800.0,"LINESTRING Z (-115.97665 48.55862 0, -115.9780...",0.247,6.0,1290.195,28030.5369,56102.0,55932.0,0.006883,...,89.000000,90.000000,1.9161,88.592748,Libby,0.831332,71.294,129.0,Hydroelectricity,22877591
22877593,1.701010e+09,55800.0,"LINESTRING Z (-115.97724 48.55831 0, -115.9766...",0.055,3.0,1290.442,58.4136,56129.0,56102.0,0.004909,...,90.000000,90.000000,0.4284,90.227047,,,,,,22877593
22877595,1.701010e+09,46006.0,"LINESTRING Z (-116.00101 48.55763 0, -116.0006...",1.999,3.0,1290.497,58.3101,73226.0,56129.0,0.085528,...,90.500000,90.000000,1.0629,90.227450,,,,,,22877595
22877597,1.701010e+09,46006.0,"LINESTRING Z (-116.01247 48.55595 0, -116.0086...",0.947,3.0,1292.496,54.7596,76725.0,73226.0,0.036948,...,90.500000,90.000000,2.2950,90.209778,,,,,,22877597
22877599,1.701010e+09,46006.0,"LINESTRING Z (-116.02837 48.55589 0, -116.0243...",1.194,3.0,1293.443,48.1401,81497.0,76725.0,0.039966,...,90.333333,90.000000,1.2087,90.149263,,,,,,22877599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22882357,1.701010e+09,46003.0,"LINESTRING Z (-116.03921 48.58215 0, -116.0389...",1.237,1.0,1296.116,2.8719,88984.0,83774.0,0.047886,...,91.000000,91.000000,3.1842,91.000000,,,,,,22882357
22882359,1.701010e+09,46006.0,"LINESTRING Z (-116.02895 48.57358 0, -116.0286...",2.673,1.0,1293.443,5.5350,83774.0,76725.0,0.026371,...,90.333333,90.333333,2.1357,90.679241,,,,,,22882359
22882433,1.701010e+09,46006.0,"LINESTRING Z (-115.89488 48.45376 0, -115.8950...",0.266,4.0,1305.372,213.0471,58746.0,58379.0,0.031638,...,89.000000,87.000000,4.8285,89.702518,,,,,,22882433
22882435,1.701010e+09,46006.0,"LINESTRING Z (-115.89609 48.45207 0, -115.8949...",0.239,4.0,1305.617,212.1471,59001.0,58746.0,0.028652,...,87.500000,87.000000,6.7212,89.705498,,,,,,22882435


In [286]:
len(wbds)

9823

In [289]:
len(wbds.HUC_12.unique())

8241

In [309]:
#duplicates = df[df.duplicated(subset=['A'], keep=False)]
duplicates = wbds[wbds.duplicated(subset = ['HUC_12'], keep = False)]
dups1 = duplicates.loc[duplicates.HUC_12 == duplicates.HUC_12.unique()[2]]
len(dups1)

27

In [310]:
dups1.explore()

In [285]:
    for h12 in wbds.rows()[:3]:
        print(h12)

AttributeError: 'GeoDataFrame' object has no attribute 'rows'

In [None]:
        subcs[cur_comids.index].centroid.within(huc12) #should return a df of a several subcs in one huc12
        #Assign huc12 to df
        #frame_list.append(df)
        

In [12]:
covariate_metadata = pd.read_csv('../DATA/Stream_Temp/covariate_metadata.csv', nrows = 100)
covariate_metadata.tail(5)

Unnamed: 0,COVARIATE,UNITS,SYMBOL,DESCRIPTION,DATA SOURCE (Retrospective),DATA SOURCE (GCM Scenarios)
95,lookup,unitless,not used,"concatination of reach identifier, year, and d...",,
96,tim.date,date,not used,date,,
97,tim.year,year,not used,year,,
98,cov.canopy_rca,percent,not used,proportional canopy cover in the reach contrib...,need to check,
99,cov.canopy_ws,percent,not used,proportional canopy cover in the upstream wate...,need to check,


In [13]:
covariate_metadata.COVARIATE.to_list()

['COMID',
 'cov.antec_air_temp',
 'cov.air_temp_mean_ws',
 'cov.SWE_ws',
 'cov.daylength_hours',
 'tim.doy',
 'cov.std_mean_flow',
 'cov.SWE_1Apr',
 'cov.lat_v',
 'cov.elev_mean_smo',
 'cov.elev_diff',
 'cov.area_km2_ws_log',
 'cov.BFI_cat',
 'cov.slope',
 'cov.pct_ow_ws',
 'cov.pct_wet_all_ws',
 'cov.pct_ice_ws',
 'cov.pct_for_all_cat_rip100m',
 'cov.canopy_line',
 'cov.pct_urb_all_ws',
 'cov.pct_extru_vol_ws',
 'cov.precip_cat',
 'cov.air_temp_range_cat',
 nan,
 'cov.air_temp_max_cat',
 'cov.air_temp_max_ws',
 'cov.air_temp_mean_cat',
 'cov.air_temp_min_cat',
 'cov.air_temp_min_ws',
 'cov.area_km2_ws',
 'cov.area_sqkm',
 'cov.BFI_ws',
 'cov.canal_dens_cat',
 'cov.canal_dens_ws',
 'cov.dam_dens_cat',
 'cov.dam_dens_ws',
 'cov.dam_hgt_m',
 'cov.dam_main_use',
 'cov.dam_name',
 'cov.dam_nrm_stor_cat',
 'cov.dam_nrm_stor_ws',
 'cov.distance_below_dam',
 'cov.elev_max_smo',
 'cov.elev_min_smo',
 'cov.elev_cat',
 'cov.elev_ws',
 'cov.length_km',
 'cov.om_cat',
 'cov.om_ws',
 'cov.path_leng

In [14]:
set(spatial_covs.columns).difference(set(covariate_metadata.COVARIATE))

set()

In [15]:
temp_dir = '/Volumes/Elements/URycki/temp_data/'

In [16]:
temp_dir_retro = os.path.join(temp_dir, 'preds_retro/')
huc6_files = glob.glob(os.path.join(temp_dir_retro, 'st_pred_*.7z'))
huc6_files.sort()
huc6_files

['/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170101.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170102.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170103.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170200.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170300.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170401.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170402.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170501.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170502.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170601.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170602.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170603.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170701.7z',
 '/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170702.7z',
 '/Volumes/Elements/URycki/temp_da

In [17]:
# List of HUC10s from RSX RME_scrape (assuming this is the comprehensive list?)
rsx_hucs_df = pd.read_csv('../../Repos/DATA/Region17_HUC10s.csv', skiprows = [0], names = ['huc10'])
rsx_hucs = rsx_hucs_df.huc10.to_list()
print(len(rsx_hucs))
rsx_hucs[:5]

1526


[1710010301, 1710030501, 1702001003, 1710010501, 1702000806]

## Build retrospective projects

In [18]:
comid_by_huc = {}

### Zipped database 
1. daily stream temperature retrospective
2. daily covariates retrospective

In [19]:
def extract_7zs(archive):
    with py7zr.SevenZipFile(archive, mode='r') as z:
        z.extractall(path = os.path.join(temp_dir_retro, 'tmp'))
    return None

In [20]:
file = huc6_files[4]
file

'/Volumes/Elements/URycki/temp_data/preds_retro/st_pred_170300.7z'

In [None]:
extract_7zs(file)

In [22]:
import sqlite3

In [23]:
huc_files = glob.glob(os.path.join(temp_dir_retro, 'tmp', '*.csv'))

In [24]:
cur_file = huc_files[0]
cur_file

'/Volumes/Elements/URycki/temp_data/preds_retro/tmp/st_pred_1703000101.csv'

In [25]:
cur_huc = cur_file[-14:-4]
cur_huc

'1703000101'

In [70]:
df_raw = pd.read_csv(cur_file, parse_dates = ['tim.date'], engine = 'python', on_bad_lines = 'warn')
df = df_raw.sort_values(by = ['COMID', 'tim.date'])

In [71]:
df[:5]

Unnamed: 0,lookup,COMID,tim.date,cov.antec_air_temp,cov.std_mean_flow,prd.stream_temp
0,24125807_1990_1,24125807,1990-01-02,,1.054899,
337,24125807_1990_2,24125807,1990-01-03,,1.043976,
674,24125807_1990_3,24125807,1990-01-04,,1.032348,
1011,24125807_1990_4,24125807,1990-01-05,,1.020583,
1348,24125807_1990_5,24125807,1990-01-06,,1.009277,


In [73]:
df.tail()

Unnamed: 0,lookup,COMID,tim.date,cov.antec_air_temp,cov.std_mean_flow,prd.stream_temp
3937507,25006827_2021_361,25006827,2021-12-28,-6.63719,0.821878,
3937844,25006827_2021_362,25006827,2021-12-29,-7.173562,0.821878,
3938181,25006827_2021_363,25006827,2021-12-30,-7.673876,0.821878,
3938518,25006827_2021_364,25006827,2021-12-31,-8.044447,0.821878,
3938855,25006827_2021_365,25006827,2022-01-01,-8.225133,0.821878,


In [75]:
temp = df[['COMID', 'tim.date', 'prd.stream_temp']]
temp

Unnamed: 0,COMID,tim.date,prd.stream_temp
0,24125807,1990-01-02,
337,24125807,1990-01-03,
674,24125807,1990-01-04,
1011,24125807,1990-01-05,
1348,24125807,1990-01-06,
...,...,...,...
3937507,25006827,2021-12-28,
3937844,25006827,2021-12-29,
3938181,25006827,2021-12-30,
3938518,25006827,2021-12-31,


In [81]:
comid_by_huc[cur_huc] = temp.COMID.unique().tolist()

In [84]:
cur_huc

'1703000101'

In [88]:
temp_dir

'/Volumes/Elements/URycki/temp_data/'

In [90]:
conn = sqlite3.connect(os.path.join(temp_dir, 'Outputs', 'retro', f'retrospective_daily_{cur_huc}.db'))

# Write the DataFrame to a table in the database
temp.to_sql("stream_temperature", conn, if_exists="replace", index=False)

# Close the connection
conn.close()

In [99]:
#test
gpd.read_file(os.path.join(temp_dir, 'Outputs', 'retro', f'retrospective_daily_{cur_huc}.db'))

Unnamed: 0,COMID,tim.date,prd.stream_temp
0,24125807,1990-01-02,
1,24125807,1990-01-03,
2,24125807,1990-01-04,
3,24125807,1990-01-05,
4,24125807,1990-01-06,
...,...,...,...
3938851,25006827,2021-12-28,
3938852,25006827,2021-12-29,
3938853,25006827,2021-12-30,
3938854,25006827,2021-12-31,


In [108]:
table2 = df


In [109]:
conn = sqlite3.connect(os.path.join(temp_dir, 'Outputs', 'retro', f'retrospective_daily_{cur_huc}.db'))

# Write the DataFrame to a table in the database
table2.to_sql("test_table", conn, if_exists="replace", index=False)

# Close the connection
conn.close()

In [113]:
gpd.read_file(os.path.join(temp_dir, 'Outputs', 'retro', f'retrospective_daily_{cur_huc}.db'), layer = 'test_table')

Unnamed: 0,lookup,COMID,tim.date,cov.antec_air_temp,cov.std_mean_flow,prd.stream_temp
0,24125807_1990_1,24125807,1990-01-02,,1.054899,
1,24125807_1990_2,24125807,1990-01-03,,1.043976,
2,24125807_1990_3,24125807,1990-01-04,,1.032348,
3,24125807_1990_4,24125807,1990-01-05,,1.020583,
4,24125807_1990_5,24125807,1990-01-06,,1.009277,
...,...,...,...,...,...,...
3938851,25006827_2021_361,25006827,2021-12-28,-6.637190,0.821878,
3938852,25006827_2021_362,25006827,2021-12-29,-7.173562,0.821878,
3938853,25006827_2021_363,25006827,2021-12-30,-7.673876,0.821878,
3938854,25006827_2021_364,25006827,2021-12-31,-8.044447,0.821878,


In [112]:
gpd.list_layers(os.path.join(temp_dir, 'Outputs', 'retro', f'retrospective_daily_{cur_huc}.db'))

Unnamed: 0,name,geometry_type
0,stream_temperature,
1,test_table,


In [None]:
Next: see if it saves space to use one db file instead of two.

In [42]:
temp[~temp.isnull().any(axis =1)]

Unnamed: 0_level_0,tim.date,prd.stream_temp
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1
24127141,1990-01-02,0.815321
25006797,1990-01-02,1.030870
24127141,1990-01-03,1.113983
25006797,1990-01-03,1.814799
24127141,1990-01-04,3.167158
...,...,...
25006819,2021-10-01,1.977966
25006821,2021-10-01,0.914263
25006823,2021-10-01,0.225544
25006825,2021-10-01,1.238325


In [37]:
df[~df.isnull()]

Unnamed: 0_level_0,tim.date,prd.stream_temp
COMID,Unnamed: 1_level_1,Unnamed: 2_level_1
24125807,1990-01-02,
24125809,1990-01-02,
24125811,1990-01-02,
24125813,1990-01-02,
24125815,1990-01-02,
...,...,...
25006819,2022-01-01,
25006821,2022-01-01,
25006823,2022-01-01,
25006825,2022-01-01,


In [36]:
df[~df.isna().all(axis=1)]

Unnamed: 0,lookup,COMID,tim.date,cov.antec_air_temp,cov.std_mean_flow,prd.stream_temp
0,24125807_1990_1,24125807,1990-01-02,,1.054899,
1,24125809_1990_1,24125809,1990-01-02,,1.056818,
2,24125811_1990_1,24125811,1990-01-02,,1.057383,
3,24125813_1990_1,24125813,1990-01-02,,1.069315,
4,24125815_1990_1,24125815,1990-01-02,,1.078398,
...,...,...,...,...,...,...
3938851,25006819_2021_365,25006819,2022-01-01,-6.346550,0.861358,
3938852,25006821_2021_365,25006821,2022-01-01,-6.838933,0.831395,
3938853,25006823_2021_365,25006823,2022-01-01,-6.096833,1.208477,
3938854,25006825_2021_365,25006825,2022-01-01,-8.553583,0.499316,


In [137]:
3938856 rows × 6 columns

SyntaxError: invalid character '×' (U+00D7) (842876673.py, line 1)

In [113]:
df

Unnamed: 0,lookup,COMID,tim.date,cov.antec_air_temp,cov.std_mean_flow,prd.stream_temp
0,24125807_1990_1,24125807,1990-01-02,,1.054899,
1,24125809_1990_1,24125809,1990-01-02,,1.056818,
2,24125811_1990_1,24125811,1990-01-02,,1.057383,
3,24125813_1990_1,24125813,1990-01-02,,1.069315,
4,24125815_1990_1,24125815,1990-01-02,,1.078398,
...,...,...,...,...,...,...
3938851,25006819_2021_365,25006819,2022-01-01,-6.346550,0.861358,
3938852,25006821_2021_365,25006821,2022-01-01,-6.838933,0.831395,
3938853,25006823_2021_365,25006823,2022-01-01,-6.096833,1.208477,
3938854,25006825_2021_365,25006825,2022-01-01,-8.553583,0.499316,


In [19]:
huc_comids = {}

### Layer1: ComID geometry table. Feature ID (ComID), attributes (Fcode, SegID, [spatial covariates retrospective])


### Layer3: Anomaly temperature metrics (seasonal)


In [20]:
huc_comids = {}

In [21]:
cur_huc = avail_hucs[1]
cur_huc

IndexError: list index out of range

In [189]:
anoms = pd.read_csv(os.path.join(temp_dir_retro + 'seasonal_anomalies/', f'{cur_huc}_anoms.zip'), compression = 'zip')
anoms = anoms[~anoms.COMID.isnull()]
anoms

Unnamed: 0,COMID,season,2010s,1990s,2000s
1,22878865.0,fall,,,
2,22878865.0,spring,,,
3,22878865.0,summer,,,
4,22878865.0,winter,,,


In [201]:
#df.columns = ['log(gdp)' if x=='gdp' else 'cap_mod' if x=='cap' else x for x in df.columns]
anoms.columns = [f'prd.stream_temp_{c}[C]' if c == '2010s' else f'prd.stream_temp_{c}[delta_C]' if c[0].isnumeric() else c for c in anoms.columns]
anoms

Unnamed: 0,COMID,season,prd.stream_temp_2010s[C],prd.stream_temp_1990s[delta_C],prd.stream_temp_2000s[delta_C]
1,22878865.0,fall,,,
2,22878865.0,spring,,,
3,22878865.0,summer,,,
4,22878865.0,winter,,,


In [227]:
cur_comids = anoms.COMID.unique().tolist()
cur_comids

[22878865.0]

In [231]:
huc_comids[cur_huc] = cur_comids
huc_comids

{'1701010106': [22878865.0]}

In [233]:
flowlines

Unnamed: 0,COMID,FDATE,RESOLUTION,GNIS_ID,GNIS_NAME,LENGTHKM,REACHCODE,FLOWDIR,WBAREACOMI,FTYPE,FCODE,SHAPE_LENG,ENABLED,GNIS_NBR,geometry
0,9301535,1999-10-08,Medium,,,0.999,09040002011583,Uninitialized,0,StreamRiver,46006,0.011661,True,0,"LINESTRING Z (-114.05855 48.99999 0, -114.0583..."
1,22968128,2001-03-02,Medium,,,0.134,10030104009812,Uninitialized,0,StreamRiver,46003,0.001671,True,0,"LINESTRING Z (-113.00089 47.46274 0, -113.0020..."
2,22968138,2001-03-02,Medium,,,0.217,10030104009813,Uninitialized,0,StreamRiver,46003,0.002145,True,0,"LINESTRING Z (-113.0013 47.45837 0, -113.00137..."
3,23135995,2001-02-07,Medium,,,0.916,16010102003533,Uninitialized,0,StreamRiver,46003,0.010671,True,0,"LINESTRING Z (-111.06214 42.50841 0, -111.0619..."
4,23135997,2001-02-07,Medium,,,1.865,16010102003534,Uninitialized,0,StreamRiver,46003,0.019193,True,0,"LINESTRING Z (-111.04516 42.51372 0, -111.0447..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272335,24545039,2005-08-27,Medium,,,2.847,17120009001542,Uninitialized,0,StreamRiver,46003,0.029718,True,0,"LINESTRING Z (-118.12671 42.98327 0, -118.1260..."
272336,24545041,2005-08-27,Medium,,,2.005,17120009001543,Uninitialized,0,StreamRiver,46003,0.020023,True,0,"LINESTRING Z (-118.1111 42.99525 0, -118.11175..."
272337,23706720,2001-02-28,Medium,,,4.217,18010201002017,Uninitialized,0,StreamRiver,46003,0.043621,True,0,"LINESTRING Z (-121.60348 43.32724 0, -121.6034..."
272338,23931942,2001-03-14,Medium,1127294,South Fork Keene Creek,0.029,18010206012384,Uninitialized,0,StreamRiver,46006,0.000284,True,0,"LINESTRING Z (-122.47671 42.08355 0, -122.4765..."


In [243]:
flowlines[flowlines.COMID.isin(cur_comids)][['FCODE', 

Unnamed: 0,COMID,FDATE,RESOLUTION,GNIS_ID,GNIS_NAME,LENGTHKM,REACHCODE,FLOWDIR,WBAREACOMI,FTYPE,FCODE,SHAPE_LENG,ENABLED,GNIS_NBR,geometry
687,22878865,2001-02-28,Medium,793620,Young Creek,2.324,17010101000584,With Digitized,22886855,ArtificialPath,55800,0.028278,True,0,"LINESTRING Z (-115.18951 48.9611 0, -115.18529..."


In [21]:
hucs = [f[-13:-3] for f in temp_files_gcms]

In [22]:
# Select columns with 'float64' dtype  
float64_cols = list(huc_anoms.select_dtypes(include='float64'))

# The same code again calling the columns
huc_anoms[float64_cols] = huc_anoms[float64_cols].astype('float32')

NameError: name 'huc_anoms' is not defined

### Covariate anomalies

In [22]:
temp_dir_retro

NameError: name 'temp_dir_retro' is not defined

In [22]:
cov_retro_dir = os.path.join(temp_dir, 'cov_retro')
cov_retro_dir

'/Volumes/Elements/URycki/temp_data/cov_retro'

In [32]:
#retro covariates
hucs = [f[-14:-4] for f in glob.glob(cov_retro_dir + '/*')]
hucs.sort()

In [73]:
temp_files_retro = glob.glob(temp_dir_retro + 'seasonal_anomalies/' + '*.zip')
print(len(temp_files_retro))
print(temp_files_retro[:3])
temp_files_retro.sort()

1536
['/Volumes/Elements/URycki/temp_data/preds_retro/seasonal_anomalies/1701010107_anoms.zip', '/Volumes/Elements/URycki/temp_data/preds_retro/seasonal_anomalies/1701010108_anoms.zip', '/Volumes/Elements/URycki/temp_data/preds_retro/seasonal_anomalies/1701010106_anoms.zip']
