# Data Preparation
The resulting data from this notebook will combine the positional data (latitude, longitude, town) from ChemDataforJeffOlson.csv with all of the tables from the land use survey.
## Merge all tables from the land use survey
Here we run through the files from the land use survey. All files have the same column names, so we just need to append them all. I created a dataframe from the first file, then ran through the rest of them, turning each into a dataframe and appending it to the first.
### Section 1: Data paths and collection

In [1]:
import os
import re
import numpy as np
import pandas as pd
import IPython
from IPython.display import display
from sklearn.preprocessing import MinMaxScaler
from urllib.request import urlretrieve
import geopandas as gpd

# Show every column when a wide DataFrame is displayed (None disables column truncation).
pd.set_option('display.max_columns', None)

In [2]:
# --- Input locations (relative paths) ---

# set the path to the folder with all the land use survey tables
land_use_folder = 'assets/Tables'

# set the path to the chem data file
chem_data_file_path = 'assets/ChemDataforJeffOlson.csv'

# set the path to the population data (optional):
# USE_POPULATION switches the population loading and merging steps on or off
USE_POPULATION = True
population_path = 'assets/HS-STAT-Population-of-Vermont-towns-1930-2019.xls'

# set the path to the characteristics data
characteristics_path = 'assets/Characteristic.csv'

# --- Output locations ---

# set the save path for the resulting cleaned chem data
chem_data_save_path = 'assets/chem_data_merged.csv'

# set the save path for the resulting survey data file
survay_save_path = 'assets/combined_tables.csv'

# set the path of the lakes inventory geojson file (downloaded when missing, then re-saved)
geojson_path = 'assets/Lakes_Inventory.geojson'

In [3]:
# Download URL of the Vermont town population spreadsheet.
URL_HEALTH = 'https://www.healthvermont.gov/sites/default/files/documents/xls/HS-STAT-Population-of-Vermont-towns-1930-2019.xls'

# ArcGIS REST query for the lakes layer: every row (where=1=1), every field (outFields=*), outSR=4326.
# NOTE(review): the query requests f=json while the result is stored under a .geojson name - confirm this is intended.
URL_LAKES = 'https://anrmaps.vermont.gov/arcgis/rest/services/Open_Data/OPENDATA_ANR_WATER_SP_NOCACHE_v2/MapServer/208/query?where=1%3D1&outFields=*&outSR=4326&f=json'

# This function downloads the lakes inventory data from the VERMONT OPEN GEODATA PORTAL
# (only when it is not on disk yet) and loads it with geopandas.
def get_lakes_inventory_geojson(filename=geojson_path, url=URL_LAKES):
    """Return the lakes inventory as a GeoDataFrame, downloading it first if needed.

    Parameters
    ----------
    filename : str
        Local path of the lakes inventory file. When no file exists at this
        path, it is downloaded from `url` and stored there.
    url : str
        Address the file is fetched from when `filename` is missing.

    Returns
    -------
    geopandas.GeoDataFrame
        The content of `filename` as read by `gpd.read_file`.
    """
    if not os.path.exists(filename):
        urlretrieve(url, filename)
    # Read the file that was actually requested; the path used to be hard-coded here,
    # so a custom `filename` was downloaded but never loaded.
    return gpd.read_file(filename)

# This function downloads the population data (only when it is not on disk yet) and loads it.
def get_pop_data(filename=population_path, url=URL_HEALTH):
    """Return the town population table, downloading the spreadsheet first if needed.

    Parameters
    ----------
    filename : str
        Local path of the population spreadsheet. When no file exists at this
        path, it is downloaded from `url` and stored there.
    url : str
        Address the spreadsheet is fetched from when `filename` is missing.

    Returns
    -------
    pandas.DataFrame
        Indexed by the `CTC` column, with a `Town` column (the spreadsheet's
        `NAME`) followed by the last 32 columns of the sheet.
    """
    if not os.path.exists(filename):
        urlretrieve(url, filename)

    # Load from `filename`; the global path used to be read here, which ignored the argument.
    population = pd.read_excel(filename, skiprows=4, index_col='CTC')

    # Keep the town name plus the last 32 columns of the sheet.
    # NOTE(review): presumably these are the per-year population columns that are
    # melted into a `year` column later in the notebook - confirm against the spreadsheet.
    year_columns = list(population.columns[-32:])

    # rename() returns a new frame, so no in-place change is made on a column slice
    population = population[['NAME'] + year_columns].rename(columns={'NAME': 'Town'})
    return population

### Section 2: Get GeoJson Lake Data

In [4]:
# Download geojson data for plotting in the project dashboard notebook

geojson = get_lakes_inventory_geojson()
# Re-save the loaded data over the downloaded file. The driver is passed explicitly
# so the output format does not depend on geopandas inferring it from the extension.
geojson.to_file(geojson_path, driver='GeoJSON')

### Section 3: Merge The Land Use Survey Files

In [5]:
# combine all tables from Table Folder in assets folder

def merge_tables_folder(tables_folder):
    """Read every table file in `tables_folder` and stack them into one DataFrame.

    Parameters
    ----------
    tables_folder : str
        Folder holding the Excel tables. All tables are expected to share the
        same columns, including an `OBJECTID` column.

    Returns
    -------
    pandas.DataFrame
        The rows of all tables in file-name order, with a fresh 0..n-1 index,
        an added `from_file` column naming the source file, and `OBJECTID` removed.

    Raises
    ------
    ValueError
        If the folder contains no table files.
    """
    # List the folder once and sort it so the row order is reproducible
    # (os.listdir returns entries in arbitrary order). Sub-directories, hidden
    # files and Office lock files ("~$...") are not tables and are skipped.
    file_names = sorted(
        name for name in os.listdir(tables_folder)
        if os.path.isfile(os.path.join(tables_folder, name))
        and not name.startswith(('.', '~$'))
    )
    if not file_names:
        raise ValueError(f'No table files found in {tables_folder!r}')

    frames = []
    for file_name in file_names:
        frame = pd.read_excel(os.path.join(tables_folder, file_name))
        # add file name to df so every row can be traced back to its source
        frame['from_file'] = file_name
        frames.append(frame)

    # Concatenate once instead of growing the frame inside the loop
    # (repeated concat copies all previous rows each time).
    tables_df = pd.concat(frames, axis=0, ignore_index=True)

    # drop OBJECTID column (the index was already renumbered by ignore_index)
    return tables_df.drop('OBJECTID', axis=1)

In [6]:
# Stack all land use survey tables found in land_use_folder into one DataFrame.
combined_tables_df = merge_tables_folder(land_use_folder)

In [7]:
# Peek at five random rows (no seed is set, so the rows differ between runs).
combined_tables_df.sample(5)

Unnamed: 0,Description,Shape_Length,Shape_Area,TREE_CANOPY_acres,GRASS_SHRUBS_acres,BARE_SOIL_acres,WATER_acres,BUILDINGS_acres,ROADS_acres,OTHER_PAVED_acres,RAILROADS_acres,Ag_Hay_acres,Ag_Crops_acres,Ag_Pasture_acres,Ag_Total_acres,Imp_Bare_Soil_acres,Imp_Buildings_acres,Imp_Other_Paved_acres,Imp_Road_acres,Imp_Railroad_acres,Imp_Total_acres,Shrub_Shrubs_acres,Shrub_Total_acres,TC_Coniferous_acres,TC_Deciduous_acres,TC_Total_acres,Wet_Emergent_acres,Wet_Forested_acres,Wet_Scrub_Shrub_acres,Wet_Total_acres,from_file
562,PERCHBENSON_Watershed,4762.502934,351507.5,84.315129,1.624839,0.004448,0.073205,0.256804,0.412048,0.171058,0.0,0.0,0.0,0.0,0.0,0.008272,0.374661,0.233888,1.606133,0.0,2.222954,0.0,0.0,51.282889,33.123739,84.406628,0.0,5.963683,0.0,5.963683,AOIs_PERCHBENSON.xlsx
206,FAIRFIELD_Flowline100ft,60006.128374,1898898.0,345.933224,73.818542,0.415878,45.165049,0.275399,1.539835,2.014215,0.0,12.516835,0.529675,0.520155,13.566665,0.610306,0.367079,2.561858,1.998346,0.0,5.537589,9.078908,9.078908,92.561924,253.616193,346.178117,61.41001,47.731033,10.571689,119.712732,AOIs_FAIRFIELD.xls
185,Watershed_ELLIGO,25511.912993,11651990.0,2454.565811,379.232599,6.481688,12.845568,3.31374,15.095274,7.733583,0.0,175.924124,9.916694,0.684604,186.525422,6.947631,3.635175,9.232956,24.650632,0.0,44.466395,24.69091,24.69091,872.432047,1584.548755,2456.980802,48.476157,285.878092,14.877754,349.232003,AOIs_ELLIGO.xls
137,DANIELS_Waterbody100ft,4530.487616,67842.24,10.567445,4.625991,0.006857,0.798273,0.517253,0.008958,0.238765,0.0,0.0,0.0,0.0,0.0,0.006283,0.56428,0.302623,0.0107,0.0,0.883886,1.368623,1.368623,8.675745,1.926279,10.602023,1.803818,5.137762,0.53468,7.47626,AOIs_DANIELS.xls
136,DANIELS_Flowline100ft,8181.318163,248439.3,43.077135,14.310098,0.568156,3.192102,0.001792,0.208371,0.026873,0.0,1.56895,1.352652,0.379971,3.301573,0.567756,0.002486,0.031334,0.239174,0.0,0.84075,4.066466,4.066466,29.038364,14.140515,43.17888,9.372987,25.105755,3.182699,37.661441,AOIs_DANIELS.xls


Split the Description column to get LakeIDs. The Description column has the LakeID attached to a description. Depending on the lake, the order may be LakeID then description, or description then LakeID. The function below splits on an underscore, takes the part of the resulting list that is all uppercase as the LakeID, and leaves the rest as the description.

In [8]:
# now I want to split the description column into two columns:
# one corresponds to the lake (LakeID) and the other corresponds to the description
# depending on the file they are in different orders

def split_description(df):
    """Split `Description` into a `LakeID` column and a shortened `Description`.

    Each value is split on underscores. If the first piece is all uppercase it
    is taken as the LakeID and the second piece as the description; otherwise
    the first piece is the description and the second piece the LakeID. Pieces
    after the second one are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string `Description` column whose values contain at least
        one underscore.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with `Description` replaced by the description piece and
        a `LakeID` column added. The input frame is left untouched.
    """
    # Work on a copy so the caller's frame is never left half-converted
    # (the input used to be overwritten with lists in place).
    result = df.copy()
    pieces = result['Description'].str.split('_')

    result['LakeID'] = pieces.apply(
        lambda parts: parts[0] if parts[0].isupper() else parts[1]
    )
    result['Description'] = pieces.apply(
        lambda parts: parts[1] if parts[0].isupper() else parts[0]
    )
    return result

In [9]:
# NOTE: run this once per freshly merged frame. On an already-split frame the
# Description values no longer contain an underscore, so the split would fail with IndexError.
combined_tables_df = split_description(combined_tables_df)
combined_tables_df.sample(5)

Unnamed: 0,Description,Shape_Length,Shape_Area,TREE_CANOPY_acres,GRASS_SHRUBS_acres,BARE_SOIL_acres,WATER_acres,BUILDINGS_acres,ROADS_acres,OTHER_PAVED_acres,RAILROADS_acres,Ag_Hay_acres,Ag_Crops_acres,Ag_Pasture_acres,Ag_Total_acres,Imp_Bare_Soil_acres,Imp_Buildings_acres,Imp_Other_Paved_acres,Imp_Road_acres,Imp_Railroad_acres,Imp_Total_acres,Shrub_Shrubs_acres,Shrub_Total_acres,TC_Coniferous_acres,TC_Deciduous_acres,TC_Total_acres,Wet_Emergent_acres,Wet_Forested_acres,Wet_Scrub_Shrub_acres,Wet_Total_acres,from_file,LakeID
9,Buffer250ftWaterbody,4804.641049,175372.8,40.981067,0.885501,0.029158,1.074907,0.027058,0.047568,0.28868,0.0,0.0,0.0,0.0,0.0,0.05798,0.031631,0.671306,0.179882,0.0,0.940799,0.0,0.0,12.842655,28.144976,40.987631,0.0,4.522766,0.0,4.522766,AOIs_ADAMSWOODFD.xls,ADAMSWOODFD
290,Watershed,12682.148331,2749314.0,512.768574,148.109054,0.608311,4.321743,2.883839,7.044778,3.632567,0.0,106.989853,0.0,0.0,106.989853,0.610215,3.183045,4.268437,11.767394,0.0,19.82909,0.0,0.0,280.154146,233.392378,513.546524,1.66048,66.581806,4.232227,72.474513,AOIs_HALLS.xls,HALLS
146,Flowline100ft,6324.935801,186845.1,21.48127,21.78107,0.029035,1.426784,0.230302,0.342426,0.869686,0.0,16.512764,0.0,0.0,16.512764,0.029335,0.237935,1.036292,0.446943,0.0,1.750505,0.0,0.0,5.8843,15.698429,21.582729,1.001459,3.334317,0.164782,4.500559,AOIs_DERBY.xls,DERBY
260,Watershed,24028.76018,2912108.0,610.265952,92.323741,4.529188,3.902776,1.772608,1.988083,4.816818,0.0,44.487735,0.0,0.0,44.487735,5.400331,1.897946,5.63807,5.357752,0.0,18.294099,1.467393,1.467393,354.928078,255.812439,610.740517,9.529481,104.271672,2.738698,116.539851,AOIs_GREATHOSMER.xls,GREATHOSMER
310,Buffer250ftWaterbody,85222.43192,3136483.0,664.781207,51.509593,19.357773,24.128444,0.57872,8.150635,6.502321,0.0,1.40324,0.0,0.0,1.40324,32.601558,0.593396,7.295025,11.158466,0.0,51.648445,1.83436,1.83436,178.875692,486.640902,665.516594,10.010068,13.655826,0.340378,24.006271,AOIs_HARRIMANWHITHM.xls,HARRIMANWHITHM


### Section 4: Chemical Measurement Data
Take the 'LakeID', 'Lat', 'Long' and 'Town' columns from the chem data dataset. I will need to fix the LakeIDs in the combined_tables_df dataframe so that they match the LakeIDs in the chem dataset. This is the column that I will join on.

In [10]:
# Load the raw chemical measurement data.
chem_data_df = pd.read_csv(chem_data_file_path)

In [11]:
# Some measurements have a very low frequency or have been measured only once or twice.
# Here we set a TRESHOLD parameter to cut off characteristics that have TRESHOLD or fewer
# measurements available in the dataset.

# set threshold for number of measurements
TRESHOLD = 100

def get_frequent_measurments(df, TRESHOLD):
    """Keep only the rows of frequently measured characteristics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with `CharacteristicID` and `Result` columns.
    TRESHOLD : int
        A characteristic is removed when its number of non-missing `Result`
        values is less than or equal to this value.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the remaining rows, keeping their original index
        labels. The input frame is not modified.
    """
    # count() only counts non-missing Result values per characteristic
    counts = df.groupby(['CharacteristicID'])['Result'].count()
    rare_ids = counts[counts.values <= TRESHOLD].index
    # Return a real copy: later cells add columns to the result, which would
    # otherwise be an assignment on a slice of the original frame.
    return df[~df['CharacteristicID'].isin(rare_ids)].copy()

In [12]:
# set TRESHOLD and run this if you want to exclude infrequent measurements
TRESHOLD = 100
chem_data_df = get_frequent_measurments(chem_data_df, TRESHOLD)

In [13]:
# Peek at five random rows of the filtered chem data (no seed is set, so the rows differ between runs).
chem_data_df.sample(5)

Unnamed: 0,LakeID,LakeStationNo,LakeStationType,Lat,Long,Town,ProjectID,VisitDate,VisitNumber,StartTime,CollectionMethodID,Depth,ActivityCategory,CharacteristicID,Symbol,Result,Calcs,ProjRemark,RemarkCode,DepthStratumCode
159967,MAIDSTONE,1,Pelagic,44.65036,-71.64738,Maidstone,SpringTP,2017-05-09 00:00:00,1,1522.0,Hydrolab,23.01,Reg,Cond,,22.7,Y,,,
106005,HARDWOOD,1,Pelagic,44.46869,-72.49955,Elmore,AcidLake,1987-06-03 00:00:00,1,1120.0,PlasticKemm,1.0,Reg,DNa,,0.53,Y,,,E
209435,SALEM,1,Pelagic,44.93,-72.1044,Derby,LaymonQC,2018-09-14 00:00:00,1,1531.0,Hydrolab,4.01,Reg,pH,,8.01,Y,,,
66153,EAST LONG,1,Pelagic,44.4475,-72.3525,Woodbury,LaymonQC,2017-08-21 00:00:00,1,1120.0,Hydrolab,5.0,Reg,DO%,,120.5,Y,,,
193334,PIGEON,1,Pelagic,44.24829,-72.32777,Groton,AcidLake,1984-10-26 00:00:00,1,1230.0,Thermister,6.0,Reg,TempC,,11.0,Y,,,


### Section 5: Merge Relevant Columns between the datasets
Add population to the chem data, merged on town and year. Add characteristics data to the chem data, merged on CharacteristicID. Add latitude, longitude and town to the survey dataframe, merged on LakeID.

In [14]:
# Load the population table only when the optional population step is enabled.
if USE_POPULATION:    
    population = get_pop_data()

In [15]:
# now let's convert town names in the chem dataset to uppercase to match the town names in the population dataset
chem_data_df['Town'] = chem_data_df['Town'].str.upper()
# we also need to extract the year from the chem dataset so that we can join the population data
# to the chem data by year and Town
chem_data_df['VisitDate'] = pd.to_datetime(chem_data_df['VisitDate'])
chem_data_df['year'] = chem_data_df['VisitDate'].dt.year

In [16]:
# One row per distinct (LakeID, Lat, Long, Town) combination found in the chem data.
# NOTE(review): if a LakeID appears with more than one Lat/Long/Town combination, the
# left merge on LakeID further down will repeat the matching survey rows - confirm LakeIDs are unique here.
chem_to_add = chem_data_df[['LakeID', 'Lat', 'Long', 'Town']].drop_duplicates()

# get all lake ids that are also in the usage survey and fix them:
# a survey LakeID is matched against the chem LakeID with parentheses, whitespace and
# semicolons removed, and is then replaced by the original chem spelling.
for lake_id in np.unique(chem_to_add['LakeID']):
    # raw string: '\s' in a normal string literal is an invalid escape sequence
    lake_id_fixed = re.sub(r'[()\s;]', '', lake_id)
    index = combined_tables_df[combined_tables_df['LakeID'] == lake_id_fixed].index
    combined_tables_df.loc[index, 'LakeID'] = lake_id



In [17]:
# merge with the chem_to_add data frame
# (left join: every survey row is kept, rows without a matching LakeID get missing Lat/Long/Town)
new_data_df = combined_tables_df.merge(chem_to_add, how='left', on='LakeID')

# reorder columns
cols = list(new_data_df.columns)

# columns that should come first; they are removed from the list and put back in front
to_front = ['LakeID', 'Description', 'Lat', 'Long', 'Town']
for col in to_front:
    cols.remove(col)

cols = to_front + cols
new_data_df = new_data_df.reindex(columns=cols)

new_data_df.head(5)

Unnamed: 0,LakeID,Description,Lat,Long,Town,Shape_Length,Shape_Area,TREE_CANOPY_acres,GRASS_SHRUBS_acres,BARE_SOIL_acres,WATER_acres,BUILDINGS_acres,ROADS_acres,OTHER_PAVED_acres,RAILROADS_acres,Ag_Hay_acres,Ag_Crops_acres,Ag_Pasture_acres,Ag_Total_acres,Imp_Bare_Soil_acres,Imp_Buildings_acres,Imp_Other_Paved_acres,Imp_Road_acres,Imp_Railroad_acres,Imp_Total_acres,Shrub_Shrubs_acres,Shrub_Total_acres,TC_Coniferous_acres,TC_Deciduous_acres,TC_Total_acres,Wet_Emergent_acres,Wet_Forested_acres,Wet_Scrub_Shrub_acres,Wet_Total_acres,from_file
0,ABENAKI,Watershed,43.8303,-72.2361,THETFORD,9683.033459,2335672.0,532.331074,39.85606,0.254456,1.075957,0.821377,1.521425,1.297239,0.0,16.7354,0.0,0.0,16.7354,0.309766,0.894497,1.551584,3.145226,0.0,5.901073,0.0,0.0,266.678081,265.987483,532.665564,5.913928,29.521352,0.191924,35.627204,AOIs_ABENAKI.xls
1,ABENAKI,Flowline100ft,43.8303,-72.2361,THETFORD,6359.510779,186453.1,41.212481,4.014035,0.0,0.485129,0.02885,0.233947,0.091244,0.0,0.002157,0.0,0.0,0.002157,0.0,0.030748,0.111193,0.337027,0.0,0.478968,0.0,0.0,19.499594,21.754112,41.253706,3.738571,3.145864,0.0,6.884435,AOIs_ABENAKI.xls
2,ABENAKI,Waterbody100ft,43.8303,-72.2361,THETFORD,5631.483675,84493.26,17.34881,2.912812,0.017421,0.545608,0.021066,0.0,0.031506,0.0,0.0,0.0,0.0,0.0,0.067456,0.022888,0.044621,0.0,0.0,0.134965,0.0,0.0,9.12222,8.232879,17.355099,2.099783,2.956447,0.182035,5.238265,AOIs_ABENAKI.xls
3,ABENAKI,Buffer100ftWBFL,43.8303,-72.2361,THETFORD,11439.865203,265556.5,58.155297,6.164034,0.017421,0.867339,0.049915,0.233947,0.122749,0.0,0.002157,0.0,0.0,0.002157,0.067456,0.053636,0.155814,0.337027,0.0,0.613933,0.0,0.0,28.361753,29.837036,58.198789,4.854378,5.758748,0.182035,10.795161,AOIs_ABENAKI.xls
4,ABENAKI,Buffer250ftWaterbody,43.8303,-72.2361,THETFORD,5838.954038,212589.7,44.545557,5.968327,0.017421,0.683245,0.293499,0.782087,0.23685,0.0,0.0,0.0,0.0,0.0,0.067456,0.316489,0.349577,1.192506,0.0,1.926027,0.0,0.0,21.194604,23.420545,44.615149,3.266376,7.531832,0.191924,10.990132,AOIs_ABENAKI.xls


Let's save the resulting survey dataframe as a .csv for later use.

In [18]:
# save the data frame as a .csv (the index is written as the first, unnamed column)
new_data_df.to_csv(survay_save_path)

Here we will take the population dataset and melt it so that the years are also in the rows, so that we can merge on both town and year.

In [19]:
# melt the population dataframe: one row per (Town, year) pair with the value in `population`
if USE_POPULATION:
    population_1 = population.melt(id_vars='Town', var_name='year', value_name='population')
    # left join: every chem row is kept; rows without a matching (Town, year) get a missing population
    chem_data_df = chem_data_df.merge(population_1, how='left', on=['Town', 'year'])
    display(chem_data_df.sample(5))

Unnamed: 0,LakeID,LakeStationNo,LakeStationType,Lat,Long,Town,ProjectID,VisitDate,VisitNumber,StartTime,CollectionMethodID,Depth,ActivityCategory,CharacteristicID,Symbol,Result,Calcs,ProjRemark,RemarkCode,DepthStratumCode,year,population
171978,MOREY,1,Pelagic,43.9247,-72.1533,FAIRLEE,SpringTP,2005-05-02,1,1311.0,Hydrolab,8.0,Reg,DO%,,93.2,Y,,,,2005,988.0
241595,STERLING,1,Pelagic,44.5561,-72.7747,CAMBRIDGE,AcidLake,1982-06-24,1,945.0,Thermister,6.0,Reg,TempC,,13.0,Y,,,,1982,
215777,SHADOW (GLOVER),1,Pelagic,44.66991,-72.22818,GLOVER,LayMon,2007-08-20,1,1000.0,Hose,15.0,Reg,Chla,,1.44,Y,,,,2007,1096.0
158579,MAIDSTONE,1,Pelagic,44.65036,-71.64738,MAIDSTONE,LayMon,2008-07-31,1,1121.0,Secchi,,Reg,Secchi,,6.5,Y,,,,2008,192.0
266632,TICKLENAKED,1,Pelagic,44.19067,-72.0989,RYEGATE,TMDL,2021-08-24,1,1100.0,Hydrolab,11.2,Reg,Cond,,170.5,Y,,,,2021,


In [20]:
# Load the characteristics table and give its four columns the names used in the rest of the notebook
characteristics = pd.read_csv(characteristics_path)
characteristics.columns = ['CharacteristicID', 'CharacteristicName', 'UnitCode', 'SampleFraction']
characteristics.head()

Unnamed: 0,CharacteristicID,CharacteristicName,UnitCode,SampleFraction
0,AshFreeDryMass,Ash Free Dry Mass,mg,
1,BOD5,"Biological Oxygen Demand, 5 day",mg/l,
2,BottomDepth,Bottom depth,m,
3,BottomSecchi,Bottom secchi,,
4,CBOD5,"Carbonaceous Biological Oxygen Demand, 5 day",mg/l,


In [21]:
# we are going to merge on CharacteristicID, so to make sure that they are the same across dataframes
# we will make all of them uppercase in both dataframes
characteristics['CharacteristicID'] = characteristics['CharacteristicID'].str.upper()
chem_data_df['CharacteristicID'] = chem_data_df['CharacteristicID'].str.upper()

In [22]:
# Merge characteristics df
# (left join: every chem row is kept, unknown CharacteristicIDs get missing name/unit/fraction)
chem_data_df_final = chem_data_df.merge(characteristics, how='left', on='CharacteristicID')
print('Final chem data Data frame shape: ', chem_data_df_final.shape)
chem_data_df_final.sample(5)

Final chem data Data frame shape:  (284535, 25)


Unnamed: 0,LakeID,LakeStationNo,LakeStationType,Lat,Long,Town,ProjectID,VisitDate,VisitNumber,StartTime,CollectionMethodID,Depth,ActivityCategory,CharacteristicID,Symbol,Result,Calcs,ProjRemark,RemarkCode,DepthStratumCode,year,population,CharacteristicName,UnitCode,SampleFraction
66126,ECHO (CHARTN),1,Pelagic,44.86231,-71.99507,CHARLESTON,Laymon,1981-06-05,1,1030.0,Hose,18.0,Reg,CHLA,,1.9,Y,,,,1981,,Chlorophyll-a,ug/l,
122097,HORTONIA,1,Pelagic,43.7553,-73.2022,HUBBARDTON,SpringTP,2005-04-18,1,946.0,Hydrolab,1.0,Reg,DO%,,83.7,Y,,,,2005,729.0,Dissolved Oxygen Saturation,%,
121285,HOLLAND,1,Pelagic,44.98588,-71.92928,HOLLAND,SpringTP,2020-05-12,1,1030.0,PlasticKemm,1.0,Reg,TMG,,0.406,Y,,,,2020,,Total Magnesium,mg/l,Total
46778,CHIPMAN,1,Pelagic,43.4089,-73.0317,TINMOUTH,SpringTP,2001-05-02,1,1410.0,Secchi,,Reg,SECCHI,,3.5,Y,,B,,2001,566.0,Secchi transparency,m,
258995,TICKLENAKED,1,Pelagic,44.19067,-72.0989,RYEGATE,LakeAsmt,2005-12-01,1,1102.0,Hydrolab,10.0,Reg,ORP,,576.0,Y,,,,2005,1174.0,Oxidation Reduction Potential,mV,


### Section 6: Add a Normalized Result Column

In [23]:
# add in normalized results columns
for char_id in chem_data_df_final['CharacteristicID'].unique():
    df1 = chem_data_df_final[chem_data_df_final['CharacteristicID'] == char_id]
    chem_data_df_final.loc[df1.index, 'NormResult'] = MinMaxScaler().fit_transform(df1['Result'].values.reshape(-1, 1))
    
print(chem_data_df_final.shape)
chem_data_df_final.head()

(284535, 26)


Unnamed: 0,LakeID,LakeStationNo,LakeStationType,Lat,Long,Town,ProjectID,VisitDate,VisitNumber,StartTime,CollectionMethodID,Depth,ActivityCategory,CharacteristicID,Symbol,Result,Calcs,ProjRemark,RemarkCode,DepthStratumCode,year,population,CharacteristicName,UnitCode,SampleFraction,NormResult
0,ABENAKI,1,Pelagic,43.8303,-72.2361,THETFORD,SpringTP,1988-04-22,1,,Secchi,,Reg,SECCHI,,2.8,Y,,B,,1988,2377.0,Secchi transparency,m,,0.176694
1,ABENAKI,1,Pelagic,43.8303,-72.2361,THETFORD,SpringTP,1988-04-22,1,,Kemmerer,1.4,Reg,TP,,9.0,Y,,,,1988,2377.0,Total Phosphorus,ug/l,Total,0.004051
2,ABENAKI,1,Pelagic,43.8303,-72.2361,THETFORD,SpringTP,1989-05-01,1,,Secchi,,Reg,SECCHI,,2.3,Y,,B,,1989,2417.0,Secchi transparency,m,,0.145028
3,ABENAKI,1,Pelagic,43.8303,-72.2361,THETFORD,SpringTP,1989-05-01,1,,Kemmerer,1.0,Reg,TP,,11.0,Y,,,,1989,2417.0,Total Phosphorus,ug/l,Total,0.005208
4,ABENAKI,1,Pelagic,43.8303,-72.2361,THETFORD,SpringTP,1990-04-20,1,,Secchi,,Reg,SECCHI,,3.1,Y,,B,,1990,2438.0,Secchi transparency,m,,0.195693


Let's save the resulting dataframe as a .csv for later use.

In [24]:
# save the merged chem data as a .csv (the index is written as the first, unnamed column)
chem_data_df_final.to_csv(chem_data_save_path)

## Documentation

In [25]:
# Record the versions of the imported packages (requires the `watermark` IPython extension).
%load_ext watermark
%watermark --iversions

numpy    : 1.21.5
pandas   : 1.3.4
sys      : 3.9.9 | packaged by conda-forge | (main, Dec 20 2021, 02:36:06) [MSC v.1929 64 bit (AMD64)]
re       : 2.2.1
geopandas: 0.10.2
IPython  : 7.29.0

