# Preparations

In [1]:
import pandas as pd
import geopandas as gpd
import folium
import geopy.distance as distance
import data_analysis
import seaborn as sns
import scipy.stats as stats

import matplotlib.pyplot as plt

In [2]:
def plotregion(m, df, linecolor='#00ff00'):
    """Draw one survey track on a folium map.

    Rows of ``df`` with a missing ``temp`` or ``humidity`` value are ignored.
    The remaining ``lat``/``lon`` pairs are joined, in row order, by a
    polyline, and each of them is also marked with a filled circle.

    Parameters
    ----------
    m : folium map
        Target the layers are added to (via ``add_to``); modified in place.
    df : pandas.DataFrame
        Must provide ``lat``, ``lon``, ``temp`` and ``humidity`` columns.
    linecolor : str
        Colour of the connecting polyline.
    """
    valid = df.dropna(subset=['temp', 'humidity'])
    points = valid[['lat', 'lon']].values
    if len(points):
        folium.PolyLine(points, color=linecolor).add_to(m)

    # Only the coordinates are drawn, so only they are iterated. This avoids
    # unpacking unused values into a local called `distance`, which would hide
    # the geopy alias imported under the same name.
    for lat, lon in points:
        folium.CircleMarker(location=[lat, lon], radius=5, fill_opacity=1, fill=True, stroke=False).add_to(m)


# Sanity Checks

In [3]:
# Load every transect for both survey dates, draw the below- and above-canopy
# tracks on one satellite map, and stack the per-transect frames into
# all_ground / all_below_samples / all_above for the analysis further down.
# NOTE(review): the output recorded under this cell ends with
# "TypeError: 'float' object is not subscriptable" right after "1 04-27" / "nan",
# so loading the 04-27 data apparently fails - presumably inside a
# data_analysis helper; confirm before trusting the cells below.
dates = ['04-04', '04-27']
m = folium.Map(
    tiles="Stadia.AlidadeSatellite",
    location=[42.446, -78.2155],
    zoom_start=18,
    zoom_control=False,
)

ground_frames, below_frames, above_frames = [], [], []
for date in dates:
    for transect in (1, 2, 3):
        ground = data_analysis.ground_data(transect, date)
        below = data_analysis.below_canopy(transect, date)
        above = data_analysis.above_canopy(transect, date)
        below_samples = data_analysis.get_distances(ground, below, above)

        ground_frames.append(ground)
        below_frames.append(below_samples)
        above_frames.append(above)

        # gold = below-canopy samples, purple = above-canopy track
        plotregion(m, below_samples, linecolor='gold')
        plotregion(m, above, linecolor='purple')

all_ground = pd.concat(ground_frames, axis=0)
all_below_samples = pd.concat(below_frames, axis=0)
all_above = pd.concat(above_frames, axis=0)
m

1 04-04
Normal
Dry
Dry
Dry
Dry
Dry
Dry
Dry
Dry
Dry
Dry
Dry
Normal
Dry
Wet+
2 04-04
Dry
Normal
Dry
Normal
Dry
Wet
Wet
Dry
Dry
Dry
Dry
Normal
Dry
Wet+
Wet+
3 04-04
Wet
Normal
Wet+
Dry
Wet
Wet+
Wet+
Wet+
Wet+
Wet+
Wet
Dry
Dry
Wet
Wet
1 04-27
nan


TypeError: 'float' object is not subscriptable

In [None]:
# Sanity check on a single pass (transect 1, 04-04): humidity of the
# below-canopy samples against distance, one line per 'direction' value.
sample_key = (1, '04-04')
ground = data_analysis.ground_data(*sample_key)
below = data_analysis.below_canopy(*sample_key)
above = data_analysis.above_canopy(*sample_key)
below_samples = data_analysis.get_distances(ground, below, above)

ax = sns.lineplot(data=below_samples, x='distance', y='humidity', hue='direction')
ax.set_xlabel('Distance from the road (m)')
ax.set_ylabel('Humidity below canopy (%)')
ax.legend(title=False);

In [None]:
# Same single-pass check as the humidity plot, now for temperature.
ax = sns.lineplot(data=below_samples, x='distance', y='temp', hue='direction')
ax.set_xlabel('Distance from the road (m)')
ax.set_ylabel('Temperature below canopy (C)')
ax.legend(title=False);

In [None]:
# For every distance value below 150, measure how far apart (in metres) the
# samples sharing that distance are, using geopy on their lat/lon pairs.
# NOTE(review): presumably these are the outbound and return samples of one
# pass, and the < 150 cut removes the turnaround point - confirm.
def _separation_m(rows):
    return distance.distance(*rows[['lat', 'lon']].values).meters

near_samples = below_samples.query('distance < 150')
near_samples.groupby('distance').apply(_separation_m).plot();
plt.xlabel('Distance')
plt.ylabel('Return Error (m)')

# Combine Data

In [None]:
# Join the combined below-canopy and above-canopy data; overlapping column
# names get the '_below' / '_above' suffixes.
below_combined = data_analysis.combine_data(all_below_samples)
above_combined = data_analysis.combine_data(all_above)
merged = data_analysis.merge(below_combined, above_combined, ('_below', '_above'))

In [None]:
# Take the first ground row of each (date, transect, distance) group, keep its
# soil readings, and merge them with the canopy data.
ground_keys = ['date', 'transect', 'distance']
soil_columns = ['soil_temperature', 'soil_moisture']
ground_first = all_ground.groupby(ground_keys).apply(lambda rows: rows.iloc[0][soil_columns])
all_data = data_analysis.merge(ground_first, merged, ('_ground', ''))

# Tests

## 1 Tests

In [None]:
# Bartlett test for equal variances across the three temperature columns
# (one sample per column, NaNs omitted).
temperature_samples = all_data[['soil_temperature', 'temp_below', 'temp_above']].to_numpy().T
stats.bartlett(*temperature_samples, nan_policy='omit')

In [None]:
# One-way ANOVA across the three temperature columns (NaNs omitted).
temperature_samples = all_data[['soil_temperature', 'temp_below', 'temp_above']].to_numpy().T
stats.f_oneway(*temperature_samples, nan_policy='omit')

In [None]:
# Overlaid histograms of the three temperature columns (long format: one row
# per value, 'variable' names the source column).
temperature_long = all_data.melt(value_vars=['soil_temperature', 'temp_below', 'temp_above'])
sns.histplot(data=temperature_long, x='value', hue='variable');

In [None]:
# Bartlett test for equal variances of humidity below vs. above the canopy
# (NaNs omitted).
humidity_samples = all_data[['humidity_below', 'humidity_above']].to_numpy().T
stats.bartlett(*humidity_samples, nan_policy='omit')

In [None]:
# One-way ANOVA of humidity below vs. above the canopy (NaNs omitted).
humidity_samples = all_data[['humidity_below', 'humidity_above']].to_numpy().T
stats.f_oneway(*humidity_samples, nan_policy='omit')

In [None]:
# Overlaid histograms of the two humidity columns.
humidity_long = all_data.melt(value_vars=['humidity_below', 'humidity_above'])
sns.histplot(data=humidity_long, x='value', hue='variable');

## 2 Tests

In [None]:
# Keep only rows without any missing value; the correlations in the following
# cells are computed on these complete rows. Rebinding instead of
# `inplace=True` leaves the frame object used by the earlier cells unmodified.
all_data = all_data.dropna()

In [None]:
# Pearson correlation between temperature below and above the canopy.
temp_below_vals, temp_above_vals = all_data[['temp_below', 'temp_above']].to_numpy().T
stats.pearsonr(temp_below_vals, temp_above_vals)

In [None]:
# Scatter of temperature below vs. above the canopy.
all_data.plot(kind='scatter', x='temp_below', y='temp_above');

In [None]:
# Pearson correlation between soil temperature and temperature below the canopy.
soil_temp_vals, temp_below_vals = all_data[['soil_temperature', 'temp_below']].to_numpy().T
stats.pearsonr(soil_temp_vals, temp_below_vals)

In [None]:
# Scatter of soil temperature vs. temperature below the canopy.
all_data.plot(kind='scatter', x='soil_temperature', y='temp_below');

In [None]:
# Pearson correlation between humidity below and above the canopy.
humidity_below_vals, humidity_above_vals = all_data[['humidity_below', 'humidity_above']].to_numpy().T
stats.pearsonr(humidity_below_vals, humidity_above_vals)

In [None]:
# Scatter of humidity below vs. above the canopy.
all_data.plot(kind='scatter', x='humidity_below', y='humidity_above');

In [None]:
# Pearson correlation between humidity below the canopy and soil moisture.
humidity_below_vals, soil_moisture_vals = all_data[['humidity_below', 'soil_moisture']].to_numpy().T
stats.pearsonr(humidity_below_vals, soil_moisture_vals)

## 3 Tests

### Temperature

In [None]:
# Pearson correlation between distance and soil temperature.
# all_ground has not been NaN-filtered, and a single missing value would make
# the whole result NaN, so incomplete pairs are dropped first.
ground_temp_pairs = all_ground[['distance', 'soil_temperature']].dropna()
stats.pearsonr(*ground_temp_pairs.values.T)

In [None]:
# Mean soil temperature at each distance, with labelled axes so the figure
# can be read on its own.
all_ground.groupby('distance')['soil_temperature'].mean().plot(
    xlabel='Distance from the road (m)',
    ylabel='Mean soil temperature',
);

In [None]:
# Pearson correlation between distance and temperature below the canopy.
# The below-canopy samples can contain missing temp values (plotregion has to
# drop them), and a single NaN would make the whole result NaN, so incomplete
# pairs are dropped first.
below_temp_pairs = all_below_samples[['distance', 'temp']].dropna()
stats.pearsonr(*below_temp_pairs.values.T)

In [None]:
# Mean below-canopy temperature at each distance, with labelled axes.
all_below_samples.groupby('distance')['temp'].mean().plot(
    xlabel='Distance from the road (m)',
    ylabel='Mean temperature below canopy (C)',
);

In [None]:
# Pearson correlation between distance and temperature above the canopy.
# The above-canopy data can contain missing temp values (plotregion has to
# drop them), and a single NaN would make the whole result NaN, so incomplete
# pairs are dropped first.
above_temp_pairs = all_above[['distance', 'temp']].dropna()
stats.pearsonr(*above_temp_pairs.values.T)

In [None]:
# Mean above-canopy temperature at each distance, with labelled axes.
all_above.groupby('distance')['temp'].mean().plot(
    xlabel='Distance from the road (m)',
    ylabel='Mean temperature above canopy (C)',
);

### Humidity

In [None]:
# Pearson correlation between distance and soil moisture.
# all_ground has not been NaN-filtered, and a single missing value would make
# the whole result NaN, so incomplete pairs are dropped first.
ground_moisture_pairs = all_ground[['distance', 'soil_moisture']].dropna()
stats.pearsonr(*ground_moisture_pairs.values.T)

In [None]:
# Pearson correlation between distance and humidity below the canopy.
# The below-canopy samples can contain missing humidity values (plotregion has
# to drop them), and a single NaN would make the whole result NaN, so
# incomplete pairs are dropped first.
below_humidity_pairs = all_below_samples[['distance', 'humidity']].dropna()
stats.pearsonr(*below_humidity_pairs.values.T)

In [None]:
# Mean below-canopy humidity at each distance, with labelled axes.
all_below_samples.groupby('distance')['humidity'].mean().plot(
    xlabel='Distance from the road (m)',
    ylabel='Mean humidity below canopy (%)',
);

In [None]:
# Pearson correlation between distance and humidity above the canopy.
# The above-canopy data can contain missing humidity values (plotregion has to
# drop them), and a single NaN would make the whole result NaN, so incomplete
# pairs are dropped first.
above_humidity_pairs = all_above[['distance', 'humidity']].dropna()
stats.pearsonr(*above_humidity_pairs.values.T)

In [None]:
# Mean above-canopy humidity at each distance, with labelled axes.
all_above.groupby('distance')['humidity'].mean().plot(
    xlabel='Distance from the road (m)',
    ylabel='Mean humidity above canopy (%)',
);

## 4 Tests

In [None]:
# Add the layer-to-layer differences used by the plots and correlations below.
# `assign` builds a new frame instead of mutating the existing one in place.
all_data = all_data.assign(
    temp_above_below=all_data['temp_above'] - all_data['temp_below'],
    temp_below_ground=all_data['temp_below'] - all_data['soil_temperature'],
    temp_above_ground=all_data['temp_above'] - all_data['soil_temperature'],
    humidity_above_below=all_data['humidity_above'] - all_data['humidity_below'],
)

# Turn the named index levels into ordinary columns so that 'distance' can be
# selected as a column. The guard keeps the cell safe to re-run: resetting an
# already-reset index would insert spurious 'index' / 'level_0' columns and
# eventually raise.
if any(level_name is not None for level_name in all_data.index.names):
    all_data = all_data.reset_index()

In [None]:
# Above-minus-below temperature difference against distance: line plot plus
# the Pearson correlation (displayed as the cell result).
sns.lineplot(data=all_data, x='distance', y='temp_above_below')
distance_vals, difference_vals = all_data[['distance', 'temp_above_below']].to_numpy().T
stats.pearsonr(distance_vals, difference_vals)

In [None]:
# Below-canopy-minus-soil temperature difference against distance: line plot
# plus the Pearson correlation (displayed as the cell result).
sns.lineplot(data=all_data, x='distance', y='temp_below_ground')
distance_vals, difference_vals = all_data[['distance', 'temp_below_ground']].to_numpy().T
stats.pearsonr(distance_vals, difference_vals)

In [None]:
# Above-canopy-minus-soil temperature difference against distance: line plot
# plus the Pearson correlation (displayed as the cell result).
sns.lineplot(data=all_data, x='distance', y='temp_above_ground')
distance_vals, difference_vals = all_data[['distance', 'temp_above_ground']].to_numpy().T
stats.pearsonr(distance_vals, difference_vals)

In [None]:
# Above-minus-below humidity difference against distance: line plot plus the
# Pearson correlation (displayed as the cell result).
sns.lineplot(data=all_data, x='distance', y='humidity_above_below')
distance_vals, difference_vals = all_data[['distance', 'humidity_above_below']].to_numpy().T
stats.pearsonr(distance_vals, difference_vals)