In [1]:
from dotenv import load_dotenv
import os
import geemap
import ee

# Load the environment variables from the .env file
load_dotenv()

# os.environ.clear()

# Retrieve paths from environment variables file
path_to_folder = os.getenv('PATH_TO_FOLDER')

path_in = os.path.join(path_to_folder, os.getenv('NDVI_INPUT'))
path_out = os.path.join(path_to_folder, os.getenv('NDVI_RAW_OUTPUT'))
path_out_processed = os.path.join(path_to_folder, os.getenv('NDVI_PROCESSED_OUTPUT'))

if path_in is None and path_out is None and path_out_processed is None:
    print("Paths not found in .env file")
else:
    print("Input path retrieved:", path_in)
    print("Output path retrieved:", path_out)
    print("Output path processed retrieved:", path_out_processed)

local_cell_coordinate_file = path_in + '/local_cell_coordinates.xlsx'
print("Local coordinates file path:", local_cell_coordinate_file)

out_landsat_annual = path_out + '/local_ndvi_landsat_annual.csv'
print("Intermediate output file path:", out_landsat_annual)


Input path retrieved: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas
Output path retrieved: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas/earth_engine_output
Output path processed retrieved: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/processed_data/ndvi/ivas
Local coordinates file path: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas/local_cell_coordinates.xlsx
Intermediate output file path: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas/earth_engine_output/local_ndvi_landsat_annual.csv


In [2]:
import pandas as pd
df = pd.read_stata(path_out_processed + '/database_final.dta')

In [4]:
df=df[:100000]

In [5]:
import econtools.metrics as mt


In [6]:
df

Unnamed: 0,id,longitude,latitude,year,ndvi,group,inst,value,ndvi_sd,inst_sd,village,inst_vill,linst,lndvi
0,2344042.0,-68.141754,-22.833843,2013,0.064833,12,0.000131,407106.0,-0.200033,2.704352,1.0,2.704352,-8.940525,0.062818
1,1178953.0,-67.804314,-23.850735,2013,0.084350,8,0.000106,407106.0,0.340190,2.005913,0.0,0.000000,-9.152798,0.080980
2,1609495.0,-67.608772,-23.254793,2013,0.069062,10,0.000105,407106.0,-0.082966,1.976830,0.0,0.000000,-9.162694,0.066782
3,2131849.0,-67.387680,-23.041878,2013,0.064448,11,0.000175,407106.0,-0.210678,3.926222,0.0,0.000000,-8.651903,0.062456
4,2170222.0,-67.367165,-23.122337,2013,0.058920,11,0.000172,407106.0,-0.363699,3.848562,0.0,0.000000,-8.667965,0.057249
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1936118.0,-67.460037,-22.990038,2013,0.096027,11,0.000165,407106.0,0.663415,3.657429,0.0,0.000000,-8.708633,0.091692
99996,2585840.0,-67.859283,-23.597876,2013,0.080206,15,0.000052,407106.0,0.225495,0.512207,1.0,0.512207,-9.857520,0.077152
99997,307350.0,-68.104866,-23.146505,2013,0.116563,3,0.000047,407106.0,1.231841,0.368420,0.0,0.000000,-9.961220,0.110255
99998,2466322.0,-67.980286,-23.407448,2013,0.025873,14,0.000015,407106.0,-1.278417,-0.520655,1.0,-0.520655,-11.087076,0.025544


In [15]:
id_dummies = pd.get_dummies(df['id'], prefix='id')
year_dummies = pd.get_dummies(df['year'], prefix='year')

# Join the dummy variables with the original DataFrame
df = df.join(id_dummies)
df = df.join(year_dummies)

# Now, create interaction terms
for year in df['year'].unique():
    df[f'year{year}_longitude'] = df['year'].eq(year).astype(int) * df['longitude']
    df[f'year{year}_latitude'] = df['year'].eq(year).astype(int) * df['latitude']


In [16]:
df

Unnamed: 0,id,longitude,latitude,year,ndvi,group,inst,value,ndvi_sd,inst_sd,...,id_2685085.0,id_2685100.0,id_2685135.0,id_2685166.0,id_2685193.0,id_2685250.0,id_2685258.0,year_2013,year2013_longitude,year2013_latitude
0,2344042.0,-68.141754,-22.833843,2013,0.064833,12,0.000131,407106.0,-0.200033,2.704352,...,False,False,False,False,False,False,False,True,-68.141754,-22.833843
1,1178953.0,-67.804314,-23.850735,2013,0.084350,8,0.000106,407106.0,0.340190,2.005913,...,False,False,False,False,False,False,False,True,-67.804314,-23.850735
2,1609495.0,-67.608772,-23.254793,2013,0.069062,10,0.000105,407106.0,-0.082966,1.976830,...,False,False,False,False,False,False,False,True,-67.608772,-23.254793
3,2131849.0,-67.387680,-23.041878,2013,0.064448,11,0.000175,407106.0,-0.210678,3.926222,...,False,False,False,False,False,False,False,True,-67.387680,-23.041878
4,2170222.0,-67.367165,-23.122337,2013,0.058920,11,0.000172,407106.0,-0.363699,3.848562,...,False,False,False,False,False,False,False,True,-67.367165,-23.122337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1936118.0,-67.460037,-22.990038,2013,0.096027,11,0.000165,407106.0,0.663415,3.657429,...,False,False,False,False,False,False,False,True,-67.460037,-22.990038
99996,2585840.0,-67.859283,-23.597876,2013,0.080206,15,0.000052,407106.0,0.225495,0.512207,...,False,False,False,False,False,False,False,True,-67.859283,-23.597876
99997,307350.0,-68.104866,-23.146505,2013,0.116563,3,0.000047,407106.0,1.231841,0.368420,...,False,False,False,False,False,False,False,True,-68.104866,-23.146505
99998,2466322.0,-67.980286,-23.407448,2013,0.025873,14,0.000015,407106.0,-1.278417,-0.520655,...,False,False,False,False,False,False,False,True,-67.980286,-23.407448


In [17]:
# List of original independent variables
independent_vars = ['inst_sd']

# Programmatically get the list of dummy variable names and interaction terms
# Assuming that the dummy variables and interaction terms follow a specific naming pattern
dummy_vars = [col for col in df.columns if col.startswith('id_') or col.startswith('year')]
interaction_terms = [col for col in df.columns if 'longitude' in col or 'latitude' in col]

# Combine the lists
all_independent_vars = independent_vars + dummy_vars + interaction_terms

# Perform the regression with all independent variables
results = mt.reg(df, 'ndvi_sd', all_independent_vars, shac=shac_params)



: 

In [None]:
results

Dependent variable:	ndvi_sd
N:			100000
R-squared:		-0.0324
Estimation method:	OLS
VCE method:		SHAC
  SHAC kernel:	  unif
  SHAC bandwidth:	  2
        coeff    se                    t   p>t CI_low CI_high
inst_sd 0.096 0.000 2614606248053371.000 0.000  0.096   0.096