# Import paths from environment variables

In [128]:
import os
import time

import ee
import geemap
import pandas as pd
from dotenv import load_dotenv

# NOTE(review): later cells call `distance.distance(...).km`, but no `distance`
# name is imported in the visible cells (presumably `from geopy import distance`
# - confirm and add it here so the notebook survives Restart & Run All).

# Load the environment variables from the .env file
load_dotenv()

# Every path below is built from these variables. Validate them *before* joining:
# os.path.join() raises an opaque TypeError when it is handed None, so a check
# placed after the joins can never report a missing variable.
required_env_vars = ('PATH_TO_FOLDER', 'NDVI_INPUT', 'NDVI_RAW_OUTPUT', 'NDVI_PROCESSED_OUTPUT')
missing_env_vars = [name for name in required_env_vars if os.getenv(name) is None]
if missing_env_vars:
    raise EnvironmentError("Paths not found in .env file: " + ", ".join(missing_env_vars))

# Retrieve paths from environment variables file
path_to_folder = os.getenv('PATH_TO_FOLDER')

path_in = os.path.join(path_to_folder, os.getenv('NDVI_INPUT'))
path_out = os.path.join(path_to_folder, os.getenv('NDVI_RAW_OUTPUT'))
path_out_processed = os.path.join(path_to_folder, os.getenv('NDVI_PROCESSED_OUTPUT'))

print("Input path retrieved:", path_in)
print("Output path retrieved:", path_out)
print("Output path processed retrieved:", path_out_processed)

# Workbook holding the cell coordinates (one sheet per area)
local_cell_coordinate_file = os.path.join(path_in, 'local_cell_coordinates.xlsx')
print("Local coordinates file path:", local_cell_coordinate_file)

# Scratch CSV that each zonal-statistics call writes to before it is read back
out_landsat_annual = os.path.join(path_out, 'local_ndvi_landsat_annual.csv')
print("Intermediate output file path:", out_landsat_annual)


Input path retrieved: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas
Output path retrieved: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas/earth_engine_output
Output path processed retrieved: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/processed_data/ndvi/ivas
Local coordinates file path: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas/local_cell_coordinates.xlsx
Intermediate output file path: /Users/vaiostriantafyllou/Desktop/chile_lithium/data/raw_data/ndvi/ivas/earth_engine_output/local_ndvi_landsat_annual.csv


# Set up Google Earth Engine

In [18]:
# Reinstall geemap from the GitHub master archive (see the cell output), then
# initialise the Earth Engine client used by every later cell.
# NOTE(review): update_package() installs an unpinned development build on every
# run, and geemap was already imported above, so the running kernel presumably
# keeps the previously loaded version until it is restarted - consider pinning
# the version in the environment instead.
geemap.update_package()
# NOTE(review): presumably requires credentials created beforehand (e.g. with
# ee.Authenticate()) - confirm.
ee.Initialize()

Downloading https://github.com/gee-community/geemap/archive/master.zip ...
Unzipping geemap-master.zip ...
Data downloaded to: /Users/vaiostriantafyllou/Downloads/geemap-master
Processing /Users/vaiostriantafyllou/Downloads/geemap-master
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: geemap
  Building wheel for geemap (setup.py): started
  Building wheel for geemap (setup.py): finished with status 'done'
  Created wheel for geemap: filename=geemap-0.29.6-py2.py3-none-any.whl size=2191773 sha256=fb64a477fdf863d736d61a89ca5c7637efddc60cecc6f54bfb4f78e2f0dda369
  Stored in directory: /Users/vaiostriantafyllou/Library/Caches/pip/wheels/97/00/e0/de67a6fb80288e0af3875f1cb330e7e835566c0ae5606b32df
Successfully built geemap
Installing collected packages: geemap
  Attempting uninstall: geemap
    Found existing installation: geemap 0.29.6
    Uninstalling geemap-0.29.6:
      Successfully uninstalled 

# Parameters

In [69]:
# Centre of the Salar de Atacama, in decimal degrees
latitude = -23.498660512303832
longitude = -68.25180257565476

# The same point as a "latitude,longitude" string, used as the distance origin
origin = f"{latitude},{longitude}"

# Radius of the collection area around the centre point
# (passed to ee.Geometry.buffer; presumably metres - confirm)
surrounding_distance = 200000

# Date window handed to filterDate when selecting imagery
start_date, end_date = '2013-01-01', '2021-01-01'

# Smallest grid cell: side length in degrees and the matching pixel scale
grid_deg, grid_size = 0.00027, 30

# Functions

In [70]:
# Distance (km) between a row's own coordinates and its origin point

def calc_distance(dataf):
    """Return the distance in kilometres between a row's two points.

    Parameters
    ----------
    dataf : mapping or pandas.Series
        Must provide a "coordinates" entry and an "origin" entry. In this
        notebook both are "latitude,longitude" strings.

    Returns
    -------
    The ``.km`` attribute of ``distance.distance(coordinates, origin)``.
    """
    # NOTE(review): `distance` is not imported in the visible cells; presumably
    # it is geopy.distance - confirm.
    return distance.distance(dataf["coordinates"], dataf["origin"]).km

# Extract data from Google Earth Engine

In [71]:
# Area of interest: a buffer of `surrounding_distance` around the centre point
selected_point = ee.Geometry.Point([longitude, latitude])
buffer = selected_point.buffer(surrounding_distance)

# Annual NDVI images restricted to the date window and to the buffered area
# NOTE(review): the asset id refers to Landsat Collection 1 ("C01"); confirm it
# is still served by Earth Engine.
landsat_collection = (
    ee.ImageCollection("LANDSAT/LC08/C01/T1_ANNUAL_NDVI")
    .filterDate(start_date, end_date)
    .filterBounds(buffer)
)

# Collapse the collection into one image via toBands()
annual_imagery = landsat_collection.toBands()

In [None]:
# Define a function to process each row
def process_row(row, grid_deg=0.00027, grid_size=30):
    """Run zonal NDVI statistics for every latitude band described by one row.

    The row is expected to carry the labels 'long1' and 'long2' plus
    consecutive 'lat1', 'lat2', 'lat3', ... labels. Each adjacent pair
    (lat{j}, lat{j+1}) together with the two longitudes defines one bounding
    box, which is gridded with ``geemap.fishnet`` and summarised with
    ``geemap.zonal_statistics`` over the module-level ``annual_imagery``.

    Parameters
    ----------
    row : pandas.Series
        One row of the cell-coordinate sheet.
    grid_deg : float, optional
        Fishnet interval in degrees, used for both directions.
    grid_size : int, optional
        Pixel scale passed to ``zonal_statistics``.

    Returns
    -------
    pandas.DataFrame
        The per-band results stacked in band order, or an empty DataFrame when
        the row has no longitude pair or fewer than two latitude labels.
    """
    # Both longitude bounds are needed for every box. The previous condition
    # (`... and 'long1' and 'long2' in row.index`) only ever tested 'long2',
    # because the bare string 'long1' is always truthy.
    if 'long1' not in row.index or 'long2' not in row.index:
        return pd.DataFrame()

    long1, long2 = row['long1'], row['long2']
    band_frames = []

    j = 1
    while f'lat{j+1}' in row.index:
        # NOTE(review): ee.Geometry.BBox takes (west, south, east, north); this
        # assumes long1 < long2 and lat{j} < lat{j+1} - confirm against the sheet.
        data = ee.Geometry.BBox(long1, row[f'lat{j}'], long2, row[f'lat{j+1}'])

        fishnet = geemap.fishnet(data, h_interval=grid_deg, v_interval=grid_deg, delta=1)
        # The statistics are written to the shared scratch CSV and read straight
        # back; the file is overwritten on every iteration.
        geemap.zonal_statistics(annual_imagery, fishnet, out_landsat_annual, statistics_type='MEAN', scale=grid_size)
        band_frames.append(pd.read_csv(out_landsat_annual))

        j += 1
        time.sleep(2)  # pause between bands (presumably to ease load on the service)

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(band_frames) if band_frames else pd.DataFrame()

# Sheets 1 and 12 are split into three parts ('1a', '1b', '1c', ...); every
# other sheet is named by its number alone.
variations = ['a', 'b', 'c']
split_sheets = (1, 12)

# Sheet numbers to process. The downstream cells read local_1a ... local_20, so
# the full run covers 1-20 (the previous range(1, 5) stopped at sheet 4 and never
# reached the special-cased sheet 12). Narrow this range to re-run one batch.
sheet_numbers = range(1, 21)

for i in sheet_numbers:
    # Determine the file suffix
    file_suffixes = [f'{i}{variation}' for variation in variations] if i in split_sheets else [str(i)]

    for suffix in file_suffixes:
        local_output = os.path.join(path_out, f'local_{suffix}.csv')
        cell_coordinates = pd.read_excel(local_cell_coordinate_file, sheet_name=suffix)

        # Run process_row once per sheet row and collect the resulting frames
        # (explicit loop rather than DataFrame.apply, which is not designed to
        # return DataFrame objects).
        row_results = [process_row(row) for _, row in cell_coordinates.iterrows()]
        if not row_results:
            # pd.concat() raises on an empty list; nothing to save for this sheet.
            print(f"No rows in sheet {suffix}; skipped")
            continue

        # Concatenate the individual results into a single DataFrame
        final_result = pd.concat(row_results, ignore_index=True)

        # Save the final result DataFrame to a CSV file. The index column is
        # written on purpose: the distance cell drops it as 'Unnamed: 0'.
        final_result.to_csv(local_output)

        print(f"Done with {suffix}")
        time.sleep(2)

# Calculate distances from the center of the Salar

In [135]:
# Base names of the raw output files: areas 1 and 12 come in three parts
# (a, b, c); areas 2-11 and 13-20 are single files.
file_names = [
    f"local_{number}{part}"
    for number in range(1, 21)
    for part in (("a", "b", "c") if number in (1, 12) else ("",))
]

def determine_group(file_name, idx=None):
    """Return the numeric group encoded in a file name such as 'local_12b'.

    The name is split on '_' and the first part made of decimal digits,
    optionally followed by one letter, decides the group: 'local_1a' -> 1,
    'local_7' -> 7, 'local_12c' -> 12.

    Parameters
    ----------
    file_name : str
        Base file name, e.g. 'local_3' or 'local_12a'.
    idx : int, optional
        Ignored. Kept so existing ``determine_group(file_name, idx)`` calls
        keep working; the group used to be derived from this list position,
        which silently broke whenever the file list changed order or length.

    Returns
    -------
    int or None
        The group number, or None when no part of the name carries one.
    """
    for part in file_name.split('_'):
        # Drop a single trailing part letter ('a', 'b', 'c', ...) if present.
        digits = part[:-1] if part[-1:].isalpha() else part
        if digits.isdecimal():
            return int(digits)
    return None

# For every raw output file: tag the group, derive cell centres, measure the
# distance to the origin, and write the enriched table to the processed folder.
for idx, file_name in enumerate(file_names, start=1):
    df = (
        pd.read_csv(f"{path_out}/{file_name}.csv")
        # Group label comes from the file name / position in the list
        .assign(group=determine_group(file_name, idx))
        # Columns not needed downstream: the saved index and the system:index column
        .drop(columns=['Unnamed: 0', 'system:index'])
    )

    # Cell centre = midpoint of the bounding edges; 'coordinates' is the
    # "latitude,longitude" string form. 'origin' repeats the reference point on
    # every row so calc_distance can read both from the row.
    df = df.assign(
        longitude=lambda cells: (cells['east'] + cells['west']) / 2,
        latitude=lambda cells: (cells['north'] + cells['south']) / 2,
        coordinates=lambda cells: cells['latitude'].astype(str) + ',' + cells['longitude'].astype(str),
        origin=origin,
    )

    # Row-wise distance between 'coordinates' and 'origin'
    df['distance'] = df.apply(calc_distance, axis=1)

    # Save the processed file
    df.to_csv(f"{path_out_processed}/{file_name}_with_distances.csv", index=False)


# Calculate distances from individual wells

In [136]:
# Well locations, keyed by well name, as (latitude, longitude) pairs
wells = dict(
    ca2015=(-23.539376, -68.058171),
    socaire5=(-23.451465, -68.039001),
    allana1=(-23.373125, -68.031744),
    camar2=(-23.418412, -68.040096),
    mullay1=(-23.302575, -68.022884),
)

def calc_distance_from_well(measure_coords, well_coords):
    """Return the distance in kilometres between a measurement point and a well.

    Both arguments are forwarded unchanged to ``distance.distance``; in this
    notebook ``measure_coords`` is a "latitude,longitude" string and
    ``well_coords`` a (latitude, longitude) tuple.
    """
    # NOTE(review): `distance` is not imported in the visible cells; presumably
    # it is geopy.distance - confirm.
    separation = distance.distance(measure_coords, well_coords)
    return separation.km

# Extend each processed file with one distance column per well
for file_name in file_names:
    df = pd.read_csv(f"{path_out_processed}/{file_name}_with_distances.csv")

    # One 'distance_<well>' column per entry in `wells`, computed per row from
    # the row's 'coordinates' value
    for well_name, well_coords in wells.items():
        column = f'distance_{well_name}'
        df[column] = df['coordinates'].apply(
            lambda coords: calc_distance_from_well(coords, well_coords)
        )

    # Written next to the input under a new suffix
    df.to_csv(f"{path_out_processed}/{file_name}_with_distances_individual.csv", index=False)