### Import Dependencies

In [2]:
import pandas as pd
import pathlib

### Mount Drive

In [1]:
# Mount Google Drive at /content/drive when the notebook runs on Google Colab.
# Outside Colab the google.colab package is normally not installed, so the
# import raises ImportError; that error is deliberately ignored so the same
# notebook can also run locally without a mounted drive.
try:
    from google.colab import drive # type: ignore
    drive.mount('/content/drive')
except ImportError:
    pass

### Define Directories

In [6]:
# Google Drive is mounted at /content/drive by the "Mount Drive" cell, so the
# Colab copy of the data lives under /content/drive/MyDrive/...
colab_data = pathlib.Path('/content/drive/MyDrive/B_SanLuis/Data/')
local_data = pathlib.Path("../data/B_SanLuis/Data/")

# Prefer the Colab location when it exists; otherwise use the local data folder.
DIR = colab_data if colab_data.exists() else local_data

# Crop/season to process. Must be one of the keys of CROP_FILES below.
CropConcern = 'Lettuce2024'

# (input CSV, output CSV) paths relative to DIR for every supported crop/season.
CROP_FILES = {
    'Broccoli2023': (
        '0_Input_Datasets/Broccoli/Broccoli2023_30min_piqc.csv',
        '1_Intermediate_Datasets/Broccoli/Broccoli2023FP.csv',
    ),
    'Lettuce2023': (
        '0_Input_Datasets/Lettuce/Lettuce2023_30min_gapfilled.csv',
        '1_Intermediate_Datasets/Lettuce/Lettuce2023FP.csv',
    ),
    'Lettuce2024': (
        '0_Input_Datasets/Lettuce/Lettuce2024_30min_gapfilled.csv',
        '1_Intermediate_Datasets/Lettuce/Lettuce2024FP.csv',
    ),
}

# Fail here with a clear message instead of leaving the paths empty and
# hitting a confusing read/write error in a later cell.
if CropConcern not in CROP_FILES:
    raise ValueError(
        f"Unknown CropConcern {CropConcern!r}; expected one of {sorted(CROP_FILES)}"
    )

in_relative, out_relative = CROP_FILES[CropConcern]
in_file = DIR / in_relative
out_file = DIR / out_relative

print('Input file is:', str(in_file))
print('Output file is:', str(out_file))

Input file is: ../data/B_SanLuis/Data/0_Input_Datasets/Lettuce/Lettuce2024_30min_gapfilled.csv
Output file is: ../data/B_SanLuis/Data/1_Intermediate_Datasets/Lettuce/Lettuce2024FP.csv


### Function

In [4]:
def cleandataFXN(crop_df, zm=2.0, d=0.335, z0=0.1, start_hour=9, end_hour=15):
    """Build the footprint-input table from a half-hourly flux DataFrame.

    The input frame is not modified. Source columns are renamed as follows:
    ``WS -> u_mean``, ``MO_LENGTH -> L``, ``V_SIGMA -> sigma_v``,
    ``USTAR -> u_star``, ``WD -> wind_dir``. The ``date_time`` column is
    parsed with ``pd.to_datetime`` and split into ``yyyy, mm, day, HH, MM``.

    Parameters
    ----------
    crop_df : pandas.DataFrame
        Must contain the columns ``date_time``, ``WS``, ``USTAR``, ``WD``,
        ``V_SIGMA`` and ``MO_LENGTH``. Any other columns are ignored.
    zm : float, default 2.0
        Height from ground in meters, written to every row.
    d : float, default 0.335
        Displacement in meters, written to every row.
    z0 : float, default 0.1
        Roughness length in meters, written to every row.
    start_hour, end_hour : int, default 9 and 15
        Inclusive range of the hour-of-day (``HH``) to keep. The defaults keep
        timestamps from 09:00 up to and including 15:59.

    Returns
    -------
    pandas.DataFrame
        An independent copy with columns, in order:
        ``yyyy, mm, day, HH, MM, zm, d, z0, u_mean, L, sigma_v, u_star,
        wind_dir``. Rows keep their original index labels.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``crop_df``.
    """
    output_order = ['yyyy', 'mm', 'day', 'HH', 'MM', 'zm', 'd', 'z0',
                    'u_mean', 'L', 'sigma_v', 'u_star', 'wind_dir']
    renames = {'WS': 'u_mean', 'MO_LENGTH': 'L', 'V_SIGMA': 'sigma_v',
               'USTAR': 'u_star', 'WD': 'wind_dir'}

    # Parse once; the calendar parts below are all derived from this Series.
    timestamps = pd.to_datetime(crop_df['date_time'])

    footprint_df = (
        crop_df[list(renames)]
        .rename(columns=renames)
        .assign(
            yyyy=timestamps.dt.year,
            mm=timestamps.dt.month,
            day=timestamps.dt.day,
            HH=timestamps.dt.hour,
            MM=timestamps.dt.minute,
            zm=zm,
            d=d,
            z0=z0,
        )[output_order]
    )

    # Series.between is inclusive on both ends, i.e. start_hour <= HH <= end_hour.
    in_window = footprint_df['HH'].between(start_hour, end_hour)

    # Return a copy so callers can modify the result without pandas
    # chained-assignment warnings.
    return footprint_df.loc[in_window].copy()

### Run FXN and Save to Folder


In [7]:
# Load the input CSV selected in the "Define Directories" cell.
df = pd.read_csv(in_file)

In [8]:
# Select, rename and reorder the columns and keep only the daytime rows.
clean_df = cleandataFXN(df)

In [9]:
# Make sure the destination folder exists so the write does not fail when the
# intermediate-datasets folder has not been created yet.
out_file.parent.mkdir(parents=True, exist_ok=True)
# index=False: the row labels are not part of the output table.
clean_df.to_csv(out_file, index=False)

In [10]:
# Display the cleaned table (the notebook renders a truncated preview).
clean_df

Unnamed: 0,yyyy,mm,day,HH,MM,zm,d,z0,u_mean,L,sigma_v,u_star,wind_dir
0,2024,4,10,11,0,2.0,0.335,0.1,1.181668,-0.344012,0.532709,0.093689,231.0488
1,2024,4,10,11,30,2.0,0.335,0.1,1.051614,-2.452590,0.812059,0.180846,300.0215
2,2024,4,10,12,0,2.0,0.335,0.1,2.245800,-4.511233,1.064353,0.230961,283.9656
3,2024,4,10,12,30,2.0,0.335,0.1,3.719224,-6.171940,1.186945,0.245948,290.1801
4,2024,4,10,13,0,2.0,0.335,0.1,4.239919,-9.471722,1.117503,0.283576,297.5996
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3120,2024,6,14,11,0,2.0,0.335,0.1,4.016666,-22.064510,1.001041,0.351602,286.4773
3121,2024,6,14,11,30,2.0,0.335,0.1,5.030773,-34.082950,1.145409,0.418396,290.0015
3122,2024,6,14,12,0,2.0,0.335,0.1,6.052983,-42.038580,1.314996,0.463808,285.5940
3123,2024,6,14,12,30,2.0,0.335,0.1,6.912714,-63.658600,1.439849,0.529082,286.6799


### Headers


yyyy - year
mm   - month [1-12]
day  - day [1-31]
HH   - hour [0-23]; only hours 9-15 are kept in the output
MM   - minutes [0:30]
zm   - height from ground m [2.0]
d    - displacement m [.67*.5]
z0   - roughness length m [.1]
u_mean - avg wind speed m/s *** used WS
L    - obukhov length m *** used MO_LENGTH
sigma_v - standard deviation of lateral velocity m/s *** used V_SIGMA
u_star - friction velocity m/s *** used USTAR
wind_dir - wind direction in deg *** used WD