In [None]:
import geopandas as gpd
import numpy as np
import rasterio
from rasterio.crs import CRS
from rasterio.mask import mask
from shapely.geometry import mapping

def generateNumpyFromTif(fileName, geoJson):
    """Clip a raster to every county polygon and collect the clipped first band.

    Args:
        fileName: Path to the GeoTIFF to clip.
        geoJson: Path to a vector file readable by geopandas whose features
            carry a 'countyfp' attribute.

    Returns:
        A list of ``(band, countyfp)`` tuples, one per county that overlaps the
        raster, where ``band`` is the first band returned by ``rasterio.mask.mask``
        (cropped to the county's extent) and ``countyfp`` is the feature's
        'countyfp' value. Counties that do not overlap the raster are skipped.

    Raises:
        KeyError: If a feature has no 'countyfp' attribute.
    """
    masked_datasets = []

    with rasterio.open(fileName) as state_src:
        state_meta = state_src.meta
        print(state_meta)

        # Use the CRS recorded in the raster itself rather than assuming one.
        state_crs = state_meta['crs']

        counties = gpd.read_file(geoJson)
        # NOTE(review): allow_override=True forces EPSG:4326 even when the file
        # declares a different CRS - confirm the boundaries really are lon/lat.
        counties = counties.set_crs(epsg=4326, allow_override=True)
        # Reproject the polygons so they line up with the raster grid.
        counties = counties.to_crs(state_crs)

        for _, county in counties.iterrows():
            county_geojson = mapping(county['geometry'])
            try:
                masked_data, _masked_transform = mask(state_src, [county_geojson], crop=True)
            except ValueError as err:
                # rasterio raises ValueError when the polygon does not overlap
                # the raster; skip that county but say so instead of hiding it.
                print(f"Skipping county {county['countyfp']}: {err}")
                continue
            masked_datasets.append((masked_data[0], county['countyfp']))

    return masked_datasets

In [None]:
# Clip the 2015 spring surface-temperature raster to each county boundary.
arr = generateNumpyFromTif(
    fileName='/content/drive/MyDrive/Conservatives-Official/Surface_Temperature/Data/MODIS_Surface_Temperature/avg_surface_temp_Mar_to_May_2015.tif',
    geoJson='/content/drive/MyDrive/Conservatives-Official/Surface_Temperature/Data/South_Dakota_County_Boundaries.geojson',
)

In [None]:
import h5py

# Create a new HDF5 file named 'data.h5' (opened in 'w' mode) and store one array in it.
# NOTE(review): `data_array` is not defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel - define it before this cell or remove the cell.
with h5py.File('data.h5', 'w') as f:
    # Store the array under the key 'dataset_name'
    f.create_dataset('dataset_name', data=data_array)
    # You can create multiple datasets as needed
    # f.create_dataset('another_dataset', data=another_data_array)


In [None]:
import pandas as pd
# Load the first yield export and keep only the spring-wheat (excl. durum) yield rows.
df_1 = pd.read_csv('/content/drive/MyDrive/Conservatives-Official/Surface_Temperature/Data/F59D4B49-DA13-3186-B13D-09EB8A55030B.csv')
is_spring_wheat_1 = df_1["Data Item"] == 'WHEAT, SPRING, (EXCL DURUM) - YIELD, MEASURED IN BU / ACRE'
df_1 = df_1.loc[is_spring_wheat_1]

# Only the location, year and yield columns are needed downstream.
filtered_data_1 = df_1.loc[:, ['State ANSI', 'County ANSI', 'Year', 'Value']]

In [None]:
# Load the second yield export and keep only the spring-wheat (excl. durum) yield rows.
df = pd.read_csv('/content/drive/MyDrive/Conservatives-Official/Surface_Temperature/Data/CC467883-C048-3E63-8CEC-477F196EAEE2.csv')
df = df[df["Data Item"] == 'WHEAT, SPRING, (EXCL DURUM) - YIELD, MEASURED IN BU / ACRE']

# Build the cleaned frame as a fresh object instead of mutating a column slice
# in place (which triggers SettingWithCopyWarning): missing and infinite values
# become 0 so the identifier columns can be cast to int.
filtered_data = (
    df[['State ANSI', 'County ANSI', 'Year', 'Value']]
    .copy()
    .fillna(0)
    .replace([np.inf, -np.inf], 0)
    .astype({'County ANSI': int, 'Year': int})
)

In [None]:
# Stack both yield tables and clean the result in a single expression so that
# re-running the cell always yields the same frame: missing and infinite values
# become 0, then the identifier columns are cast to int.
df_concatenated = (
    pd.concat([filtered_data, filtered_data_1], axis=0, ignore_index=True)
    .fillna(0)
    .replace([np.inf, -np.inf], 0)
    .astype({'County ANSI': int, 'Year': int})
)

# Preview as the last expression so the notebook actually displays it.
df_concatenated.head(10)

In [None]:
import h5py
import numpy as np
from skimage.transform import resize

# Every county raster is resampled to this (rows, cols) size before storage.
new_height = 100
new_width = 100

# State codes to process; '46' is the code used for the South Dakota boundary file below.
states = ['46']
years = [str(y) for y in range(2007, 2024)]  # '2007' .. '2023'

county_boundaries_path = '/content/drive/MyDrive/Conservatives-Official/Surface_Temperature/Data/South_Dakota_County_Boundaries.geojson'

# Open in append mode: existing rows are kept, so re-running this cell appends
# the same samples again. Delete the file first for a clean rebuild.
with h5py.File('/content/drive/MyDrive/Conservatives-Official/Surface_Temperature/Pickle_files/south_dakota_wheat.h5', 'a') as hf:
    # Create the resizable datasets on first use.
    if 'features' not in hf:
        hf.create_dataset('features', shape=(0, new_height, new_width), maxshape=(None, new_height, new_width), chunks=True)
        hf.create_dataset('county', shape=(0,1), maxshape=(None, 1), chunks=True)
        hf.create_dataset('year', shape=(0,1), maxshape=(None, 1), chunks=True)
        hf.create_dataset('state', shape=(0,1), maxshape=(None, 1), chunks=True)
        hf.create_dataset('yield', shape=(0,1), maxshape=(None, 1), chunks=True)

    for year in years:
        for state in states:
            # NOTE(review): the original call also carried a stray 2015 raster path
            # under .../Conservatives-Official/Surface_Temperature/Data/MODIS_Surface_Temperature/
            # (which made the line a SyntaxError and bound geoJson to a .tif).
            # Confirm which directory actually holds the per-year rasters.
            new_X = generateNumpyFromTif(
                fileName=f'/content/drive/MyDrive/MODIS_Surface_Temperature/avg_surface_temp_Mar_to_May_{year}.tif',
                geoJson=county_boundaries_path,
            )

            # Yield rows for this year/state; filtered once instead of per county.
            yields_for_year = df_concatenated[
                (df_concatenated['Year'] == int(year))
                & (df_concatenated['State ANSI'] == int(state))
            ]

            for data, county in new_X:
                county_yield = yields_for_year.loc[
                    yields_for_year['County ANSI'] == int(county), 'Value'
                ]
                # Only counties with a recorded yield become training samples.
                if county_yield.empty:
                    continue

                resized_data = resize(data, (new_height, new_width), anti_aliasing=True)

                # Grow each dataset by one row and write the new sample into it.
                # Identifiers are converted to int explicitly rather than relying
                # on implicit string-to-number conversion on write.
                sample = (
                    ('features', resized_data),
                    ('county', int(county)),
                    ('year', int(year)),
                    ('state', int(state)),
                    ('yield', county_yield.iloc[0]),
                )
                for name, value in sample:
                    dset = hf[name]
                    dset.resize(dset.shape[0] + 1, axis=0)
                    dset[-1] = value

    print(hf['features'].shape, hf['county'].shape)