# In [None]:
import pandas as pd
import os
import zipfile

# Location of the crops archive and the directory it is unpacked into
zip_path = '/mnt/data/Crops.zip'
unzip_dir = '/mnt/data/Crops'

# Unpack every archive member into unzip_dir (ZipFile opens read-only by
# default); the context manager closes the archive afterwards
with zipfile.ZipFile(zip_path) as zip_ref:
    zip_ref.extractall(path=unzip_dir)

# Function to load and preprocess individual crop files
def load_crop_data(unzip_dir):
    """Load every crop CSV found under ``unzip_dir`` into one DataFrame.

    The directory tree is walked recursively. From each file whose name ends
    in ``.csv`` only the ``Area``, ``Item``, ``Year``, ``Unit`` and ``Value``
    columns are kept; they are renamed to ``Region``, ``Crop``, ``Year``,
    ``Unit`` and ``Yield`` respectively.

    Args:
        unzip_dir: Root directory to search for CSV files.

    Returns:
        A DataFrame holding the rows of all files, concatenated under a fresh
        0..n-1 index. Within each directory, files are read in sorted name
        order (and subdirectories are visited in sorted order), so the row
        order is reproducible across runs and filesystems.

    Raises:
        ValueError: If no ``.csv`` file exists under ``unzip_dir``.
        KeyError: If a file lacks one of the required source columns.
    """
    # Source column -> output column; key order also fixes the output order.
    column_map = {
        'Area': 'Region',
        'Item': 'Crop',
        'Year': 'Year',
        'Unit': 'Unit',
        'Value': 'Yield',
    }

    frames = []
    for root, dirs, files in os.walk(unzip_dir):
        # os.walk yields entries in arbitrary filesystem order; sorting the
        # directory list in place makes the traversal order deterministic.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.csv'):
                continue
            df = pd.read_csv(os.path.join(root, file))
            frames.append(df[list(column_map)].rename(columns=column_map))

    if not frames:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # keep the exception type but say what actually went wrong.
        raise ValueError(f"No CSV files found under {unzip_dir!r}")

    return pd.concat(frames, ignore_index=True)

# Load all crop data (one row per record across every crop CSV)
crop_df = load_crop_data(unzip_dir)

# Load rainfall and temperature datasets
rainfall_df = pd.read_csv('/mnt/data/rainfall.csv')
temperature_df = pd.read_csv('/mnt/data/temperature.csv')

# Rename rainfall columns positionally.
# NOTE(review): assumes the file's columns are ordered year, region,
# rainfall -- confirm against the CSV header.
rainfall_df.columns = ['Year', 'Region', 'Rainfall']

# Rename temperature columns positionally.
# NOTE(review): assumes the file's columns are ordered year, region, min,
# max, average temperature -- confirm against the CSV header.
temperature_df.columns = ['Year', 'Region', 'MinTemp', 'MaxTemp', 'AvgTemp']

# Merge datasets on Region and Year. pd.merge defaults to an inner join, so
# rows without a matching (Region, Year) in every dataset are dropped; the
# join type is spelled out here to make that visible.
merged_df = pd.merge(crop_df, rainfall_df, how='inner', on=['Region', 'Year'])
merged_df = pd.merge(merged_df, temperature_df, how='inner', on=['Region', 'Year'])

# Convert categorical variables to numerical codes. Overwriting the columns
# would otherwise discard the original labels, so the code -> label mappings
# are kept to allow the encoded values to be decoded later.
region_categorical = merged_df['Region'].astype('category')
crop_categorical = merged_df['Crop'].astype('category')
region_labels = dict(enumerate(region_categorical.cat.categories))
crop_labels = dict(enumerate(crop_categorical.cat.categories))
merged_df['Region'] = region_categorical.cat.codes
merged_df['Crop'] = crop_categorical.cat.codes

# Display the first few rows of the merged dataset
print(merged_df.head())
