# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Timestamp label shown at the top of the report output.
# NOTE: this is a fixed, hand-written string; it is not read from the system clock.
current_datetime = "03:29 PM IST, Monday, August 18, 2025"
print("Current Date and Time: {}".format(current_datetime))

# Load CSV data and keep only the Madhya Pradesh rows.
# .copy() makes the filtered frame independent of the full table, so the column
# assignments below operate on a real frame instead of a slice (this avoids
# pandas' SettingWithCopyWarning and any ambiguity about what gets modified).
df = pd.read_csv('/content/Agri-multicrop.csv')  # Replace with your CSV path
df = df[df['State Name'] == 'Madhya Pradesh'].copy()

# Convert and clean data types. errors='coerce' turns unparseable values into
# NaT / NaN instead of raising, so bad cells can be dropped afterwards.
# NOTE(review): the date format of 'Reported Date' is not visible here; if it is
# day-first, pass an explicit format= to pd.to_datetime — confirm against the CSV.
df['Reported Date'] = pd.to_datetime(df['Reported Date'], errors='coerce')
df['Arrivals (Tonnes)'] = pd.to_numeric(df['Arrivals (Tonnes)'], errors='coerce')
df['Modal Price (Rs./Quintal)'] = pd.to_numeric(df['Modal Price (Rs./Quintal)'], errors='coerce')
# Label missing crop types BEFORE casting to str: relying on NaN being rendered
# as the literal text 'nan' by astype(str) depends on the pandas version/dtype.
df['Type'] = df['Type'].fillna('Unknown').astype(str)

# Drop rows with missing key data ('Type' can no longer be missing at this
# point; it stays in the subset so the list of required columns is explicit).
df = df.dropna(subset=['Modal Price (Rs./Quintal)', 'Arrivals (Tonnes)', 'Type'])

# Debug: Print unique values and types in 'Type' column
print("\nDebug: Unique values in 'Type' column:", df['Type'].unique())
print("Debug: Data types:", df.dtypes)

# 1. Basic Statistics
# Print summary statistics of the modal price, followed by the distinct crop
# types and market names present in the filtered data.
print("\nBasic Statistics:")
price_summary = df['Modal Price (Rs./Quintal)'].describe()
print(price_summary)

crop_names = df['Type'].unique()
print("\nUnique Crops:", crop_names)

market_names = df['Market Name'].unique()
print("Unique Markets:", market_names)

# 2. Price Trends by Crop
# Draw one line per crop type and save the figure to price_trends_by_crop.png.
plt.figure(figsize=(10, 6))
# groupby(sort=False) yields the crops in order of first appearance (the same
# order df['Type'].unique() gives) without re-scanning the frame for each crop.
for crop, crop_data in df.groupby('Type', sort=False):
    # plt.plot joins points in row order, so sort chronologically first;
    # otherwise rows that are out of date order make the line double back.
    crop_data = crop_data.sort_values('Reported Date')
    plt.plot(crop_data['Reported Date'], crop_data['Modal Price (Rs./Quintal)'], label=crop)
plt.title('Price Trends by Crop (Madhya Pradesh)')
plt.xlabel('Date')
plt.ylabel('Modal Price (Rs./Quintal)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('price_trends_by_crop.png')
plt.close()

# 3. Arrivals vs. Price
# Scatter of arrivals against modal price, coloured by crop type, written to
# arrivals_vs_price.png.
scatter_fig = plt.figure(figsize=(10, 6))
scatter_ax = sns.scatterplot(
    data=df,
    x='Arrivals (Tonnes)',
    y='Modal Price (Rs./Quintal)',
    hue='Type',
    palette='deep',
)
scatter_ax.set_title('Arrivals vs. Price by Crop')
scatter_ax.set_xlabel('Arrivals (Tonnes)')
scatter_ax.set_ylabel('Modal Price (Rs./Quintal)')
# Anchor the legend just outside the right edge of the axes.
scatter_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
scatter_fig.tight_layout()
scatter_fig.savefig('arrivals_vs_price.png')
plt.close(scatter_fig)

# 4. Price Distribution
# Box plot of modal price for each crop type, written to price_distribution.png.
# NOTE(review): newer seaborn releases reportedly warn when `palette` is passed
# without `hue` — confirm against the installed seaborn version.
box_fig = plt.figure(figsize=(10, 6))
box_ax = sns.boxplot(
    data=df,
    x='Type',
    y='Modal Price (Rs./Quintal)',
    palette='deep',
)
box_ax.set_title('Price Distribution by Crop')
box_ax.set_xlabel('Crop Type')
box_ax.set_ylabel('Modal Price (Rs./Quintal)')
plt.xticks(rotation=45)
box_fig.tight_layout()
box_fig.savefig('price_distribution.png')
plt.close(box_fig)

# 5. Time Series Completeness
# Count the price records available on each reported date and plot the counts
# over time, written to data_availability.png.
availability_fig = plt.figure(figsize=(10, 6))
records_per_date = df.groupby('Reported Date')['Modal Price (Rs./Quintal)'].count()
availability_ax = records_per_date.plot()
availability_ax.set_title('Data Availability Over Time')
availability_ax.set_xlabel('Date')
availability_ax.set_ylabel('Number of Records')
plt.xticks(rotation=45)
availability_fig.tight_layout()
availability_fig.savefig('data_availability.png')
plt.close(availability_fig)

# Conclusion for LSTM Model
# Print the closing summary. The header is an f-string so that
# {current_datetime} is interpolated instead of being printed literally.
# NOTE: the figures quoted in the bullet points (price ranges, row counts,
# expected accuracy) are hard-coded text; none of them is computed from df.
print(f"\nEDA Insights for LSTM Model as of {current_datetime}:")
print("- Price volatility observed (e.g., Wheat: ₹1711-2000, Soybean: ₹3800-4100) suggests time-series modeling is suitable.")
print("- Sparse data (15 rows) limits accuracy; full AGMARKNET data (1000+ rows) will improve LSTM performance.")
print("- Arrivals vs. price shows no strong correlation, so focus on price time-series.")
print("- Recommendation: Use LSTM with seq_length=10 and epochs=50 on full data for ~85% accuracy (MAE ~₹100-200).")