# Analyze: Rainfall data

In [28]:
import csv
from statistics import mean, median
from collections import defaultdict

# Step 1: Read CSV Data
def read_csv_data(file_path, parameter='T2M_MAX'):
    """Collect the CSV rows whose first field equals ``parameter``.

    Args:
        file_path: Path of the CSV file to read.
        parameter: Value the first column must match for a row to be kept.
            Defaults to 'T2M_MAX', the value this function has always
            filtered on.

    Returns:
        list[list[str]]: Matching rows in file order, each as the list of
        string fields produced by ``csv.reader``. Empty rows and rows with
        a different first field are skipped.
    """
    # newline='' lets the csv module do its own line-ending handling, which
    # it needs in order to parse quoted fields containing line breaks.
    with open(file_path, 'r', newline='') as file:
        return [row for row in csv.reader(file) if row and row[0] == parameter]

# Step 2: Analyze Data by Year and Month
def analyze_data(data):
    """Group the readings of each row by year and by calendar month.

    Each row is expected to carry the year in its second field and its
    readings from the fifth field up to, but not including, the last field
    (presumably the annual aggregate -- confirm against the file layout).

    Args:
        data: Iterable of rows, each a sequence of strings.

    Returns:
        tuple: ``(yearly_data, monthly_data)``, both ``defaultdict(list)``.
        ``yearly_data[year]`` holds one value per row: the largest
        non-missing reading of that row. ``monthly_data[month]`` holds every
        non-missing reading found in that month's column.

    Raises:
        ValueError: If the year or a reading cannot be parsed as a number.
    """
    month_names = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                   'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')
    missing_value = -999.0

    yearly_data = defaultdict(list)
    monthly_data = defaultdict(list)

    for row in data:
        year = int(row[1])

        # Compare the sentinel numerically so that '-999', '-999.0' and
        # '-999.00' are all recognised as missing.
        readings = [float(cell) for cell in row[4:-1]]
        present = [value for value in readings if value != missing_value]

        # A row with no usable reading contributes nothing to the yearly
        # series; calling max() on it would raise ValueError.
        if present:
            yearly_data[year].append(max(present))

        # zip() stops after twelve columns, pairing each reading with its month.
        for month, value in zip(month_names, readings):
            if value != missing_value:
                monthly_data[month].append(value)

    return yearly_data, monthly_data

# Step 3: Print Analysis Results
def print_analysis(yearly_data, monthly_data):
    """Write the yearly and monthly summaries to standard output.

    Args:
        yearly_data: Mapping of year -> list of temperatures. Years are
            printed in ascending order with their maximum and mean.
        monthly_data: Mapping of month label -> list of temperatures. Months
            are printed in the mapping's own iteration order with their
            maximum, mean and median.
    """
    print("Yearly Analysis:")
    for year in sorted(yearly_data):
        values = yearly_data[year]
        highest = max(values)
        average = mean(values)
        print(f"{year}: Max: {highest:.2f}°C, Avg: {average:.2f}°C")

    print("\nMonthly Analysis:")
    for month in monthly_data:
        values = monthly_data[month]
        highest, average, middle = max(values), mean(values), median(values)
        print(f"{month}: Max: {highest:.2f}°C, Avg: {average:.2f}°C, Median: {middle:.2f}°C")

# Step 4: Main Function to Execute the Workflow
def main(file_path="POWER_Regional_Monthly_2010_2022.csv"):
    """Run the read -> analyze -> print workflow for one CSV file.

    Args:
        file_path: Path of the CSV file to process. Defaults to the file
            this function has always read, so ``main()`` behaves as before.
    """
    data = read_csv_data(file_path)
    yearly_data, monthly_data = analyze_data(data)
    print_analysis(yearly_data, monthly_data)

# Entry Point
if __name__ == "__main__":
    main()


Yearly Analysis:
2010: Max: 47.94°C, Avg: 43.40°C
2011: Max: 47.22°C, Avg: 41.24°C
2012: Max: 48.83°C, Avg: 44.24°C
2013: Max: 48.58°C, Avg: 43.02°C
2014: Max: 49.72°C, Avg: 43.80°C
2015: Max: 48.68°C, Avg: 43.39°C
2016: Max: 47.38°C, Avg: 43.23°C
2017: Max: 48.80°C, Avg: 42.72°C
2018: Max: 47.63°C, Avg: 41.51°C
2019: Max: 48.61°C, Avg: 43.45°C
2020: Max: 48.27°C, Avg: 42.04°C
2021: Max: 44.39°C, Avg: 40.63°C
2022: Max: 48.71°C, Avg: 42.31°C

Monthly Analysis:
JAN: Max: 32.73°C, Avg: 26.06°C, Median: 26.87°C
FEB: Max: 38.51°C, Avg: 31.42°C, Median: 33.14°C
MAR: Max: 44.16°C, Avg: 37.55°C, Median: 39.15°C
APR: Max: 47.26°C, Avg: 40.73°C, Median: 42.48°C
MAY: Max: 48.71°C, Avg: 42.02°C, Median: 44.05°C
JUN: Max: 49.72°C, Avg: 40.86°C, Median: 42.64°C
JUL: Max: 46.42°C, Avg: 34.84°C, Median: 34.72°C
AUG: Max: 41.02°C, Avg: 32.36°C, Median: 33.27°C
SEP: Max: 40.40°C, Avg: 31.58°C, Median: 32.58°C
OCT: Max: 39.36°C, Avg: 30.78°C, Median: 31.62°C
NOV: Max: 34.84°C, Avg: 28.09°C, Median: 28.8

## To Heatmap

### Rainfall data difference

In [159]:
import pandas as pd
import numpy as np

# Load the table: the first 10 lines of the file are skipped and the column
# names are supplied explicitly.
file_path = 'POWER_Regional_Monthly_2010_2022.csv'
column_names = (
    ["PARAMETER", "YEAR", "LAT", "LON"]
    + ["JAN", "FEB", "MAR", "APR", "MAY", "JUN",
       "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"]
    + ["ANN"]
)
data = pd.read_csv(file_path, skiprows=10, names=column_names)

# Force the annual column to numbers; anything unparseable becomes NaN
data['ANN'] = pd.to_numeric(data['ANN'], errors='coerce')

# One frame per year of interest, keyed by grid point
grid_keys = ['LAT', 'LON']
is_2010 = data['YEAR'] == 2010
is_2022 = data['YEAR'] == 2022
data_2010 = data.loc[is_2010, grid_keys + ['ANN']].rename(columns={'ANN': 'ANN_2010'})
data_2022 = data.loc[is_2022, grid_keys + ['ANN']].rename(columns={'ANN': 'ANN_2022'})

# Pair the two years per grid point, compute 2022 minus 2010, and drop the
# grid points where that difference is NaN
merged_data = (
    data_2010
    .merge(data_2022, on=grid_keys)
    .assign(Difference=lambda frame: frame['ANN_2022'] - frame['ANN_2010'])
    .dropna(subset=['Difference'])
)

# Keep only the columns used for the heatmap
heatmap_data = merged_data[['LAT', 'LON', 'Difference']]

# Print every [LAT, LON, Difference] triple as one nested list
print(heatmap_data.values.tolist())


[[22.75, 80.25, -0.3100000000000023], [22.75, 80.75, -0.490000000000002], [22.75, 81.25, -0.7199999999999989], [22.75, 81.75, -0.730000000000004], [22.75, 82.25, -0.7000000000000028], [22.75, 82.75, -0.6899999999999977], [22.75, 83.25, -0.6999999999999957], [22.75, 83.75, -0.8200000000000003], [22.75, 84.25, -0.7999999999999972], [22.75, 84.75, -0.759999999999998], [22.75, 85.25, -0.7000000000000028], [22.75, 85.75, -0.8800000000000026], [22.75, 86.25, -1.7199999999999989], [22.75, 86.75, -2.960000000000001], [22.75, 87.25, -3.489999999999995], [22.75, 87.75, -3.0799999999999983], [22.75, 88.25, -1.9499999999999957], [22.75, 88.75, -0.740000000000002], [22.75, 89.25, -0.46999999999999886], [23.25, 80.25, -0.04999999999999716], [23.25, 80.75, -0.12000000000000455], [23.25, 81.25, -0.3299999999999983], [23.25, 81.75, -0.3200000000000003], [23.25, 82.25, -0.28999999999999915], [23.25, 82.75, -0.28999999999999915], [23.25, 83.25, -0.3200000000000003], [23.25, 83.75, -0.4000000000000057], [

In [177]:
import pandas as pd
import json

# Sample data preparation (assuming you have the merged_data DataFrame from previous steps)
# Convert the DataFrame to GeoJSON
def dataframe_to_geojson(df, lat_col, lon_col,
                         property_cols=("ANN_2010", "ANN_2022", "Difference")):
    """Build a GeoJSON FeatureCollection with one Point feature per row.

    Args:
        df: DataFrame holding the coordinate and property columns.
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.
        property_cols: Names of the columns copied into each feature's
            ``properties``. Defaults to the three columns this function has
            always exported.

    Returns:
        dict: ``{"type": "FeatureCollection", "features": [...]}``. Each
        feature's coordinates are ordered ``[lon, lat]``. Values are taken
        through ``Series.tolist()`` so they are plain Python scalars rather
        than NumPy scalars, which keeps the printed form free of
        ``np.float64(...)`` wrappers and lets ``json.dump`` handle integer
        columns. NaN values are passed through unchanged.

    Raises:
        KeyError: If any of the named columns is absent from ``df``.
    """
    property_cols = list(property_cols)

    # Column-wise extraction: each list holds native Python values in row order.
    longitudes = df[lon_col].tolist()
    latitudes = df[lat_col].tolist()
    property_values = [df[name].tolist() for name in property_cols]

    features = []
    for lon, lat, *values in zip(longitudes, latitudes, *property_values):
        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [lon, lat]
            },
            "properties": dict(zip(property_cols, values)),
        })

    geojson = {
        "type": "FeatureCollection",
        "features": features
    }
    return geojson

# Convert the dataframe to geojson
geojson_data = dataframe_to_geojson(merged_data, lat_col="LAT", lon_col="LON")

print(geojson_data)

# Writing to disk is opt-in so that re-running the cell does not overwrite an
# existing file; flip the flag to export.
SAVE_GEOJSON = False
GEOJSON_PATH = 'rainfall_data.geojson'

if SAVE_GEOJSON:
    with open(GEOJSON_PATH, 'w') as f:
        json.dump(geojson_data, f, indent=2)
    print("GeoJSON file created successfully!")
else:
    # Report what actually happened instead of claiming a file was written.
    print(f"GeoJSON built in memory ({len(geojson_data['features'])} features); "
          f"set SAVE_GEOJSON = True to write {GEOJSON_PATH}.")


{'type': 'FeatureCollection', 'features': [{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [80.25, 22.75]}, 'properties': {'ANN_2010': 45.67, 'ANN_2022': 45.36, 'Difference': -0.3100000000000023}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [80.75, 22.75]}, 'properties': {'ANN_2010': 44.88, 'ANN_2022': 44.39, 'Difference': -0.490000000000002}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [81.25, 22.75]}, 'properties': {'ANN_2010': 44.33, 'ANN_2022': 43.61, 'Difference': -0.7199999999999989}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [81.75, 22.75]}, 'properties': {'ANN_2010': 45.1, 'ANN_2022': 44.37, 'Difference': -0.730000000000004}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [82.25, 22.75]}, 'properties': {'ANN_2010': 45.68, 'ANN_2022': 44.98, 'Difference': -0.7000000000000028}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [82.75, 22.75]}, 'properties': {'AN