# Assignment 4: Climate Data Analysis for a Research Center

##### This notebook implements functions using NumPy and pandas

#  Assignment Task:


   We will write functions to:
1. Initialize Temperature and Humidity Data.
2. Check for Missing Data.
3. Convert Temperature and Calculate Discomfort Index.
4. Analyze January Temperatures.
5. Identify Extreme Temperatures.
6. Calculate Quarterly Temperature Averages.
7. Classify Humidity Levels.
8. Apply Daily Pressure Trend to Temperature Data.






In [None]:

import numpy as np
import pandas as pd

# Task 1: Initialize Temperature and Humidity Data

# Grid dimensions shared by every array below: one row per location,
# one column per day of a (non-leap) year.
NUM_LOCATIONS = 500
DAYS_PER_YEAR = 365
DATA_SHAPE = (NUM_LOCATIONS, DAYS_PER_YEAR)

# Randomly generate daily temperatures, drawn uniformly from [-10, 40)
temp_data = np.random.uniform(-10, 40, DATA_SHAPE)

# Randomly generate daily humidity data as a percentage
humidity_data = np.random.uniform(0, 100, DATA_SHAPE)


# Task 2: Check for Missing Data

# Work on flat copies so that single integer indices can address any cell
temp_data_flat = temp_data.flatten()
humidity_data_flat = humidity_data.flatten()

# Calculate number of values to set as NaN (5% of all entries)
num_missing = int(0.05 * temp_data_flat.size)

# Set 5% of values to NaN randomly; replace=False guarantees distinct
# positions, so exactly num_missing entries end up missing in each array
temp_data_flat[np.random.choice(temp_data_flat.size, num_missing, replace=False)] = np.nan
humidity_data_flat[np.random.choice(humidity_data_flat.size, num_missing, replace=False)] = np.nan

# Reshape arrays back to original shapes with NaNs included
temp_data = temp_data_flat.reshape(DATA_SHAPE)
humidity_data = humidity_data_flat.reshape(DATA_SHAPE)

# Count and display total number of NaN values in each array
temp_nan_count = np.isnan(temp_data).sum()
humidity_nan_count = np.isnan(humidity_data).sum()

print(f"Total missing entries in temperature_data: {temp_nan_count}")
print(f"Total missing entries in humidity_data: {humidity_nan_count}")

# Task 3: Convert Temperature and Calculate Discomfort Index

# Convert temperature_data from Celsius to Fahrenheit
temp_data_fahrenheit = (temp_data * 9/5) + 32

# Compute "feels like" discomfort index
discomfort_index = 0.5 * (temp_data_fahrenheit + humidity_data)
# Cap discomfort index values at 80. NaN entries compare False against 80
# and are therefore passed through unchanged (they stay NaN).
discomfort_index = np.where(discomfort_index > 80, 80, discomfort_index)

# Task 4: Analyze January Temperatures

# Extract daily temperatures for January (first 31 days)
january_temperatures = temp_data[:, :31]

# Calculate the average January temperature across all locations,
# ignoring missing (NaN) readings
average_january_temp = np.nanmean(january_temperatures)
print(f"Average January temperature across all locations: {average_january_temp:.2f}°C")

# Task 5: Identify Extreme Temperatures

# Mark temperatures exceeding 35°C as potential errors by setting them to NaN.
# The result is a new array; temp_data itself is left untouched.
temperature_data = np.where(temp_data > 35, np.nan, temp_data)

# Count number of NaN values per location on the *masked* array.
# Counting on temp_data would report only the entries blanked out in Task 2
# and miss every flagged extreme. Note that the count covers both the
# readings that were already missing and the newly flagged extremes.
null_counts_per_location = np.isnan(temperature_data).sum(axis=1)
print("Null value counts per location (for temperatures over 35°C):")
print(null_counts_per_location)

# Task 6: Calculate Quarterly Temperature Averages

# Split the year into four near-equal column blocks (92, 91, 91, 91 days for
# a 365-day year) and calculate the NaN-aware average temperature of each
# block per location.
quarters = np.array_split(temp_data, 4, axis=1)
quarterly_averages = [np.nanmean(quarter, axis=1) for quarter in quarters]
quarterly_averages = np.array(quarterly_averages).T  # Shape to (locations, 4)

print("Quarterly temperature averages for each location:")
print(quarterly_averages)

# Task 7: Classify Humidity Levels

# Classify humidity levels as "Dry" (<30%) and "Humid" (>70%).
# Missing (NaN) humidity readings fail both comparisons and count as neither.
dry_days = np.where(humidity_data < 30, 1, 0)
humid_days = np.where(humidity_data > 70, 1, 0)

# Count total number of "Dry" and "Humid" days for each location
dry_day_counts = dry_days.sum(axis=1)
humid_day_counts = humid_days.sum(axis=1)

print("Total number of 'Dry' and 'Humid' days per location:")
print("Dry days:", dry_day_counts)
print("Humid days:", humid_day_counts)

# Task 8: Apply Daily Pressure Trend to Temperature Data

# Generate a simulated pressure trend (sinusoidal to represent seasonal changes)
days = np.arange(DAYS_PER_YEAR)
pressure_variation = 5 * np.sin(2 * np.pi * days / DAYS_PER_YEAR)  # One full sine period over the year

# Apply pressure trend to adjust daily temperatures for each location.
# The 1-D (days,) trend broadcasts across every row of the
# (locations, days) temperature array.
adjusted_temperature_data = temp_data + pressure_variation

# Print adjusted temperature data (optional)
print("Adjusted temperature data (first location):")
print(adjusted_temperature_data[0, :])


Total missing entries in temperature_data: 9125
Total missing entries in humidity_data: 9125
Average January temperature across all locations: 15.00°C
Null value counts per location (for temperatures over 35°C):
[11 23 20 14 10 15 15 18 15 17 11 20  9 27 21 17 18 25 15 16 28 19 18 19
 20 18 30 16 16 20 15 13 18 31 17 23 21 17 23 19 18 17 27  9 14 17 14 15
 21 18 10 16 21 25 13 18 16 22 19 21 12 22 15 14 17 19 13 20 31 23 24 17
 15 18 20 20 16 24 14 18 17 22 19 23 22 14 22 23 17 17 18 22 20 17 14 18
 16 20 17 21 29 20 24 21 13 19 16 18 24 18 18 13 23 19 19 16 13 18 13 18
 11 16 21 20 21 15 18 25 28 23 17 17 22 19 15 12 21 19 26 19 17 18 22 22
 18 18 17 16 19 14 17 17 21 15 21 20 28 20 20 18 18 17  9 16 19 21 18 24
 23 14 21 19 14 26 19 13 23 13 14 15 21 23  9  9 15  8 23 25 26 15 19 15
 16 15 17 21 18 11 19 14 25 16 14 26 17 18 17 18 15 19 20 18 14 18 19 18
 19 24 13 23 11 16 25 21 16 17 18 24 15 24 14 21 13 23 21 17 14 17 21 25
 17 17 17 27 16 27 12 22 15 13 27 13 21 14 19 16 15 17 28 