In [None]:
import pandas as pd

# Load the raw dataset from the Colab sample-data directory.
df = pd.read_csv("/content/sample_data/final_data.csv")


def _leading_hour(timestamp):
    """Return the text before the first ':' of `timestamp` as an int."""
    hour_text, _, _ = timestamp.partition(':')
    return int(hour_text)


# Replace each 'Hour' entry (e.g. "HH:MM:SS") with just its integer hour part.
df['Hour'] = df['Hour'].apply(_leading_hour)

# Persist the converted frame (without the index column) to the working directory.
output_file = "output.csv"
df.to_csv(output_file, index=False)

print("Time format converted and data saved to", output_file)

Time format converted and data saved to output.csv


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the hour-converted data written by the preprocessing cell. That cell
# saves "output.csv" into the working directory, so read it from there; the
# previous '/content/sample_data/output.csv' path only existed if the file had
# been moved by hand, which breaks Restart & Run All.
data = pd.read_csv('output.csv')

# Encode 'Rain' as binary: 1 for 'Yes', 0 for anything else (including missing).
data['Rain'] = (data['Rain'] == 'Yes').astype(int)

# Define the features and target variable
features = ['Hour', 'WS', 'RH', 'Rain']
target = 'Temp'

n_rows = len(data)

# Best configuration found so far. The score starts at -inf (not 0) because
# the score below is an R^2 value and can legitimately be zero or negative.
best_X = 0
best_Y = 0
best_accuracy = float('-inf')

# Upper bounds for the number of training rows (X) and test rows (Y).
# NOTE(review): 8659 presumably matches the row count of the dataset - confirm.
max_X = 8000
max_Y = 8659

# Grid-search the split sizes: train on the first X rows, test on the last Y
# rows, with X <= Y <= X + 1000 in steps of 100.
# NOTE(review): the split is selected on the same rows that are later reported
# as test results, so the reported score is optimistic; a separate validation
# slice would be needed for an unbiased estimate.
for X in range(100, max_X + 1, 100):
    # head(X) and tail(Y) share rows as soon as X + Y > n_rows, which would
    # score the model on data it was trained on. Cap Y so the two are disjoint.
    largest_Y = min(max_Y, X + 1000, n_rows - X)
    if largest_Y < X:
        # Both limiting terms only get tighter as X grows, so no later X fits.
        break

    # The training set depends only on X, so fit once per X instead of once
    # per (X, Y) pair; with a fixed random_state the fitted model is the same.
    train_data = data.head(X)
    X_train = train_data[features]
    y_train = train_data[target]
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    for Y in range(X, largest_Y + 1, 100):
        test_data = data.tail(Y)
        X_test = test_data[features]
        y_test = test_data[target]

        # Make predictions
        y_pred = model.predict(X_test)

        # Score = 1 - MSE / Var(y_test), i.e. the coefficient of determination
        # (R^2). It is kept under the name 'accuracy' for the printed report.
        accuracy = 1 - mean_squared_error(y_test, y_pred) / np.var(y_test)

        # Update the best configuration if the score is higher
        if accuracy > best_accuracy:
            best_X = X
            best_Y = Y
            best_accuracy = accuracy

if best_X == 0:
    # Nothing was evaluated (or every score was NaN): fail here with a clear
    # message instead of fitting a model on zero rows further down.
    raise ValueError(
        "No valid train/test split found: the dataset has {} rows, but the "
        "smallest configuration needs at least 200 non-overlapping rows with "
        "a finite score.".format(n_rows)
    )

print("Optimal Configuration - X: {}, Y: {}, Accuracy: {:.2f}%".format(best_X, best_Y, best_accuracy * 100))

# Refit on the selected split: the first best_X rows train the model and the
# last best_Y rows are used for evaluation.
# NOTE(review): head(best_X) and tail(best_Y) share rows whenever
# best_X + best_Y exceeds the number of rows in `data` - confirm the split is disjoint.
final_train_data, final_test_data = data.head(best_X), data.tail(best_Y)
X_final_train, y_final_train = final_train_data[features], final_train_data[target]
X_final_test, y_final_test = final_test_data[features], final_test_data[target]

# Same hyper-parameters and seed as used during the search.
final_model = RandomForestRegressor(n_estimators=100, random_state=42)
final_model.fit(X_final_train, y_final_train)

# Predict the target for the evaluation rows.
final_y_pred = final_model.predict(X_final_test)

# Write actual and predicted values side by side, without the index column.
results = pd.DataFrame({'Actual': y_final_test, 'Predicted': final_y_pred})
results.to_csv('temperature_predictions.csv', index=False)


Optimal Configuration - X: 8000, Y: 8100, Accuracy: 85.71%


In [None]:
from scipy.stats import pearsonr

# Pearson correlation coefficient between observed and predicted values
# (the accompanying p-value is not used).
correlation = pearsonr(y_final_test, final_y_pred)[0]

# Bias is the mean signed error, predicted minus actual.
prediction_error = final_y_pred - y_final_test
bias = np.mean(prediction_error)


In [None]:
import matplotlib.pyplot as plt

def _new_scatter(point_color, point_label, heading):
    """Create a 10x6 figure scattering predicted against actual values.

    Returns the (figure, axes) pair with axis labels and title already set.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_final_test, final_y_pred, color=point_color, label=point_label)
    ax.set_xlabel('Actual Temperature')
    ax.set_ylabel('Predicted Temperature')
    ax.set_title(heading)
    return fig, ax


# Figure 1: scatter plus the dashed y = x reference line spanning the range
# of the actual values.
fig, ax = _new_scatter('blue', 'Actual vs. Predicted', 'Actual vs. Predicted Temperature')
lowest, highest = min(y_final_test), max(y_final_test)
ax.plot([lowest, highest], [lowest, highest], color='red', linestyle='--', linewidth=2, label='Ideal Line')
ax.legend()
fig.savefig('actual_vs_predicted.png')
plt.close(fig)

# Figure 2: the same points as a plain scatter, without the reference line.
fig, ax = _new_scatter('green', 'Scatter Plot', 'Scatter Plot of Actual vs. Predicted Temperature')
ax.legend()
fig.savefig('scatter_plot.png')
plt.close(fig)

# Write both summary statistics, rounded to two decimals, to a text file.
with open('correlation_bias.txt', 'w') as report:
    report.write('Correlation: {:.2f}\n'.format(correlation))
    report.write('Bias: {:.2f}\n'.format(bias))

print("Validation plots and statistical metrics saved.")


Validation plots and statistical metrics saved.
