### All Predictions

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor


def processDataset(dataset_path, model_name, model):
    dataset = pd.read_csv(dataset_path)
    
    # Selected relevant features and handle missing values
    selected_features = ['Area Code (M49)', 'Year Code', 'Value']
    dataset_selected = dataset[selected_features].dropna()
    
    # Features (X) and target variable (y) define
    X = dataset_selected[['Area Code (M49)', 'Year Code']]
    y = dataset_selected['Value']
    
    # Datasets splited into training and testing sets (80%, 20%)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
   
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # Store the prediction outputs into DataFrame
    prediction_outputs = pd.DataFrame({
        'Data Instance ID': X_test.index,
        f'True Label ({model_name})': y_test,
        f'Prediction ({model_name})': predictions
    })
    
    return prediction_outputs



# List of dataset paths
dataset_paths = [
    "../Consumer prices indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Crops production indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Emissions - FAOSTAT_data_en_2-27-2024.csv",
    "../Employment - FAOSTAT_data_en_2-27-2024.csv",
    "../Exchange rate - FAOSTAT_data_en_2-22-2024.csv",
    "../Fertilizers use - FAOSTAT_data_en_2-27-2024.csv",
    "../Food balances indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Food security indicators  - FAOSTAT_data_en_2-22-2024.csv",
    "../Food trade indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Foreign direct investment - FAOSTAT_data_en_2-27-2024.csv",
    # "../Land temperature change - FAOSTAT_data_en_2-27-2024.csv",
    "../Land use - FAOSTAT_data_en_2-22-2024.csv",
    "../Pesticides use - FAOSTAT_data_en_2-27-2024.csv",
    ]

# Initialize models
linear_model = LinearRegression()
mlp_model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', random_state=42)

# List to store all prediction DataFrames
all_predictions = []

# This process each dataset and collect predictions
for dataset_path in dataset_paths:
    # This process dataset with linear regression
    linear_predictions = processDataset(dataset_path, "Linear Regression", linear_model)
    all_predictions.append(linear_predictions)
    
    # This process dataset with MLP regression
    mlp_predictions = processDataset(dataset_path, "MLP Regression", mlp_model)
    all_predictions.append(mlp_predictions)

# Concatenates all prediction DataFrames
all_predictions_df = pd.concat(all_predictions, ignore_index=True)

# And then save all concatenated DataFrame to a CSV file
all_predictions_df.to_csv("../MLWorks/predictions.csv", index=False)


  dataset = pd.read_csv(dataset_path)
  dataset = pd.read_csv(dataset_path)


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor


def processDataset(dataset_path, model_name, model):
    dataset = pd.read_csv(dataset_path)
    
    # Selected relevant features and handle missing values
    selected_features = ['Area Code (M49)', 'Year Code', 'Value']
    dataset_selected = dataset[selected_features].dropna()
    
    # Features (X) and target variable (y) define
    X = dataset_selected[['Area Code (M49)', 'Year Code']]
    y = dataset_selected['Value']
    
    # Datasets splited into training and testing sets (80%, 20%)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
   
    model.fit(X_train, y_train)
    
    # Make predictions
    predictions = model.predict(X_test)
    
    # Store the prediction outputs into DataFrame
    prediction_outputs = pd.DataFrame({
        'Data Instance ID': X_test.index,
        f'True Label ({model_name})': y_test,
        f'Prediction ({model_name})': predictions
    })
    
    return prediction_outputs



# List of dataset paths
datasets = [
    "../Consumer prices indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Crops production indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Emissions - FAOSTAT_data_en_2-27-2024.csv",
    "../Employment - FAOSTAT_data_en_2-27-2024.csv",
    "../Exchange rate - FAOSTAT_data_en_2-22-2024.csv",
    "../Fertilizers use - FAOSTAT_data_en_2-27-2024.csv",
    "../Food balances indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Food security indicators  - FAOSTAT_data_en_2-22-2024.csv",
    "../Food trade indicators - FAOSTAT_data_en_2-22-2024.csv",
    "../Foreign direct investment - FAOSTAT_data_en_2-27-2024.csv",
    # "../Land temperature change - FAOSTAT_data_en_2-27-2024.csv",
    "../Land use - FAOSTAT_data_en_2-22-2024.csv",
    "../Pesticides use - FAOSTAT_data_en_2-27-2024.csv",
    ]

In [7]:
# Define models to use for each dataset
models = {
    'Linear Regression': LinearRegression(),
    'MLP Regression': MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', random_state=42)
}

all_prediction_outputs = []

for dataset in datasets:
    for model_name, model in models.items():
        prediction_outputs = processDataset(dataset, model_name, model)
        all_prediction_outputs.append(prediction_outputs)

combined_prediction_outputs = pd.concat(all_prediction_outputs, axis=1)

# Save combined DataFrame to a single CSV file
combined_prediction_outputs.to_csv('../all_predictions.csv', index=False)


  dataset = pd.read_csv(dataset_path)
  dataset = pd.read_csv(dataset_path)


### Merging notbooks into one file and Convert the code file to PDF file

In [4]:
import nbformat
import nbformat
# from nbconvert import PDFExporter

notebooks = [
    "consumer_prices_indicators.ipynb",
    "crops_production_indicators.ipynb",
    "employment.ipynb",
    "exchange_rate.ipynb",
    "emissions.ipynb",
    "fertilizers_use.ipynb",
    "food_balances_indicators.ipynb",
    "food_security_indicators.ipynb",
    "food_trade_indicators.ipynb",
    "foriegn_direct_investment.ipynb",
    "land_temperature_change.ipynb",
    "land_use.ipynb",
    "pesticides_use.ipynb",
    "predictions.ipynb",
    
]

merged_notebook = nbformat.v4.new_notebook()

for notebook_file in notebooks:
    with open(notebook_file, 'r', encoding='utf-8') as f:
        notebook_content = nbformat.read(f, as_version=4)
    merged_notebook.cells.extend(notebook_content.cells)
output_file = 'all_notebook_code.ipynb'
with open(output_file, 'w', encoding='utf-8') as f:
    nbformat.write(merged_notebook, f)


  validate(nb)


In [None]:
merged_notebook_file = 'all_notebook_code.ipynb'
with open(merged_notebook_file, 'r', encoding='utf-8') as f:
    merged_notebook = nbformat.read(f, as_version=4)

# Configured PDFExporter
pdf_exporter = PDFExporter()
pdf_exporter.exclude_input = True

# Convert notebook to PDF
pdf_data, _ = pdf_exporter.from_notebook_node(merged_notebook)

# Saved PDF output
pdf_output_file = '../MLWork/alls_notebook_code.pdf'
with open(pdf_output_file, 'wb') as f:
    f.write(pdf_data)
