In [14]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_squared_error
from math import sqrt
import os
import pickle
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [15]:
data_folder = "folder_data"
output_file = f"data_analysis_world.csv"
appended_data = pd.DataFrame()

def get_country_name(filename):
    # Extract filename without extension
    base_filename = os.path.splitext(filename)[0]
    # Extract country name from filename (assuming filename format is "CountryName.csv")
    country_name = base_filename.capitalize()  # Capitalize the first letter
    return country_name

for folder in sorted(os.listdir(data_folder)):
    folder_path = os.path.join(data_folder, folder)
    if os.path.isdir(folder_path):
        print(f"Processing files in folder: {folder}")
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            if file.endswith('.csv'):
                print(f"Reading file: {file}")
                data = pd.read_csv(file_path)
                print(data.head())
                print()
                country_name = get_country_name(file)

                data['DATE'] = pd.to_datetime(data['DATE'])

                # Extract year from the date
                data['YEAR'] = data['DATE'].dt.year

                # Filter out the year 2024
                data = data[data['YEAR'] != 2024]

                # Group by country and year, calculating the average temperature
                aggregated_data = data.groupby(['YEAR'])['TAVG'].mean().reset_index()

                # Sort the data by year
                aggregated_data.sort_values(by='YEAR', inplace=True)

                # Calculate the difference in temperature between consecutive years
                aggregated_data['DIFF'] = aggregated_data['TAVG'].diff()

                # Reset index for better readability
                aggregated_data.reset_index(drop=True, inplace=True)

                # Add country name as a column
                aggregated_data['COUNTRY'] = country_name
                aggregated_data = aggregated_data[aggregated_data['YEAR'] >= 1975]

                # Display the DataFrame with country name, year, and temperature difference
                print(aggregated_data)
                appended_data = appended_data.append(aggregated_data, ignore_index=True)
         
appended_data.to_csv(output_file, index=False)
                
                

Processing files in folder: Afghanistan
Reading file: Afghanistan.csv
       STATION        NAME        DATE  TAVG
0  TI000038954  KHOROG, TI  1974-01-01  23.9
1  TI000038954  KHOROG, TI  1974-02-01  24.8
2  TI000038954  KHOROG, TI  1974-03-01  37.5
3  TI000038954  KHOROG, TI  1974-04-01  54.7
4  TI000038954  KHOROG, TI  1974-05-01  59.7

    YEAR       TAVG      DIFF      COUNTRY
1   1975  46.858333 -1.283333  Afghanistan
2   1976  49.031667  2.173333  Afghanistan
3   1977  49.583333  0.551667  Afghanistan
4   1978  48.483333 -1.100000  Afghanistan
5   1979  49.966667  1.483333  Afghanistan
6   1980  50.983333  1.016667  Afghanistan
7   1981  49.833333 -1.150000  Afghanistan
8   1982  48.191667 -1.641667  Afghanistan
9   1983  49.391667  1.200000  Afghanistan
10  1984  50.391667  1.000000  Afghanistan
11  1985  50.375000 -0.016667  Afghanistan
12  1986  49.033333 -1.341667  Afghanistan
13  1987  48.358333 -0.675000  Afghanistan
14  1988  50.091667  1.733333  Afghanistan
15  1989  46.9

Processing files in folder: Czech Republic
Reading file: Czech Republic.csv
       STATION                   NAME        DATE  TAVG
0  EZE00100082  PRAHA KLEMENTINUM, EZ  1974-01-01  38.6
1  EZE00100082  PRAHA KLEMENTINUM, EZ  1974-02-01  40.1
2  EZE00100082  PRAHA KLEMENTINUM, EZ  1974-03-01  46.9
3  EZE00100082  PRAHA KLEMENTINUM, EZ  1974-04-01  50.1
4  EZE00100082  PRAHA KLEMENTINUM, EZ  1974-05-01  56.8

    YEAR       TAVG      DIFF         COUNTRY
1   1975  47.654167  0.375000  Czech republic
2   1976  46.291667 -1.362500  Czech republic
3   1977  46.620833  0.329167  Czech republic
4   1978  45.312500 -1.308333  Czech republic
5   1979  45.829167  0.516667  Czech republic
6   1980  44.341667 -1.487500  Czech republic
7   1981  46.225000  1.883333  Czech republic
8   1982  47.441667  1.216667  Czech republic
9   1983  47.916667  0.475000  Czech republic
10  1984  45.470833 -2.445833  Czech republic
11  1985  44.958333 -0.512500  Czech republic
12  1986  46.070833  1.112500  Czec

Processing files in folder: Kiribati
Reading file: Kiribati.csv
   STATION      NAME        DATE  TAVG
0        3  Kiribati  1974-01-01  80.6
1        3  Kiribati  1974-02-01  78.9
2        3  Kiribati  1974-03-01  79.9
3        3  Kiribati  1974-04-01  79.6
4        3  Kiribati  1974-05-01  78.3

    YEAR       TAVG      DIFF   COUNTRY
1   1975  79.983333  0.408333  Kiribati
2   1976  79.675000 -0.308333  Kiribati
3   1977  80.475000  0.800000  Kiribati
4   1978  80.108333 -0.366667  Kiribati
5   1979  80.508333  0.400000  Kiribati
6   1980  80.800000  0.291667  Kiribati
7   1981  80.241667 -0.558333  Kiribati
8   1982  80.516667  0.275000  Kiribati
9   1983  80.616667  0.100000  Kiribati
10  1984  80.533333 -0.083333  Kiribati
11  1985  80.608333  0.075000  Kiribati
12  1986  80.766667  0.158333  Kiribati
13  1987  80.658333 -0.108333  Kiribati
14  1988  81.033333  0.375000  Kiribati
15  1989  80.383333 -0.650000  Kiribati
16  1990  81.058333  0.675000  Kiribati
17  1991  81.408333  

Processing files in folder: Rwanda
Reading file: Rwanda.csv
         DATE   TAVG    NAME  STATION
0  1974-01-01  73.71  Rwanda       23
1  1974-02-01  73.08  Rwanda       23
2  1974-03-01  71.86  Rwanda       23
3  1974-04-01  67.56  Rwanda       23
4  1974-05-01  64.50  Rwanda       23

    YEAR       TAVG      DIFF COUNTRY
1   1975  69.298333  0.395833  Rwanda
2   1976  69.281667 -0.016667  Rwanda
3   1977  70.287500  1.005833  Rwanda
4   1978  69.811667 -0.475833  Rwanda
5   1979  69.968333  0.156667  Rwanda
6   1980  69.807500 -0.160833  Rwanda
7   1981  69.324167 -0.483333  Rwanda
8   1982  70.068333  0.744167  Rwanda
9   1983  72.224167  2.155833  Rwanda
10  1984  70.350833 -1.873333  Rwanda
11  1985  69.536667 -0.814167  Rwanda
12  1986  70.100833  0.564167  Rwanda
13  1987  71.730833  1.630000  Rwanda
14  1988  70.265000 -1.465833  Rwanda
15  1989  69.969167 -0.295833  Rwanda
16  1990  71.391667  1.422500  Rwanda
17  1991  67.242500 -4.149167  Rwanda
18  1992  68.542500  1.3000