In [2]:
import os
import pandas as pd

def merge_csv_in_folder(folder_path):
    """
    Merge every CSV file in a folder into a single DataFrame.

    File names ending in '.csv' are read in sorted (alphabetical) order, so the
    row order of the result is the same on every run. Each file is parsed with
    ';' as the field separator and ',' as the decimal mark. A file that cannot
    be read is reported with print() and skipped.

    Args:
    folder_path (str): The path to the folder containing CSV files.

    Returns:
    pd.DataFrame: The rows of all successfully read files, stacked in file-name
    order with a fresh 0..n-1 index. An empty DataFrame if no file could be read.

    Raises:
    OSError: If folder_path cannot be listed (for example, it does not exist).
    """
    # os.listdir() returns names in arbitrary order; sort them so the merged
    # rows come out in a reproducible order.
    all_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
    dfs = []

    for file in all_files:
        file_path = os.path.join(folder_path, file)
        try:
            # Semicolon-delimited input that uses a comma as the decimal mark
            df = pd.read_csv(file_path, sep=';', decimal=',')
            dfs.append(df)
        except pd.errors.ParserError as e:
            print(f"ParserError for file {file_path}: {e}")
        except Exception as e:
            # Best effort: report the unreadable file and keep merging the rest
            print(f"Error reading {file_path}: {e}")

    # Concatenate all DataFrames in the list into a single DataFrame
    if dfs:  # At least one file was read successfully
        merged_df = pd.concat(dfs, ignore_index=True)
        return merged_df
    else:
        print(f"No valid CSV files found in {folder_path}.")
        return pd.DataFrame()  # Return an empty DataFrame if no valid files

# Source folders whose CSV files are merged below.
# NOTE(review): the first path ends with a space - presumably the folder is
# really named that way; confirm before "fixing" it.
folder1_path = '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/debrecen hajnal hour '
folder2_path = '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/debrecen train station hours'

# First folder: merge and show the result
merged_df_1 = merge_csv_in_folder(folder1_path)
print("Merged DataFrame from the first folder:", merged_df_1, sep="\n")

# Second folder: merge and show the result
merged_df_2 = merge_csv_in_folder(folder2_path)
print("Merged DataFrame from the second folder:", merged_df_2, sep="\n")

# Combine the two per-folder frames only when both contain data
if merged_df_1.empty or merged_df_2.empty:
    print("One of the merged DataFrames is empty; final merge skipped.")
else:
    final_merged_df = pd.concat(
        [merged_df_1, merged_df_2],
        ignore_index=True,
    )
    print("Final merged DataFrame:", final_merged_df, sep="\n")

    # Persist the combined frame without writing the index column
    final_merged_df.to_csv(
        '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/AQ2018-2023-debrecen.csv',
        index=False,
    )


Merged DataFrame from the first folder:
                      Date  CO (ug/m3)  NO2 (ug/m3)  NOx (ug/m3) PM10 (ug/m3)  \
0             Limit values     10000.0        100.0          NaN            -   
1      2019-01-01 00:00:00         NaN         14.2         22.9           19   
2      2019-01-01 01:00:00         NaN         15.7         24.2           36   
3      2019-01-01 02:00:00         NaN         14.3         22.5           24   
4      2019-01-01 03:00:00         NaN         25.3         35.4           22   
...                    ...         ...          ...          ...          ...   
51285  2021-12-31 19:00:00       913.0         27.8         60.5           24   
51286  2021-12-31 20:00:00       874.0         27.9         69.0           19   
51287  2021-12-31 21:00:00       897.0         23.7         52.8           22   
51288  2021-12-31 22:00:00       871.0         22.0         48.6           19   
51289  2021-12-31 23:00:00       845.0         23.7         75.3     

In [3]:
# Display the combined frame as this cell's output. The name is only bound when
# the earlier merge cell found data in both folders; otherwise this raises NameError.
final_merged_df

Unnamed: 0,Date,CO (ug/m3),NO2 (ug/m3),NOx (ug/m3),PM10 (ug/m3),SO2 (ug/m3),O3 (ug/m3),PM2.5 (ug/m3)
0,Limit values,10000.0,100.0,,-,250.0,,
1,2019-01-01 00:00:00,,14.2,22.9,19,2.2,,
2,2019-01-01 01:00:00,,15.7,24.2,36,1.9,,
3,2019-01-01 02:00:00,,14.3,22.5,24,2.0,,
4,2019-01-01 03:00:00,,25.3,35.4,22,1.8,,
...,...,...,...,...,...,...,...,...
103802,2020-12-31 19:00:00,562.0,16.5,20.0,15,0.8,265,
103803,2020-12-31 20:00:00,552.0,15.8,19.0,11,1.0,239,
103804,2020-12-31 21:00:00,568.0,14.7,17.6,10,0.9,232,
103805,2020-12-31 22:00:00,572.0,16.5,19.5,9,1.0,205,


In [4]:
import os
import pandas as pd

def merge_csv_in_folder(folder_path):
    """
    Merge every CSV file in a folder into a single DataFrame.

    File names ending in '.csv' are read in sorted (alphabetical) order, so the
    row order of the result is the same on every run. Each file is parsed with
    ';' as the field separator and ',' as the decimal mark. A file that cannot
    be read is reported with print() and skipped.

    Args:
    folder_path (str): The path to the folder containing CSV files.

    Returns:
    pd.DataFrame: The rows of all successfully read files, stacked in file-name
    order with a fresh 0..n-1 index. An empty DataFrame if no file could be read.

    Raises:
    OSError: If folder_path cannot be listed (for example, it does not exist).
    """
    # os.listdir() returns names in arbitrary order; sort them so the merged
    # rows come out in a reproducible order.
    all_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
    dfs = []

    for file in all_files:
        file_path = os.path.join(folder_path, file)
        try:
            # Semicolon-delimited input that uses a comma as the decimal mark
            df = pd.read_csv(file_path, sep=';', decimal=',')
            dfs.append(df)
        except pd.errors.ParserError as e:
            print(f"ParserError for file {file_path}: {e}")
        except Exception as e:
            # Best effort: report the unreadable file and keep merging the rest
            print(f"Error reading {file_path}: {e}")

    # Concatenate all DataFrames in the list into a single DataFrame
    if dfs:  # At least one file was read successfully
        merged_df = pd.concat(dfs, ignore_index=True)
        return merged_df
    else:
        print(f"No valid CSV files found in {folder_path}.")
        return pd.DataFrame()  # Return an empty DataFrame if no valid files

# Source folders whose CSV files are merged below.
# NOTE(review): the first path ends with a space - presumably the folder is
# really named that way; confirm before "fixing" it.
folder1_path = '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/debrecen hajnal hour '
folder2_path = '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/debrecen train station hours'

# First folder: merge, then show and save the result if there is any data
merged_df_1 = merge_csv_in_folder(folder1_path)
if merged_df_1.empty:
    print("No data merged from the first folder.")
else:
    print("Merged DataFrame from the first folder:", merged_df_1, sep="\n")
    # Written without the index column
    merged_df_1.to_csv(
        '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/merged_debrecen_hajnal_hour.csv',
        index=False,
    )

# Second folder: same treatment
merged_df_2 = merge_csv_in_folder(folder2_path)
if merged_df_2.empty:
    print("No data merged from the second folder.")
else:
    print("Merged DataFrame from the second folder:", merged_df_2, sep="\n")
    # Written without the index column
    merged_df_2.to_csv(
        '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/merged_debrecen_train_station_hours.csv',
        index=False,
    )


Merged DataFrame from the first folder:
                      Date  CO (ug/m3)  NO2 (ug/m3)  NOx (ug/m3) PM10 (ug/m3)  \
0             Limit values     10000.0        100.0          NaN            -   
1      2019-01-01 00:00:00         NaN         14.2         22.9           19   
2      2019-01-01 01:00:00         NaN         15.7         24.2           36   
3      2019-01-01 02:00:00         NaN         14.3         22.5           24   
4      2019-01-01 03:00:00         NaN         25.3         35.4           22   
...                    ...         ...          ...          ...          ...   
51285  2021-12-31 19:00:00       913.0         27.8         60.5           24   
51286  2021-12-31 20:00:00       874.0         27.9         69.0           19   
51287  2021-12-31 21:00:00       897.0         23.7         52.8           22   
51288  2021-12-31 22:00:00       871.0         22.0         48.6           19   
51289  2021-12-31 23:00:00       845.0         23.7         75.3     

In [5]:
import os
import pandas as pd

def merge_csv_in_folder(folder_path):
    """
    Merge every CSV file in a folder into a single DataFrame.

    File names ending in '.csv' are read in sorted (alphabetical) order, so the
    row order of the result is the same on every run. Each file is parsed with
    ';' as the field separator and ',' as the decimal mark. A file that cannot
    be read is reported with print() and skipped.

    Args:
    folder_path (str): The path to the folder containing CSV files.

    Returns:
    pd.DataFrame: The rows of all successfully read files, stacked in file-name
    order with a fresh 0..n-1 index. An empty DataFrame if no file could be read.

    Raises:
    OSError: If folder_path cannot be listed (for example, it does not exist).
    """
    # os.listdir() returns names in arbitrary order; sort them so the merged
    # rows come out in a reproducible order.
    all_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
    dfs = []

    for file in all_files:
        file_path = os.path.join(folder_path, file)
        try:
            # Semicolon-delimited input that uses a comma as the decimal mark
            df = pd.read_csv(file_path, sep=';', decimal=',')
            dfs.append(df)
        except pd.errors.ParserError as e:
            print(f"ParserError for file {file_path}: {e}")
        except Exception as e:
            # Best effort: report the unreadable file and keep merging the rest
            print(f"Error reading {file_path}: {e}")

    # Concatenate all DataFrames in the list into a single DataFrame
    if dfs:  # At least one file was read successfully
        merged_df = pd.concat(dfs, ignore_index=True)
        return merged_df
    else:
        print(f"No valid CSV files found in {folder_path}.")
        return pd.DataFrame()  # Return an empty DataFrame if no valid files

# Source folder whose CSV files are merged below
folder1_path = '/Users/zafiraibraeva/Code/uni coding/thesis/dataset/debrecen hajnal day'
merged_df_1 = merge_csv_in_folder(folder1_path)

# Show and save the merged frame only when it actually holds data
if merged_df_1.empty:
    print("No data merged from the first folder.")
else:
    print("Merged DataFrame from the first folder:", merged_df_1, sep="\n")
    # Written without the index column
    merged_df_1.to_csv(
        "/Users/zafiraibraeva/Code/uni coding/thesis/thesis_code/thesis/data/merged_hajnal_data.csv",
        index=False,
    )

# NOTE: only the first folder is merged in this cell; no second folder is processed here.

Merged DataFrame from the first folder:
                     Date CO (ug/m3)  NO2 (ug/m3)  NOx (ug/m3)  PM10 (ug/m3)  \
0            Limit values          -         85.0          NaN          50.0   
1     2019-01-01 00:00:00        NaN         25.3         62.2          18.0   
2     2019-01-02 00:00:00        NaN         24.1         40.2          10.0   
3     2019-01-03 00:00:00        NaN         21.5         38.0          10.0   
4     2019-01-04 00:00:00        NaN         31.7         77.1          19.0   
...                   ...        ...          ...          ...           ...   
3614  2014-12-27 00:00:00        611         21.0         38.9          20.0   
3615  2014-12-28 00:00:00        686         15.7         28.7          31.0   
3616  2014-12-29 00:00:00        595         15.9         24.0          24.0   
3617  2014-12-30 00:00:00        613         17.1         26.3          20.0   
3618  2014-12-31 00:00:00       1805         48.5        136.0          46.0   


In [6]:
from xgboost import XGBClassifier

ModuleNotFoundError: No module named 'xgboost'