In [5]:
import pandas as pd
import os

def concatenate_excel_files(folder_path):
    """
    Concatenates all Excel files in a given folder into a single DataFrame.
    Assumes that all Excel files have the same column structure.

    Files are read in sorted name order so that the row order of the result
    is reproducible (os.listdir returns entries in arbitrary order). Excel's
    temporary lock files ("~$<name>.xlsx", which exist while a workbook is
    open) are skipped, and the ".xlsx" extension is matched
    case-insensitively.

    :param folder_path: str, the path to the folder containing Excel files.
    :return: DataFrame, the concatenated DataFrame containing all data.
    :raises ValueError: if the folder contains no .xlsx files to read.
    """
    # Collect the workbook names; "~$" entries are lock files, not real
    # workbooks, and would make pd.read_excel fail.
    excel_files = sorted(
        file for file in os.listdir(folder_path)
        if file.lower().endswith('.xlsx') and not file.startswith('~$')
    )

    # pd.concat raises an unhelpful "No objects to concatenate" on an empty
    # list, so fail early with a message that names the folder.
    if not excel_files:
        raise ValueError(f"No .xlsx files found in folder: {folder_path}")

    # Read each workbook into its own DataFrame
    df_list = [pd.read_excel(os.path.join(folder_path, file)) for file in excel_files]

    # Stack all DataFrames and renumber the rows 0..n-1
    combined_df = pd.concat(df_list, ignore_index=True)

    return combined_df

def check_values_in_column(df, column_name):
    """
    Flags, row by row, whether the given column holds a non-null value.
    The flags are written into a new column named 'BOOL <column_name>' on
    the DataFrame that was passed in (the input is modified in place).

    :param df: DataFrame, the DataFrame to check.
    :param column_name: str, the name of the column to check for values.
    :return: DataFrame, the same DataFrame object with the flag column added.
    """
    # True where the cell is populated, False where it is NaN/None
    has_value = df[column_name].notna()

    # Store the flags next to the data under the derived column name
    df[f'BOOL {column_name}'] = has_value
    return df


In [7]:
# Folders holding the Excel files to combine: Result_1..Result_3 under the
# same parent directory.
folder_path_1, folder_path_2, folder_path_3 = (
    rf'C:\Users\User\Desktop\Final Year Project\Code\Processed_CX\02_Transform_Data_Frame\Result_{n}'
    for n in (1, 2, 3)
)

# Column whose presence is checked for every combined frame
check_AOA_I = 'Angle of Attack (Indicated) (LH or only) (deg)'

In [8]:
# Combine every workbook in folder_path_1, add the constant
# 'predictive or reactive wind shear warning' column, add the presence flag
# for the check_AOA_I column, then set label=True on every row.
combined_df_1 = check_values_in_column(
    concatenate_excel_files(folder_path_1).assign(
        **{'predictive or reactive wind shear warning': True}
    ),
    check_AOA_I,
).assign(label=True)
combined_df_1

Unnamed: 0,Offset,UTC time,Airframe,Corrected Latitude (deg),Corrected Longitude (deg),Heading (true) (deg),Wind Direction (true) (deg),Wind Speed (knots),Airspeed (calibrated; 1 or Only) (knots),Baro-Corrected Altitude (ft),...,Angle of Attack (Corrected) (LH or only) (deg),Pitch Attitude (Capt or Only) (deg),Airport and Runway (Landing),Date-Time at Touchdown,Filename,adjusted landing time,Angle of Attack (Best Available) (deg),predictive or reactive wind shear warning,BOOL Angle of Attack (Indicated) (LH or only) (deg),label
0,59652.0,21:24:33,124,22.272130,113.824093,73.448812,161.542923,4.000,149.000,1444.0,...,5.625001,1.582032,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:23,,True,True,True
1,59653.0,21:24:34,124,22.272365,113.824795,73.975996,92.109352,6.000,149.000,1440.0,...,5.273439,2.109375,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:24,,True,True,True
2,59654.0,21:24:35,124,22.272597,113.825490,73.624292,98.789040,8.000,149.500,1424.0,...,2.285157,1.933594,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:25,,True,True,True
3,59655.0,21:24:36,124,22.272829,113.826185,72.921017,123.222626,5.000,144.000,1412.0,...,5.273439,1.582032,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:26,,True,True,True
4,59656.0,21:24:37,124,22.273049,113.826884,72.745088,112.939423,2.000,145.000,1400.0,...,5.800783,1.582032,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:27,,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10697,11119.0,21:48:22,46,22.379940,114.009234,66.093758,159.609375,50.500,231.875,4964.0,...,3.471679,4.570312,VHHH 07L,2018-09-16 22:03:00,973024.csv,2018-09-16 22:02:56,,True,True,True
10698,11120.0,21:48:22,46,22.380659,114.010283,64.951180,159.609375,46.250,230.125,4976.0,...,4.658203,3.867188,VHHH 07L,2018-09-16 22:03:00,973024.csv,2018-09-16 22:02:57,,True,True,True
10699,11121.0,,46,22.381379,114.011333,63.105473,155.390625,42.875,231.375,4980.0,...,3.867187,3.515625,VHHH 07L,2018-09-16 22:03:00,973024.csv,2018-09-16 22:02:58,,True,True,True
10700,11122.0,,46,22.382129,114.012371,61.523441,161.718750,42.500,232.125,4984.0,...,3.164062,3.515625,VHHH 07L,2018-09-16 22:03:00,973024.csv,2018-09-16 22:02:59,,True,True,True


In [9]:
# Combine every workbook in folder_path_2, add the constant
# '15 knots at the point of GA' column, add the presence flag for the
# check_AOA_I column, then set label=True on every row.
combined_df_2 = check_values_in_column(
    concatenate_excel_files(folder_path_2).assign(
        **{'15 knots at the point of GA': True}
    ),
    check_AOA_I,
).assign(label=True)
combined_df_2

Unnamed: 0,Offset,UTC time,Airframe,Corrected Latitude (deg),Corrected Longitude (deg),Heading (true) (deg),Wind Direction (true) (deg),Wind Speed (knots),Airspeed (calibrated; 1 or Only) (knots),Baro-Corrected Altitude (ft),...,Angle of Attack (Corrected) (LH or only) (deg),Pitch Attitude (Capt or Only) (deg),Airport and Runway (Landing),Date-Time at Touchdown,Filename,adjusted landing time,Angle of Attack (Best Available) (deg),15 knots at the point of GA,BOOL Angle of Attack (Indicated) (LH or only) (deg),label
0,9388,7:13:55,46,22.285688,113.821250,72.064819,237.656250,27.250,169.0000,1420.0,...,0.000000,-2.488403,VHHH 07L,2019-08-26 07:35:00,1002183.csv,2019-08-26 07:30:19,,True,True,True
1,9389,7:13:56,46,22.286006,113.822199,72.020874,238.359375,27.000,168.4375,1408.0,...,0.175781,-2.499390,VHHH 07L,2019-08-26 07:35:00,1002183.csv,2019-08-26 07:30:20,,True,True,True
2,9390,7:13:57,46,22.286324,113.823148,71.993408,238.359375,27.000,167.8125,1384.0,...,0.483398,-2.422485,VHHH 07L,2019-08-26 07:35:00,1002183.csv,2019-08-26 07:30:21,,True,True,True
3,9391,7:13:58,46,22.286632,113.824102,72.042847,237.656250,26.875,167.8125,1372.0,...,0.483398,-2.466431,VHHH 07L,2019-08-26 07:35:00,1002183.csv,2019-08-26 07:30:22,,True,True,True
4,9392,7:13:59,46,22.286939,113.825057,72.130737,236.953125,26.750,167.2500,1360.0,...,0.483398,-2.603760,VHHH 07L,2019-08-26 07:35:00,1002183.csv,2019-08-26 07:30:23,,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31423,8781,,57,22.365787,113.982180,58.886723,206.015625,23.375,212.5000,4969.0,...,3.779297,4.570312,VHHH 07L,2019-08-26 07:39:00,993893.csv,2019-08-26 07:38:56,,True,True,True
31424,8782,,57,22.366405,113.983219,59.150394,208.125000,21.875,215.2500,4975.0,...,3.867187,4.218750,VHHH 07L,2019-08-26 07:39:00,993893.csv,2019-08-26 07:38:57,,True,True,True
31425,8783,7:21:44,57,22.366981,113.984175,59.150394,211.640625,23.375,213.5000,4980.0,...,3.295898,3.867188,VHHH 07L,2019-08-26 07:39:00,993893.csv,2019-08-26 07:38:58,,True,True,True
31426,8784,7:21:44,57,22.367556,113.985131,59.062504,210.937500,25.375,212.0000,4985.0,...,3.208008,3.515625,VHHH 07L,2019-08-26 07:39:00,993893.csv,2019-08-26 07:38:59,,True,True,True


In [11]:
# Combine every workbook in folder_path_3, add the constant
# '15 knots at the point of GA' column, add the presence flag for the
# check_AOA_I column, then set label=False on every row.
# NOTE(review): this frame gets the same '15 knots at the point of GA' = True
# flag as combined_df_2 but the opposite label — confirm that is intended.
combined_df_3 = check_values_in_column(
    concatenate_excel_files(folder_path_3).assign(
        **{'15 knots at the point of GA': True}
    ),
    check_AOA_I,
).assign(label=False)
combined_df_3

Unnamed: 0,Offset,UTC time,Airframe,Corrected Latitude (deg),Corrected Longitude (deg),Heading (true) (deg),Wind Direction (true) (deg),Wind Speed (knots),Airspeed (calibrated; 1 or Only) (knots),Baro-Corrected Altitude (ft),...,Angle of Attack (Corrected) (LH or only) (deg),Pitch Attitude (Capt or Only) (deg),Airport and Runway (Landing),Date-Time at Touchdown,Filename,adjusted landing time,Angle of Attack (Best Available) (deg),15 knots at the point of GA,BOOL Angle of Attack (Indicated) (LH or only) (deg),label
0,5558.0,,88,22.286422,113.822197,67.939461,23.906250,10.000,160.000,,...,5.806813,1.977539,VHHH 07L,2020-12-15 06:45:00,1000244.csv,2020-12-15 06:42:24,,True,True,False
1,5559.0,6:42:47,88,22.286659,113.822927,67.851570,19.687500,10.000,160.750,1526.0,...,5.611826,1.977539,VHHH 07L,2020-12-15 06:45:00,1000244.csv,2020-12-15 06:42:25,,True,True,False
2,5560.0,,88,22.286897,113.823658,67.763680,19.687500,10.000,161.000,,...,6.085366,2.153320,VHHH 07L,2020-12-15 06:45:00,1000244.csv,2020-12-15 06:42:26,,True,True,False
3,5561.0,,88,22.287134,113.824386,67.675789,15.468750,10.000,159.500,,...,6.057511,2.285156,VHHH 07L,2020-12-15 06:45:00,1000244.csv,2020-12-15 06:42:27,,True,True,False
4,5562.0,,88,22.287371,113.825114,67.500008,15.468750,11.000,159.750,,...,6.085366,2.416991,VHHH 07L,2020-12-15 06:45:00,1000244.csv,2020-12-15 06:42:28,,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
712418,9493.0,,46,22.316359,113.914962,75.234383,267.890625,8.250,47.250,61.0,...,0.175781,-0.351562,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:56,,True,True,False
712419,9494.0,,46,22.316406,113.915199,77.695320,265.078125,6.750,48.250,60.0,...,0.087891,-0.351562,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:57,,True,True,False
712420,9495.0,6:14:50,46,22.316439,113.915433,80.683601,269.296875,5.500,46.625,61.0,...,0.439453,-0.703125,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:58,,True,True,False
712421,9496.0,6:14:50,46,22.316472,113.915666,83.759773,264.375000,4.625,42.250,61.0,...,0.307617,-0.703125,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:59,,True,True,False


In [12]:
# Stack the three labelled frames into one table with a fresh 0..n-1 index.
# NOTE(review): 'predictive or reactive wind shear warning' exists only in
# combined_df_1 and '15 knots at the point of GA' only in combined_df_2/3, so
# after the concat those columns hold NaN (not False) for rows coming from
# the other frames — confirm downstream code expects that.
labelled_frames = [combined_df_1, combined_df_2, combined_df_3]
result_combined_df = pd.concat(labelled_frames, ignore_index=True)
result_combined_df

Unnamed: 0,Offset,UTC time,Airframe,Corrected Latitude (deg),Corrected Longitude (deg),Heading (true) (deg),Wind Direction (true) (deg),Wind Speed (knots),Airspeed (calibrated; 1 or Only) (knots),Baro-Corrected Altitude (ft),...,Pitch Attitude (Capt or Only) (deg),Airport and Runway (Landing),Date-Time at Touchdown,Filename,adjusted landing time,Angle of Attack (Best Available) (deg),predictive or reactive wind shear warning,BOOL Angle of Attack (Indicated) (LH or only) (deg),label,15 knots at the point of GA
0,59652.0,21:24:33,124,22.272130,113.824093,73.448812,161.542923,4.000,149.000,1444.0,...,1.582032,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:23,,True,True,True,
1,59653.0,21:24:34,124,22.272365,113.824795,73.975996,92.109352,6.000,149.000,1440.0,...,2.109375,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:24,,True,True,True,
2,59654.0,21:24:35,124,22.272597,113.825490,73.624292,98.789040,8.000,149.500,1424.0,...,1.933594,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:25,,True,True,True,
3,59655.0,21:24:36,124,22.272829,113.826185,72.921017,123.222626,5.000,144.000,1412.0,...,1.582032,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:26,,True,True,True,
4,59656.0,21:24:37,124,22.273049,113.826884,72.745088,112.939423,2.000,145.000,1400.0,...,1.582032,VHHH 07R,2018-04-20 21:42:00,1103799.csv,2018-04-20 21:38:27,,True,True,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754548,9493.0,,46,22.316359,113.914962,75.234383,267.890625,8.250,47.250,61.0,...,-0.351562,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:56,,,True,False,True
754549,9494.0,,46,22.316406,113.915199,77.695320,265.078125,6.750,48.250,60.0,...,-0.351562,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:57,,,True,False,True
754550,9495.0,6:14:50,46,22.316439,113.915433,80.683601,269.296875,5.500,46.625,61.0,...,-0.703125,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:58,,,True,False,True
754551,9496.0,6:14:50,46,22.316472,113.915666,83.759773,264.375000,4.625,42.250,61.0,...,-0.703125,VHHH 07L,2019-12-27 06:14:00,999942.csv,2019-12-27 06:13:59,,,True,False,True


In [14]:
# Write the combined table to a single workbook (row index not exported)
method_1_path = r'C:\Users\User\Desktop\Final Year Project\Code\Processed_CX\04_Method_1_Raw_Data'
method_1_file = method_1_path + r"\Method_1_Raw_Data.xlsx"
result_combined_df.to_excel(method_1_file, index=False)