***
# Import Libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import sys
import pickle
import os

# Notebook-wide pandas display configuration.
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.width', 1000)  # Use a display width of 1000 characters for printed frames

***
# Loading Dataset

## Machine Settings

In [2]:
# Root folder of the experiment data (relative path); the loading code below
# reads each experiment from the sub-folder named after its experiment number.
directory_of_data = os.path.join('Daten', 'Experiments')
directory_of_data

'Daten/Experiments'

In [3]:
# Path of the machine-settings CSV, located in the '_Parameter' sub-folder of the data root.
directory_of_machine_settings = os.path.join(directory_of_data, '_Parameter', 'Parameter_1-318.csv')
directory_of_machine_settings

'Daten/Experiments/_Parameter/Parameter_1-318.csv'

In [4]:
# Load the raw machine-settings table from disk.
machine_settings_dataframe = pd.read_csv(directory_of_machine_settings)
# In one chain: keep only the settings without an error comment, discard the
# redundant trailing row, and remove the (now entirely empty) 'Comments' column.
machine_settings_dataframe = (
    machine_settings_dataframe
    .loc[machine_settings_dataframe['Comments'].isna()]
    .iloc[:-1]
    .drop(columns=['Comments'])
)
# Number the remaining rows 1..N instead of keeping the original labels.
machine_settings_dataframe.index = list(range(1, len(machine_settings_dataframe) + 1))
# Experiment numbers are stored as integers.
machine_settings_dataframe['Experiment'] = machine_settings_dataframe['Experiment'].astype(int)
machine_settings_dataframe

Unnamed: 0,Experiment,Tube,Outer-diameter,Wall-thickness,Target-angle,Wiper-die shortening,Pressure-die lateral position,Pressure-die distance,Pressure-die boost,Mandrel position,Mandrel retraction timing,Collet boost,Clamp-die lateral position
1,2,106.0,22.0,1.0,47.0,5.0,-50.45,0.3,0.0,-2909.8,2.0,0.85,225.25
2,3,105.0,22.0,1.0,47.0,5.0,-50.45,0.3,0.0,-2909.8,2.0,0.85,225.25
3,4,104.0,22.0,1.0,47.0,5.0,-50.45,0.3,0.0,-2909.6,2.0,0.85,225.25
4,5,103.0,22.0,1.0,47.0,5.0,-50.45,0.3,0.0,-2909.6,2.0,0.85,225.25
5,6,102.0,22.0,1.0,47.0,5.0,-50.45,0.3,0.0,-2909.4,2.0,0.85,225.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...
311,314,219.0,22.0,1.0,47.0,5.0,-50.45,0.6,0.9,-2905.6,2.0,0.90,225.40
312,315,218.0,22.0,1.0,47.0,5.0,-50.45,0.6,0.9,-2905.6,2.0,0.90,225.40
313,316,217.0,22.0,1.0,47.0,5.0,-50.45,0.6,0.9,-2905.6,2.0,0.90,225.40
314,317,216.0,22.0,1.0,47.0,5.0,-50.45,0.6,0.9,-2907.6,2.0,0.90,225.40


## Helper loading functions:

In [5]:
def geo_file_names_to_dataframe(geometry_files, setting_num, data_dir=None):
    """Load the geometry-section CSV files of one experiment into one dataframe.

    Every file is read with ';' as separator and receives a 'Section' column
    containing the part of its file name that follows 'GEOMETRY_SECTION_'
    (with '.csv' removed). The per-file frames are then stacked row-wise.

    Parameters
    ----------
    geometry_files : iterable of str
        File names (not full paths) located in ``<data_dir>/<setting_num>``.
        Each name must contain 'GEOMETRY_SECTION_'.
    setting_num : int or str
        Experiment number; used as the name of the experiment sub-folder.
    data_dir : str, optional
        Root folder of the experiment data. When omitted, the notebook-level
        ``directory_of_data`` is used, which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all files with a fresh 0..N-1 index. An empty
        dataframe is returned when ``geometry_files`` is empty, consistent
        with the other loading helpers (``pd.concat`` would raise otherwise).
    """
    geometry_files = list(geometry_files)
    if not geometry_files:
        return pd.DataFrame()

    base_dir = directory_of_data if data_dir is None else data_dir
    experiment_dir = os.path.join(base_dir, str(setting_num))

    geo_dataframes = []
    for geo_file_name in geometry_files:
        # Section label = file-name part after the marker, without extension
        section = geo_file_name.split('GEOMETRY_SECTION_')[1].replace('.csv', '')

        geo_result_dataframe = pd.read_csv(os.path.join(experiment_dir, geo_file_name), sep=';')
        geo_result_dataframe['Section'] = section
        geo_dataframes.append(geo_result_dataframe)

    # Stack all sections into a single frame with a continuous index
    return pd.concat(geo_dataframes, ignore_index=True)

In [39]:
def process_load_files(load_files, setting_num, data_dir=None):
    """Load the LOAD CSV files of one experiment, split into MACHINE and SENSOR tables.

    Each file is expected to hold exactly two ';'-separated columns whose first
    row is a header (time label, "parameter [unit]" label). Files of the same
    group are outer-merged on their time column, so every time stamp that
    occurs in any file is kept.

    Parameters
    ----------
    load_files : iterable of str
        File names (not full paths) located in ``<data_dir>/<setting_num>``.
        Names containing 'MACHINE' go to the machine table, names containing
        'SENSOR' to the sensor table. Each name must contain '_LOAD_'.
    setting_num : int or str
        Experiment number; used as the name of the experiment sub-folder.
    data_dir : str, optional
        Root folder of the experiment data. When omitted, the notebook-level
        ``directory_of_data`` is used, which matches the previous behaviour.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(machine_df, sensor_df)``. Each frame has a 'Time_[s]' column
        (numeric time rendered as string, sorted ascending) followed by one
        column per file named ``<section>_<parameter_unit>`` with spaces
        replaced by underscores. Value columns are left exactly as parsed.
        A group without files yields an empty dataframe.
    """
    load_files = list(load_files)
    machine_files = [f for f in load_files if 'MACHINE' in f]
    sensor_files = [f for f in load_files if 'SENSOR' in f]

    def read_load_file(file_name):
        """Read one LOAD file and return its data rows with named columns."""
        base_dir = directory_of_data if data_dir is None else data_dir
        file_path = os.path.join(base_dir, str(setting_num), file_name)

        # Try UTF-8 first: latin-1 accepts every byte sequence, so trying it
        # first would never fail and would garble UTF-8 characters such as '°'.
        try:
            raw = pd.read_csv(file_path, sep=';', header=None, encoding='utf-8')
        except UnicodeDecodeError:
            raw = pd.read_csv(file_path, sep=';', header=None, encoding='latin-1')

        # e.g. "MACHINE_CLAMP-DIE_LATERAL" from "EXP_318_LOAD_MACHINE_CLAMP-DIE_LATERAL.csv"
        section = file_name.split('_LOAD_')[1].replace('.csv', '')
        # The first row carries the "parameter [unit]" label of the value column
        parameter_unit = str(raw.iloc[0, 1])
        new_col_name = f"{section}_{parameter_unit.replace(' ', '_')}"

        # Drop the header row explicitly instead of relying on it ending up as
        # the single last row after sorting.
        data = raw.iloc[1:].copy()
        data.columns = ['Time', new_col_name]
        return data

    def process_group(files):
        """Merge all files of one group on time; empty dataframe if no files."""
        frames = [read_load_file(file_name) for file_name in files]
        if not frames:
            return pd.DataFrame()

        merged_df = frames[0]
        for frame in frames[1:]:
            merged_df = pd.merge(merged_df, frame, on='Time', how='outer')

        # Sort numerically (not lexicographically); unparsable times go last
        merged_df['Time'] = pd.to_numeric(merged_df['Time'], errors='coerce')
        merged_df = merged_df.sort_values('Time', na_position='last')
        # Time is handed back as string, as before
        merged_df['Time'] = merged_df['Time'].astype(str)
        return merged_df.rename(columns={'Time': 'Time_[s]'}).reset_index(drop=True)

    return process_group(machine_files), process_group(sensor_files)

In [40]:
def movement_file_names_to_dataframe(movement_files, setting_num, data_dir=None):
    """Load the MOVEMENT CSV files of one experiment into one time-aligned dataframe.

    Each file is expected to hold exactly two ';'-separated columns whose first
    row is a header (time label, "parameter [unit]" label). All files are
    outer-merged on their time column, so every time stamp that occurs in any
    file is kept.

    Parameters
    ----------
    movement_files : iterable of str
        File names (not full paths) located in ``<data_dir>/<setting_num>``.
        Each name must contain 'MOVEMENT_MACHINE_'.
    setting_num : int or str
        Experiment number; used as the name of the experiment sub-folder.
    data_dir : str, optional
        Root folder of the experiment data. When omitted, the notebook-level
        ``directory_of_data`` is used, which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        A 'Time_[s]' column (numeric time rendered as string, sorted
        ascending) followed by one column per file named
        ``<section>_<parameter_unit>`` with spaces replaced by underscores.
        Value columns are left exactly as parsed. An empty dataframe is
        returned when ``movement_files`` is empty.
    """
    frames = []

    for movement_file in movement_files:
        base_dir = directory_of_data if data_dir is None else data_dir
        movement_file_path = os.path.join(base_dir, str(setting_num), movement_file)

        # e.g. "COLLET_AXIAL" from "MOVEMENT_MACHINE_COLLET_AXIAL.csv"
        section = movement_file.split('MOVEMENT_MACHINE_')[1].replace('.csv', '')

        # UTF-8 first; latin-1 only as fallback for files that are not valid UTF-8
        try:
            raw = pd.read_csv(movement_file_path, sep=';', header=None, encoding='utf-8')
        except UnicodeDecodeError:
            raw = pd.read_csv(movement_file_path, sep=';', header=None, encoding='latin-1')

        # The first row carries the "parameter [unit]" label of the value column
        parameter_unit = str(raw.iloc[0, 1])
        new_col_name = f"{section}_{parameter_unit.replace(' ', '_')}"

        # Drop the header row explicitly instead of relying on it ending up as
        # the single last row after sorting (which fails when the time labels
        # of the files differ).
        data = raw.iloc[1:].copy()
        data.columns = ['Time', new_col_name]
        frames.append(data)

    if not frames:
        return pd.DataFrame()

    merged_df = frames[0]
    for frame in frames[1:]:
        merged_df = pd.merge(merged_df, frame, on='Time', how='outer')

    # Sort numerically (not lexicographically); unparsable times go last
    merged_df['Time'] = pd.to_numeric(merged_df['Time'], errors='coerce')
    merged_df = merged_df.sort_values('Time', na_position='last')
    # Time is handed back as string, as before
    merged_df['Time'] = merged_df['Time'].astype(str)
    return merged_df.rename(columns={'Time': 'Time_[s]'}).reset_index(drop=True)

## Dictionary preparation:

In [41]:
# Build one entry per experiment: 'Exp_<n>' -> dict holding the machine setting
# row and the load / movement / geometry tables read from that experiment's folder.
experiments_process_and_results = {}

for setting_num in machine_settings_dataframe.Experiment:

    print(f'The experiment number is {setting_num}')

    # All CSV files of this experiment, sorted so the column order of the
    # merged tables is deterministic.
    experiment_dir = os.path.join(directory_of_data, str(setting_num))
    csv_files = sorted(name for name in os.listdir(experiment_dir) if name.endswith('.csv'))

    # Select files by name rather than by position in the sorted listing, so a
    # missing or additional file cannot shift the selection. Only the
    # 'GEOMETRY_SECTION_' files are loaded; the GEOMETRY_FEATURE_* and
    # GEOMETRY_SPRINGBACK files are deliberately left out.
    geometry_files = [name for name in csv_files if 'GEOMETRY_SECTION_' in name]
    load_files = [name for name in csv_files if 'LOAD' in name]
    movement_files = [name for name in csv_files if 'MOVEMENT' in name]

    geo_result_df = geo_file_names_to_dataframe(geometry_files, setting_num)
    machine_df, sensor_df = process_load_files(load_files, setting_num)
    movement_results_df = movement_file_names_to_dataframe(movement_files, setting_num)

    experiments_process_and_results[f'Exp_{setting_num}'] = {
        'machine_setting': machine_settings_dataframe[machine_settings_dataframe.Experiment == setting_num],
        'load_machine_process': machine_df,
        'load_sensor_process': sensor_df,
        'movement': movement_results_df,
        'geometry_result': geo_result_df,
    }

The experiment number is 2
The experiment number is 3
The experiment number is 4
The experiment number is 5
The experiment number is 6
The experiment number is 7
The experiment number is 8
The experiment number is 9
The experiment number is 10
The experiment number is 11
The experiment number is 12
The experiment number is 13
The experiment number is 14
The experiment number is 15
The experiment number is 16
The experiment number is 17
The experiment number is 18
The experiment number is 19
The experiment number is 20
The experiment number is 21
The experiment number is 22
The experiment number is 23
The experiment number is 24
The experiment number is 25
The experiment number is 26
The experiment number is 27
The experiment number is 28
The experiment number is 29
The experiment number is 30
The experiment number is 31
The experiment number is 32
The experiment number is 33
The experiment number is 34
The experiment number is 35
The experiment number is 36
The experiment number is 37


***
## Example of usage:

In [46]:
# Each experiment entry offers the keys 'machine_setting', 'load_machine_process',
# 'load_sensor_process', 'movement' and 'geometry_result'; here the geometry
# result of experiment 303 is displayed.
experiments_process_and_results['Exp_303']['geometry_result']

Unnamed: 0,X [mm],Y [mm],Z [mm],Section
0,25.723707,1.438113e-15,-7.809569,ARC_0
1,25.593861,1.430734e-15,-7.678280,ARC_0
2,25.411079,1.420341e-15,-7.488272,ARC_0
3,25.305000,1.414304e-15,-7.374591,ARC_0
4,25.164151,1.406282e-15,-7.218400,ARC_0
...,...,...,...,...
52139,3.532279,3.162673e+01,-6.940746,LINEAR-2_9
52140,3.468555,3.156370e+01,-6.820582,LINEAR-2_9
52141,3.453445,3.154875e+01,-6.791652,LINEAR-2_9
52142,3.442702,3.153812e+01,-6.771237,LINEAR-2_9


***
# Memory and disk space of this dictionary:

In [48]:
def get_deep_size(obj, seen=None):
    """Recursively estimate the size in bytes of an object and its contents.

    Parameters
    ----------
    obj : object
        Object to measure. Dicts are traversed through keys and values, other
        iterables through their items.
    seen : set of int, optional
        Ids of objects that were already counted; used internally so shared
        objects are counted only once. Callers normally leave it unset.

    Returns
    -------
    int
        ``sys.getsizeof`` of ``obj`` plus the deep size of its contents, or 0
        if ``obj`` was already counted.

    Notes
    -----
    pandas objects and NumPy arrays are treated as leaves: their reported size
    already covers the underlying data, so iterating into them would
    double-count (and walk arrays element by element). Iterators/generators
    are not consumed, and class objects are not iterated.
    """
    if seen is None:
        seen = set()

    obj_id = id(obj)
    if obj_id in seen:
        return 0
    seen.add(obj_id)

    size = sys.getsizeof(obj)

    # Array-like containers report their own data size; do not descend.
    if isinstance(obj, (pd.DataFrame, pd.Series, pd.Index, np.ndarray)):
        return size

    if isinstance(obj, dict):
        size += sum(get_deep_size(k, seen) + get_deep_size(v, seen) for k, v in obj.items())
    elif isinstance(obj, (str, bytes, bytearray, type)) or hasattr(obj, '__next__'):
        # Text/bytes have no nested contents; classes cannot be iterated via
        # their instances' __iter__; iterators would be exhausted by measuring.
        pass
    elif hasattr(obj, '__iter__'):
        size += sum(get_deep_size(item, seen) for item in obj)

    return size

# In-memory footprint of the experiments dictionary, converted from bytes
# using 1024**3 bytes per reported GB.
dict_size_bytes = get_deep_size(experiments_process_and_results)
dict_size_gb = dict_size_bytes / 1024 ** 3
print(f"The dictionary uses {dict_size_gb:.2f} GB of memory.")

The dictionary uses 2.76 GB of memory.


In [49]:
# Save the dictionary to a file
with open('experiments_process_and_results.pkl', 'wb') as f:
    pickle.dump(experiments_process_and_results, f)

# Get the size of the file in bytes
file_size_bytes = os.path.getsize('experiments_process_and_results.pkl')

# Convert the size to MB or GB if needed
file_size_mb = file_size_bytes / (1024 * 1024)  # Convert bytes to MB
file_size_gb = file_size_bytes / (1024 * 1024 * 1024)  # Convert bytes to GB

# print(f"The dictionary takes {file_size_mb:.2f} MB of disk space.")
print(f"The dictionary takes {file_size_gb:.2f} GB of disk space.")

# Optionally, delete the file after checking its size
# os.remove('experiments_process_and_results.pkl')

The dictionary takes 0.61 GB of disk space.


In [50]:
# Path to the .pkl file
file_path = 'experiments_process_and_results.pkl'

# Load the dictionary from the file
with open(file_path, 'rb') as f:
    loaded_dict = pickle.load(f)

In [54]:
# List the column names of the movement table of experiment 46, taken from
# the dictionary that was re-loaded from the pickle file.
loaded_dict['Exp_46']['movement'].columns.tolist()

['Time_[s]',
 'BEND-DIE_LATERAL_Movement_[mm]',
 'BEND-DIE_ROTATING_Angle_[°]',
 'BEND-DIE_VERTICAL_Movement_[mm]',
 'CLAMP-DIE_LATERAL_Movement_[mm]',
 'COLLET_AXIAL_Movement_[mm]',
 'COLLET_ROTATING_Movement_[mm]',
 'MANDREL_AXIAL_Movement_[mm]',
 'PRESSURE-DIE_AXIAL_Movement_[mm]',
 'PRESSURE-DIE_LATERAL_Movement_[mm]',
 'PRESSURE-DIE_LEFT_AXIAL_Movement_[mm]']