In [23]:
# Import necessary libraries
import pandas as pd
import os
import glob
import abyss
import abyss.dataparser



def list_directories_and_files(root_dir='.'):
    """
    Map each immediate subdirectory of ``root_dir`` to the files directly inside it.

    Only one level is inspected: nested subdirectories are not descended into and
    do not appear in the file lists. Entries whose name starts with a dot are
    skipped, because the ``*`` glob pattern does not match them.

    Args:
        root_dir (str): The root directory to start from. Defaults to current directory.

    Returns:
        dict: A dictionary where keys are subdirectory paths (``root_dir`` joined
            with the subdirectory name) and values are lists of file paths in that
            subdirectory. Keys and file lists are sorted by path. The dictionary
            is empty when ``root_dir`` has no subdirectories or does not exist.
    """
    def _children(parent):
        # glob.escape keeps characters such as '[', '*' or '?' in the directory
        # name literal; without it a folder called "run[1]" is read as a pattern
        # and nothing is found. Sorting makes the result independent of the
        # order in which the filesystem happens to list entries.
        return sorted(glob.glob(os.path.join(glob.escape(parent), '*')))

    return {
        subdir: [path for path in _children(subdir) if os.path.isfile(path)]
        for subdir in _children(root_dir)
        if os.path.isdir(subdir)
    }

def convert_xls_to_parquet(directory = "002_V2"):
    """
    Load every ``.xls`` file in ``directory`` and write two combined parquet files.

    Each file is read with ``abyss.dataparser.loadSetitecXls``. From the loader's
    result, element ``[-3]`` is wrapped in a DataFrame and element ``[-1]`` is used
    as-is; both get a ``filename`` column holding the source file's base name.
    The per-file tables are concatenated and written to
    ``f"{directory}_pset.parquet"`` (from element ``[-3]``) and
    ``f"{directory}_data.parquet"`` (from element ``[-1]``). These paths sit next
    to ``directory``, not inside it.

    NOTE(review): presumably ``[-3]`` is the program/step settings table and
    ``[-1]`` the recorded signal table -- confirm against the abyss loader.

    Files that fail to load are reported with ``print`` and skipped. Nothing is
    written when the directory is missing, holds no ``.xls`` files, or none of
    the files could be loaded.

    Args:
        directory (str): Directory containing the ``.xls`` files. It is also used
            as the prefix of the two output file names.

    Returns:
        None
    """
    # Check if directory exists
    if not os.path.exists(directory):
        print(f"Directory {directory} does not exist.")
        return

    # Find all .xls files in the directory. glob.escape keeps special characters
    # in the directory name literal; sorting makes the row order of the combined
    # tables reproducible across filesystems.
    xls_files = sorted(glob.glob(os.path.join(glob.escape(directory), "*.xls")))
    if not xls_files:
        print(f"No .xls files found in {directory}.")
        return

    print(f"Found {len(xls_files)} .xls files.")

    # Process each file
    all_prog_list = []
    all_data_list = []
    for file_path in xls_files:
        print(f"\nProcessing: {file_path}")

        try:
            setitecfile = abyss.dataparser.loadSetitecXls(file_path)
            print(f"Data loaded from {file_path}")
            prog = pd.DataFrame(setitecfile[-3])
            data = setitecfile[-1]
            source_name = os.path.basename(file_path)
            prog['filename'] = source_name
            data['filename'] = source_name
        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the batch.
            print(f"Error reading {file_path}: {str(e)}")
            continue

        all_prog_list.append(prog)
        all_data_list.append(data)

    if not all_prog_list:
        # pd.concat raises ValueError on an empty list, so stop here when every
        # file failed to load instead of crashing after the errors were printed.
        print(f"No .xls files in {directory} could be loaded; nothing was written.")
        return

    # Concatenate all dataframes
    all_prog = pd.concat(all_prog_list, ignore_index=True)
    all_data = pd.concat(all_data_list, ignore_index=True)

    # Save the concatenated dataframes to parquet files
    all_prog.to_parquet(f"{directory}_pset.parquet")
    all_data.to_parquet(f"{directory}_data.parquet")


if __name__ == "__main__":
    # Discover the immediate subdirectories of the working directory; only the
    # keys (subdirectory paths) are needed to drive the conversion below.
    directories_and_files = list_directories_and_files()

    # Convert the .xls files of each subdirectory into its own pair of parquet files.
    for directory in directories_and_files:
        print(f"\nProcessing {directory}...")
        convert_xls_to_parquet(directory)
        print(f"Finished processing {directory}.")


Processing .\001_V1...
Found 23 .xls files.

Processing: .\001_V1\V1_001.xls
Data loaded from .\001_V1\V1_001.xls

Processing: .\001_V1\V1_002.xls
Data loaded from .\001_V1\V1_002.xls

Processing: .\001_V1\V1_003.xls
Data loaded from .\001_V1\V1_003.xls

Processing: .\001_V1\V1_004.xls
Data loaded from .\001_V1\V1_004.xls

Processing: .\001_V1\V1_005.xls
Data loaded from .\001_V1\V1_005.xls

Processing: .\001_V1\V1_006.xls
Data loaded from .\001_V1\V1_006.xls

Processing: .\001_V1\V1_007.xls
Data loaded from .\001_V1\V1_007.xls

Processing: .\001_V1\V1_008.xls
Data loaded from .\001_V1\V1_008.xls

Processing: .\001_V1\V1_009.xls
Data loaded from .\001_V1\V1_009.xls

Processing: .\001_V1\V1_010.xls
Data loaded from .\001_V1\V1_010.xls

Processing: .\001_V1\V1_011.xls
Data loaded from .\001_V1\V1_011.xls

Processing: .\001_V1\V1_012.xls
Data loaded from .\001_V1\V1_012.xls

Processing: .\001_V1\V1_013.xls
Data loaded from .\001_V1\V1_013.xls

Processing: .\001_V1\V1_014.xls
Data loaded 

In [21]:
# Display the combined data table stored in "all_data.parquet".
# NOTE(review): no cell visible here writes "all_data.parquet"; the converter
# above writes "<directory>_data.parquet" instead -- confirm where this file
# comes from so the notebook survives Restart & Run All.
pd.read_parquet("all_data.parquet")

Unnamed: 0,Position (mm),I Torque (A),I Thrust (A),I Torque Empty (A),I Thrust Empty (A),Step (nb),Stop code,Mem Torque min (A),Mem Thrust min (A),Rotation Speed (rpm),Feed Speed (mm/s),Torque Power (W),filename
0,-0.669000,-0.049,0.000,5.908,0.375,2.0,0.0,0.0,0.0,-3977.824951,3.001,101.0,V2__001.xls
1,-0.699000,-0.098,0.000,5.908,0.375,2.0,0.0,0.0,0.0,-3973.601074,2.999,103.0,V2__001.xls
2,-0.726000,-0.049,-0.012,5.908,0.375,2.0,0.0,0.0,0.0,-3975.712891,3.002,106.0,V2__001.xls
3,-0.756000,-0.098,0.000,5.908,0.375,2.0,0.0,0.0,0.0,-3976.769043,3.011,109.0,V2__001.xls
4,-0.787000,-0.049,0.000,5.908,0.375,2.0,0.0,0.0,0.0,-3978.881104,3.021,112.0,V2__001.xls
...,...,...,...,...,...,...,...,...,...,...,...,...,...
170939,-25.007999,0.488,1.851,2.539,0.176,3.0,0.0,0.0,0.0,-1025.343018,0.518,16.0,V2__097.xls
170940,-25.007999,0.732,2.308,2.539,0.176,3.0,0.0,0.0,0.0,-1040.126953,0.499,17.0,V2__097.xls
170941,-25.007999,0.732,2.308,2.539,0.176,3.0,0.0,0.0,0.0,-1040.126953,0.499,17.0,V2__097.xls
170942,-25.007999,0.732,2.308,2.539,0.176,3.0,8.0,0.0,0.0,-1040.126953,0.499,17.0,V2__097.xls


In [22]:
# Display the combined program table stored in "all_prog.parquet".
# NOTE(review): no cell visible here writes "all_prog.parquet"; the converter
# above writes "<directory>_pset.parquet" instead -- confirm where this file
# comes from so the notebook survives Restart & Run All.
pd.read_parquet("all_prog.parquet")

Unnamed: 0,Step Nb,Step On/Off,DEP (mm),RPM,AV (mm/s),AV (mm/tr),Thrust Max (A),Torque Max (A),Thrust Min (A),Torque Min (A),...,Peck (nb),Delay (ms),Stroke Limit (A),Thrust Limit (A),Torque Limit (A),LUB AIR,LUB FLOW,Vacuum,Material,filename
0,0.0,0.0,0.0,0.0,0.00,inf,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,V2__001.xls
1,1.0,0.0,10.0,1500.0,2.50,0.100,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,V2__001.xls
2,2.0,1.0,15.7,4000.0,3.00,0.045,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,V2__001.xls
3,3.0,1.0,6.8,1000.0,0.75,0.045,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,6.0,4.0,0.0,0.0,V2__001.xls
4,0.0,0.0,0.0,0.0,0.00,inf,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,V2__002.xls
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,3.0,1.0,9.3,1000.0,0.75,0.045,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,6.0,4.0,0.0,0.0,V2__096.xls
384,0.0,0.0,0.0,0.0,0.00,inf,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,V2__097.xls
385,1.0,0.0,10.0,1500.0,2.50,0.100,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,V2__097.xls
386,2.0,1.0,15.7,4000.0,3.00,0.045,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,V2__097.xls
