In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
from functions import *
import csv
import numpy as np

# 1 Folder Data loading
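The code loads every CSV file in the specified folder whose name contains `Format01=Kreis` and converts it into a pandas DataFrame: `TBA_Zyk` files are collected in `dataframes` and `TBA_CU` files in `dataframesCU`. In each DataFrame the `Zeit` column is converted with `convert_to_linear_time`, shifted so that the first row is 0, and divided by 3600 (seconds to hours, assuming the converted time is in seconds).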

In [None]:
# Folder containing the exported test CSV files.
folder_path = "data/e production=ep sanyo ep sanyo 005"

# glob returns files in arbitrary, OS-dependent order. Sort so that the load
# order (and therefore every later index into the lists below) is reproducible.
# NOTE(review): later cells pair dataframes[i] with dataframesCU[i] by position;
# confirm that each TBA_Zyk file sorts in the same relative order as its
# TBA_CU partner.
csv_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))

dataframes = []      # DataFrames loaded from "TBA_Zyk" files
dfarrnames = []      # paths of the files behind `dataframes`

dataframesCU = []    # DataFrames loaded from "TBA_CU" files
dfarrnamesCU = []    # paths of the files behind `dataframesCU`


def _load_test_csv(path):
    """Read one test CSV and rebase its 'Zeit' column.

    The second line of the file is skipped (``skiprows=[1]``). 'Zeit' is passed
    through ``convert_to_linear_time``, shifted so the first row is 0, and
    divided by 3600 (presumably seconds -> hours; confirm against
    ``convert_to_linear_time``).

    Raises IndexError if the file contains no data rows.
    """
    frame = pd.read_csv(path, skiprows=[1])
    linear_time = frame['Zeit'].apply(convert_to_linear_time)
    frame['Zeit'] = (linear_time - linear_time.iloc[0]) / 3600
    return frame


# Load only files whose name contains "Format01=Kreis", routing them by the
# "TBA_Zyk" / "TBA_CU" marker. Markers are matched against the file name only,
# so a marker appearing in the folder path cannot cause a false match.
for csv_file in csv_files:
    file_name = os.path.basename(csv_file)
    if "Format01=Kreis" not in file_name:
        continue
    if "TBA_Zyk" in file_name:
        dataframes.append(_load_test_csv(csv_file))
        dfarrnames.append(csv_file)
    if "TBA_CU" in file_name:
        dataframesCU.append(_load_test_csv(csv_file))
        dfarrnamesCU.append(csv_file)

# Report the shape of every loaded TBA_Zyk DataFrame, then the total count.
noFiles = len(dataframes)
for i, dfarr in enumerate(dataframes):
    num_rows, num_columns = dfarr.shape
    print(f"DataFrame {i + 1} - Rows: {num_rows}, Columns: {num_columns}")
print(noFiles)

Sanity check that the data was loaded successfully

In [None]:
# Sanity check: report the dimensions of the second loaded DataFrame
# (index 1), so at least two files must have been loaded.
sample_frame = dataframes[1]
num_rows, num_columns = len(sample_frame.index), len(sample_frame.columns)

print(f"Number of rows (length): {num_rows}")
print(f"Number of columns (width): {num_columns}")
# Uncomment to preview the first rows of the first DataFrame:
# dataframes[0].head()

Deleting invalid data: drop each file pair whose `TBA_Zyk` DataFrame does not have `Spannung` as its 16th column

In [None]:
# Keep only the (TBA_Zyk, TBA_CU) pairs whose TBA_Zyk DataFrame has
# 'Spannung' as its 16th column (index 15, zero-based). Pairs are formed by
# position; zip stops at the shorter of the two lists.
kept_pairs = []
for pair_index, pair in enumerate(zip(dataframes, dataframesCU)):
    zyk_columns = pair[0].columns
    has_voltage_column = len(zyk_columns) > 15 and zyk_columns[15] == 'Spannung'
    if not has_voltage_column:
        print('deleted:', pair_index)
        continue
    kept_pairs.append(pair)

valid_dataframes = [zyk_frame for zyk_frame, _ in kept_pairs]
valid_dataframesCU = [cu_frame for _, cu_frame in kept_pairs]

# Replace the working lists with the surviving pairs.
dataframes = valid_dataframes
dataframesCU = valid_dataframesCU

# Report the shape of every remaining DataFrame, then the total count.
noFiles = len(dataframes)
for i, (num_rows, num_columns) in enumerate(frame.shape for frame in dataframes):
    print(f"DataFrame {i + 1} - Rows: {num_rows}, Columns: {num_columns}")
print(noFiles)

Deleting invalid data: drop each file pair whose `TBA_CU` DataFrame does not have `Spannung` as its 16th column

In [None]:
# Keep only the (TBA_Zyk, TBA_CU) pairs whose TBA_CU DataFrame has
# 'Spannung' as its 16th column (index 15, zero-based). Pairs are formed by
# position; zip stops at the shorter of the two lists.
kept_pairs = []
for pair_index, pair in enumerate(zip(dataframes, dataframesCU)):
    cu_columns = pair[1].columns
    has_voltage_column = len(cu_columns) > 15 and cu_columns[15] == 'Spannung'
    if not has_voltage_column:
        print('deleted:', pair_index)
        continue
    kept_pairs.append(pair)

valid_dataframes = [zyk_frame for zyk_frame, _ in kept_pairs]
valid_dataframesCU = [cu_frame for _, cu_frame in kept_pairs]

# Replace the working lists with the surviving pairs.
dataframes = valid_dataframes
dataframesCU = valid_dataframesCU

# Report the shape of every remaining DataFrame, then the total count.
noFiles = len(dataframes)
for i, (num_rows, num_columns) in enumerate(frame.shape for frame in dataframes):
    print(f"DataFrame {i + 1} - Rows: {num_rows}, Columns: {num_columns}")
print(noFiles)

In [None]:
# Print the (rows, columns) shape of every TBA_Zyk DataFrame, followed by
# every TBA_CU DataFrame.
for frame_group in (dataframes, dataframesCU):
    for df in frame_group:
        print(df.shape)

### Extracting capacity
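For each DataFrame in `dataframesCU`, the code finds the first row with `Schritt` equal to 4 or 5 and `Zeit` >= 10, takes that row's `Zeit` as the start time, and keeps the rows with `Schritt` 4 or 5 whose `Zeit` lies within the next 4 time units. The resulting DataFrames are stored in a new list `filtered_dataframes`.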

This process seems to be part of data preprocessing or analysis related to battery tests, where the code is extracting capacity test data within a certain time frame after the battery is fully charged.

![Image Description](dcb03b19-5872-479c-b955-75536c9e13a7.png)


In [None]:
# For every TBA_CU DataFrame, cut out the step-4/5 rows inside a window of
# length 4 (in 'Zeit' units) that starts at the first step-4/5 row with
# Zeit >= 10.
filtered_dataframes = []

for df in dataframesCU:
    # Rows recorded during step 4 or step 5.
    in_target_steps = (df['Schritt'] == 4) | (df['Schritt'] == 5)

    # First such row at or after Zeit = 10 marks the start of the window.
    # Raises IndexError when no row qualifies.
    candidate_rows = df[in_target_steps & (df['Zeit'] >= 10)]
    start_time = candidate_rows.iloc[0]['Zeit']
    print(start_time)
    end_time = start_time + 4

    # between() is inclusive on both ends by default.
    in_window = df['Zeit'].between(start_time, end_time)
    filtered_dataframes.append(df[in_window & in_target_steps])

The code iterates through each DataFrame (`df`) in the `filtered_dataframes`. For each DataFrame, it extracts `maximum` and `minimum` values based on specified columns using the functions `max_threshold()` and `min_threshold()`. 

For each DataFrame, the maximum and minimum values are appended to `max_values_list` and `min_values_list` respectively. The capacity, calculated as the difference between the maximum and minimum values, is stored in `capacity_values_list`.


In [None]:
# Column index handed to max_threshold / min_threshold.
columns_to_extract = [9]

# One (max_values, min_values) pair per filtered DataFrame. The trailing
# arguments 0 and 35 are passed through unchanged to the helpers
# (NOTE(review): presumably threshold bounds - confirm in functions.py).
extremes = [
    (
        max_threshold(columns_to_extract, frame, 0, 35),
        min_threshold(columns_to_extract, frame, 0, 35),
    )
    for frame in filtered_dataframes
]

max_values_list = [upper for upper, _ in extremes]
min_values_list = [lower for _, lower in extremes]
# Capacity = first max value minus first min value for each DataFrame.
capacity_values_list = [upper[0] - lower[0] for upper, lower in extremes]
iteration_range = range(len(dataframes))

for report in (max_values_list, min_values_list, capacity_values_list, iteration_range):
    print(report)

In [None]:
# Positions in capacity_values_list to leave out; edit this list by hand.
skip_indices = []

# Copy over every capacity whose position is not listed in skip_indices.
extracted_capacity_values = []
for position, capacity_value in enumerate(capacity_values_list):
    if position in skip_indices:
        continue
    extracted_capacity_values.append(capacity_value)

iteration_range = range(len(extracted_capacity_values))

for report in (extracted_capacity_values, iteration_range):
    print(report)

### Crop The Values


This script iterates through a list of dataframes and performs the following operations:

1. Select the rows of the first cycle (`Zyklus == 1`) from each DataFrame and drop the first quarter of those rows.
2. Extracts a subset of the filtered dataframe from `3.65-3.85`.
3. Filters the subset based on the `Spannung` column values.
4. Calculates the maximum and minimum values of `AhAkku` column in the filtered dataframe.
5. Interpolates 50 values for `AhAkku` column.
6. Writes the interpolated values along with the difference between maximum and minimum `AhAkku` values to a CSV file.

CSV File Format:
- Each row corresponds to a set of interpolated values along with the difference between max and min AhAkku values.
- The columns represent the interpolated values (`Interpolated_1` to `Interpolated_50`) followed by `Max_Min_Ahakku_Difference`. No header row is written to the file.

Note: Ensure that the 'dataframes' variable is a list containing pandas DataFrames.

    

In [None]:
def _resample_to_fixed_length(values, n_points=50):
    """Linearly resample a 1-D sequence to `n_points` evenly spaced samples.

    Sample positions run from the first to the last index of `values`.
    """
    source_positions = np.arange(len(values))
    target_positions = np.linspace(0, len(values) - 1, n_points)
    return np.interp(target_positions, source_positions, values)


# extracted_capacity_values has had the entries listed in skip_indices removed,
# so indexing it with the DataFrame position mis-pairs every file after the
# first skipped one. Drop the same positions from the DataFrames so both
# sequences line up one-to-one.
kept_dataframes = [df for i, df in enumerate(dataframes) if i not in skip_indices]
if len(kept_dataframes) != len(extracted_capacity_values):
    raise ValueError(
        f"{len(kept_dataframes)} DataFrames but "
        f"{len(extracted_capacity_values)} capacity values; "
        "cannot pair them row by row"
    )

with open('temptest.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # No header row is written. Row layout:
    # ['Interpolated_1', 'Interpolated_2', ..., 'Interpolated_50', 'Max_Min_Ahakku_Difference']

    for df, capacity_value in zip(kept_dataframes, extracted_capacity_values):
        # Rows of the first cycle, without their first quarter.
        first_cycle = df[df['Zyklus'] == 1]
        first_cycle = first_cycle.iloc[len(first_cycle) // 4:]

        # Keep only the rows whose voltage lies in [3.65, 3.85].
        in_voltage_band = (first_cycle['Spannung'] >= 3.65) & (first_cycle['Spannung'] <= 3.85)
        ahakku_values = first_cycle.loc[in_voltage_band, 'AhAkku'].values

        # Files with no rows in the voltage band produce no output row.
        if len(ahakku_values) == 0:
            continue

        interpolated_values = _resample_to_fixed_length(ahakku_values, 50)
        writer.writerow(list(interpolated_values) + [capacity_value])