In [1]:
import os
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import multiprocessing

def process_file(file_path, columns):
    """Compute the minimum and maximum of selected columns in one CSV file.

    Args:
        file_path: Path of the CSV file to read.
        columns: Iterable of column names to summarize.

    Returns:
        A dict mapping every requested column name to
        ``{'min': value, 'max': value}``. A column that is absent from the
        file, or that contains no non-missing values, keeps the neutral
        sentinels ``float('inf')`` / ``float('-inf')`` so that callers can
        merge results from several files with plain ``min`` / ``max``.
    """
    columns = list(columns)
    wanted = set(columns)
    results = {col: {'min': float('inf'), 'max': float('-inf')} for col in columns}

    # Parse only the requested columns. The callable form of ``usecols`` is
    # used because it simply skips names that the file does not contain,
    # whereas a list of names would raise for a missing column.
    df = pd.read_csv(file_path, usecols=lambda name: name in wanted)

    for col in columns:
        if col not in df.columns:
            continue
        col_min = df[col].min()
        col_max = df[col].max()
        # min()/max() yield NaN when the column has no non-missing values;
        # keep the sentinels instead of reporting NaN as an extreme.
        if pd.isna(col_min) or pd.isna(col_max):
            continue
        results[col]['min'] = col_min
        results[col]['max'] = col_max

    return results

def get_all_csv_files(directory):
    """Return the paths of all ``.csv`` files found under ``directory``.

    The tree is walked recursively with ``os.walk``; a file is included when
    its name ends with the lowercase suffix ``.csv``. Each returned path is
    the containing folder joined with the file name.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.csv')
    ]

def get_min_max_values(directory, columns):
    """Aggregate per-column minima and maxima over every CSV under a folder.

    Each CSV file returned by ``get_all_csv_files(directory)`` is handed to
    ``process_file`` on a thread pool, and the per-file results are folded
    into one running summary in submission order.

    Args:
        directory: Root folder to search for CSV files.
        columns: Column names to summarize.

    Returns:
        A dict mapping each column to ``{'min': ..., 'max': ...}``. Entries
        start at ``float('inf')`` / ``float('-inf')`` and stay there when no
        file contributes a value for that column.
    """
    csv_paths = get_all_csv_files(directory)

    overall = dict(
        (col, {'min': float('inf'), 'max': float('-inf')}) for col in columns
    )

    with ThreadPoolExecutor() as pool:
        # Queue every file first so the reads overlap, then collect results.
        pending = list(
            pool.submit(process_file, path, columns) for path in csv_paths
        )

        for job in pending:
            stats = job.result()
            for col in columns:
                bounds = overall[col]
                bounds['min'] = min(bounds['min'], stats[col]['min'])
                bounds['max'] = max(bounds['max'], stats[col]['max'])

    return overall

# Usage example: print the overall min/max of selected columns for a dataset.
if __name__ == '__main__':
    # Root folder that is searched recursively for CSV files (hard-coded
    # local Windows path; adjust before running on another machine).
    directory = r'D:\SamsungSTF\Processed_Data\TripByTrip'
    # Column names whose value range should be reported.
    columns_to_check = ['speed', 'acceleration', 'ext_temp', 'int_temp']

    # Aggregate the extremes across every CSV found under `directory`.
    min_max_values = get_min_max_values(directory, columns_to_check)

    # Emit one line per column with its minimum and maximum.
    for col, values in min_max_values.items():
        print(f"{col}: 최소값 = {values['min']}, 최대값 = {values['max']}")

speed: 최소값 = 0.0, 최대값 = 60.83382
acceleration: 최소값 = -10.277860000000002, 최대값 = 8.611180000000001
ext_temp: 최소값 = -19, 최대값 = 48
int_temp: 최소값 = -15, 최대값 = 60
