In [1]:

import matlab.engine
import numpy as np
import pandas as pd
import os
import time
import shutil


# Root folder of the pipeline. Result files named Equation_<id>.txt are looked up
# here after each MATLAB run, so it presumably has to be the folder the MATLAB
# routine writes into (verify against the MATLAB side).
# Set the TDTR_BASE_DIR environment variable to run on another machine; the
# original location is kept as the default so existing setups behave as before.
base_dir = os.environ.get(
    'TDTR_BASE_DIR',
    '/Users/shiveshjha/Downloads/TDTR_WITH_ML-MAIN/equation_generation/',
)
# Alias of base_dir; not referenced by the cells visible in this notebook section.
temp_output_dir = base_dir
# Final CSV datasets are collected in a sub-folder, created on first use.
csv_output_dir = os.path.join(base_dir, 'datasets_complex')
os.makedirs(csv_output_dir, exist_ok=True)

In [2]:

def scale_to_bounds(y_values, min_val, max_val):
    """
    Min-max rescale y-values into [min_val, max_val] while preserving their pattern.

    The smallest input is mapped to ``min_val``, the largest to ``max_val`` and
    every other value is placed linearly in between.

    Parameters:
    -----------
    y_values : numpy.ndarray
        Array (or array-like) of y-values to scale
    min_val : float
        Minimum allowed value
    max_val : float
        Maximum allowed value

    Returns:
    --------
    numpy.ndarray
        Scaled y-values within the specified bounds. An empty input yields an
        empty array; a constant input (zero range) yields an array filled with
        ``min_val`` instead of NaN.
    """
    y_values = np.asarray(y_values)

    # Nothing to scale; np.min / np.max would raise ValueError on an empty array.
    if y_values.size == 0:
        return y_values.astype(float)

    y_min = np.min(y_values)
    y_range = np.max(y_values) - y_min

    # A flat profile has zero range, so the normalisation below would be 0/0 and
    # return NaN everywhere. Pin the flat profile to the lower bound instead.
    if y_range == 0:
        return np.full(y_values.shape, min_val, dtype=float)

    y_norm = (y_values - y_min) / y_range
    return y_norm * (max_val - min_val) + min_val


def generate_quadratic_equations(intercept, y_end, num_equations, start_id, rng=None):
    """
    Generate random quadratic thermal conductivity profiles as a long-format table.

    Each profile is y = a*x^2 + b*x + intercept, where ``a`` is drawn uniformly
    from [-0.0001, 0.0001) and ``b`` is solved so that the curve passes through
    ``y_end`` at x = 400. The curve therefore equals ``intercept`` at x = 0 (not
    at the first sampled point) and is sampled at 100 evenly spaced x-values
    between 80 and 400.

    Profiles containing any non-positive value are discarded without being
    replaced, so fewer than ``num_equations`` profiles may be returned. Equation
    IDs are assigned consecutively, starting at ``start_id``, to the profiles
    that are kept.

    Parameters:
    -----------
    intercept : float
        Constant term of the quadratic, i.e. its value at x = 0
    y_end : float
        Value the quadratic is forced to take at x = 400
    num_equations : int
        Number of random candidates to draw
    start_id : int
        Equation ID given to the first accepted profile
    rng : numpy.random.Generator, optional
        Source of randomness for reproducible draws. When omitted, the global
        ``np.random`` state is used, exactly as before this parameter existed.

    Returns:
    --------
    pandas.DataFrame
        One row per (equation, x) pair with the columns "Equations",
        "Equation Type", "Equation ID", "x (Thickness)" and
        "y (Thermal Conductivity Profile)".
    """
    columns = [
        "Equations",
        "Equation Type",
        "Equation ID",
        "x (Thickness)",
        "y (Thermal Conductivity Profile)"
    ]

    x_start, x_end = 80, 400
    x_values = np.linspace(x_start, x_end, 100)

    # Fall back to the legacy global generator so np.random.seed() keeps working.
    draw = np.random.uniform if rng is None else rng.uniform

    data = []
    equation_id = start_id

    for _ in range(num_equations):
        a = draw(-0.0001, 0.0001)

        # Solve a*x_end^2 + b*x_end + intercept = y_end for b.
        b = (y_end - a * x_end**2 - intercept) / x_end

        y_values = a * x_values**2 + b * x_values + intercept

        # Reject profiles that are not strictly positive at every sampled point.
        if not np.all(y_values > 0):
            continue

        # \u00b2 is the superscript-two character; the escape keeps the label
        # correct regardless of how this source file is encoded.
        equation = f"y = {a:.6f}x\u00b2 + {b:.4f}x + {intercept:.2f}"

        data.extend(
            [equation, 'quadratic', equation_id, x, y]
            for x, y in zip(x_values, y_values)
        )
        equation_id += 1

    return pd.DataFrame(data, columns=columns)



In [3]:
# Process output file function
def process_output_file(output_file):
    """Parse a whitespace-delimited MATLAB output file.

    Reads every line of ``output_file``, keeps the rows that split into at least
    two whitespace-separated tokens, and converts the first and last token of
    each kept row to float. Progress information (path, line count and the first
    kept row, if any) is printed along the way.

    Returns a tuple ``(first_column, last_column)`` of equally long float lists.
    Raises ``ValueError`` if a kept row has a non-numeric first or last token.
    """
    print(f"Reading file: {output_file}")
    with open(output_file, 'r') as handle:
        raw_lines = list(handle)

    print(f"Number of lines in file: {len(raw_lines)}")

    # Blank lines split into zero tokens, so the length check drops them as well.
    rows = []
    for raw in raw_lines:
        tokens = raw.split()
        if len(tokens) >= 2:
            rows.append(tokens)

    if rows:
        print(f"Sample data point: {rows[0]}")

    first_values = [float(tokens[0]) for tokens in rows]
    last_values = [float(tokens[-1]) for tokens in rows]
    return first_values, last_values


In [4]:
# Main processing function
def process_equations(start_id, end_id, intercept, y_end, num_equations=100):
    """
    Generate quadratic profiles, run each through MATLAB and save the joined dataset.

    For every generated equation a 9-layer conductivity array is built and passed
    to the MATLAB routine ``TDTR_MAIN_V4`` together with the name
    ``Equation_<id>``. The resulting ``Equation_<id>.txt`` is looked up in
    ``base_dir``, parsed with ``process_output_file`` and deleted. Every
    (Tdelay, ModelRatio) pair from that file is combined with every profile row
    of the equation, and the combined table is written to
    ``Dataset_<start_id>_<end_id>.csv`` inside ``csv_output_dir``.

    One MATLAB engine is started lazily and reused across equations; it is shut
    down after a failure (the next equation then starts a fresh engine) and
    always shut down before returning, so no MATLAB process is left behind.
    Failures for a single equation are printed and do not abort the batch.

    Parameters:
    -----------
    start_id : int
        Equation ID given to the first generated profile
    end_id : int
        Only used for the progress message and the CSV file name
    intercept : float
        Constant term passed to ``generate_quadratic_equations``
    y_end : float
        End value passed to ``generate_quadratic_equations``
    num_equations : int, optional
        Number of candidate equations to request (default 100)

    Returns:
    --------
    pandas.DataFrame
        The table that was written to CSV (empty if nothing could be processed).
    """
    print(f"\nStarting process for equations {start_id} to {end_id}")

    # Generate quadratic equations
    df = generate_quadratic_equations(
        intercept=intercept,
        y_end=y_end,
        num_equations=num_equations,
        start_id=start_id
    )
    print(f"Generated DataFrame shape: {df.shape}")

    profile_column = 'y (Thermal Conductivity Profile)'
    results = []
    eng = None

    try:
        # sort=False keeps the equations in order of first appearance.
        for equation_id, equation_rows in df.groupby('Equation ID', sort=False):
            lambda_array = _build_lambda_array(equation_rows[profile_column].tolist())
            file_name = f'Equation_{equation_id}'

            try:
                # Start MATLAB only when needed and keep it for later equations.
                if eng is None:
                    eng = matlab.engine.start_matlab()
                eng.TDTR_MAIN_V4(matlab.double(lambda_array), file_name, nargout=0)

                output_file = os.path.join(base_dir, f"{file_name}.txt")
                if not os.path.exists(output_file):
                    print(f"Warning: no output file found for {file_name}; skipping")
                    continue

                first_column, last_column = process_output_file(output_file)

                # Convert the profile rows once, then pair them with each delay point.
                records = equation_rows.to_dict('records')
                for tdelay, model_ratio in zip(first_column, last_column):
                    for record in records:
                        results.append({
                            **record,
                            'Tdelay': tdelay,
                            'ModelRatio': model_ratio
                        })

                os.remove(output_file)

            except Exception as e:
                print(f"Error processing {file_name}: {e}")
                # The engine may be in a bad state; drop it so the next equation
                # gets a fresh one.
                eng = _quit_engine(eng)
    finally:
        _quit_engine(eng)

    # Create final DataFrame
    final_df = pd.DataFrame(results)

    # Save to CSV
    csv_filename = f'Dataset_{start_id}_{end_id}.csv'
    csv_path = os.path.join(csv_output_dir, csv_filename)
    final_df.to_csv(csv_path, index=False)

    return final_df


def _build_lambda_array(profile):
    """Build the 9-entry conductivity list sent to MATLAB from one sampled profile."""
    # Five evenly spaced samples of the profile fill the middle layers.
    indices = np.linspace(0, len(profile) - 1, 5, dtype=int)
    five_layers = [profile[i] for i in indices]

    return [
        140,            # Al surface
        0.06,           # First interface
        five_layers[0], # Layer 1
        five_layers[1], # Layer 2
        five_layers[2], # Layer 3
        five_layers[3], # Layer 4
        five_layers[4], # Layer 5
        0.1,            # Second interface
        140             # Substrate
    ]


def _quit_engine(eng):
    """Shut down a MATLAB engine if one is running; always returns None."""
    if eng is not None:
        try:
            eng.quit()
        except Exception as e:
            print(f"Warning: could not shut down MATLAB engine cleanly: {e}")
    return None


In [5]:
# Main execution
start_id = 4034

# Spacing between the first IDs of consecutive batches. process_equations
# requests 100 equations per call, so batches never share an ID.
BATCH_SIZE = 100

# (intercept, y_end) pairs in W/mK, one pair per batch.
# NOTE(review): the material analogies below are the original author's labels and
# have not been checked against reference conductivity values.
parameter_sets = [
    (9.2, 9.4),    # Like silver
    (8.8, 9.0),    # Transition range
    (7.6, 7.8),    # Between aluminum and copper ranges
    (7.2, 7.4),    # High-end alloy range
    (5.8, 6.0),    # Like bronze
    (5.4, 5.6),    # Medium-high transition
    (2.6, 2.8),    # Like boron nitride
    (2.2, 2.4),    # Like magnesia
    (0.8, 1.0),    # Like some ceramics
    (0.4, 0.6)     # Like some polymers
]

# Run one batch per parameter set. Iterating over the list itself (instead of a
# fixed range) keeps the loop correct when sets are added or removed.
for batch, (intercept, y_end) in enumerate(parameter_sets):
    batch_start = start_id + (batch * BATCH_SIZE)
    batch_end = batch_start + BATCH_SIZE

    print(f"\nProcessing Batch {batch + 1}/{len(parameter_sets)}")
    print(f"IDs: {batch_start} to {batch_end}")
    print(f"Intercept: {intercept} (W/mK), y_end: {y_end} (W/mK)")

    # df holds the result of the most recent batch; every batch is also saved to CSV.
    df = process_equations(
        start_id=batch_start,
        end_id=batch_end,
        intercept=intercept,
        y_end=y_end
    )


Processing Batch 1/10
IDs: 4034 to 4134
Intercept: 9.2 (W/mK), y_end: 9.4 (W/mK)

Starting process for equations 4034 to 4134
Generated DataFrame shape: (10000, 5)
Fitting Solution:
Output file saved as: Equation_4034.txt
Program Completed
Reading file: /Users/shiveshjha/Downloads/TDTR_WITH_ML-MAIN/equation_generation/Equation_4034.txt
Number of lines in file: 60
Sample data point: ['100.000000', '0.001268', '-0.000416', '3.045946']
Fitting Solution:
Output file saved as: Equation_4035.txt
Program Completed
Reading file: /Users/shiveshjha/Downloads/TDTR_WITH_ML-MAIN/equation_generation/Equation_4035.txt
Number of lines in file: 60
Sample data point: ['100.000000', '0.001337', '-0.000564', '2.368886']
Fitting Solution:
Output file saved as: Equation_4036.txt
Program Completed
Reading file: /Users/shiveshjha/Downloads/TDTR_WITH_ML-MAIN/equation_generation/Equation_4036.txt
Number of lines in file: 60
Sample data point: ['100.000000', '0.001310', '-0.000494', '2.648877']
Fitting Solution