In [10]:
# The MATLAB Engine API for Python is published on PyPI as "matlabengine".
# Installing the package named "matlab" does not provide `matlab.engine`
# (the import below fails with "'matlab' is not a package").
# NOTE(review): pin the matlabengine release that matches the locally
# installed MATLAB version, e.g. `%pip install matlabengine==<version>`.
%pip install matlabengine

Note: you may need to restart the kernel to use updated packages.


In [11]:
import matlab.engine
import numpy as np
import pandas as pd
import os
import time
import shutil

# Root folder of the equation-generation workflow. process_equations() looks
# for the per-equation ``Equation_<id>.txt`` files directly inside it.
# The location can be overridden with the TDTR_BASE_DIR environment variable
# so the notebook is not tied to one user's machine; when the variable is not
# set, the original path is used unchanged.
base_dir = os.environ.get(
    'TDTR_BASE_DIR',
    '/Users/shiveshjha/Downloads/TDTR_WITH_ML-MAIN/equation_generation/',
)

# Alias of base_dir; not referenced by the code visible in this notebook.
temp_output_dir = base_dir
# Destination folder for the generated Dataset_*.csv files.
csv_output_dir = os.path.join(base_dir, 'datasets')

# Create the CSV folder (and any missing parents) up front.
os.makedirs(csv_output_dir, exist_ok=True)

ModuleNotFoundError: No module named 'matlab.engine'; 'matlab' is not a package

In [None]:

def generate_lines(intercept, x_end, y_end, num_lines, start_id, x_values=None):
    """Build a family of straight lines ``y = slope * x + intercept``.

    ``num_lines`` candidate slopes are spaced evenly (``np.linspace``) between
    the largest of ``-intercept / x`` and the smallest of
    ``(y_end - intercept) / x`` taken over the sample positions. A candidate
    is discarded when any sampled ``y`` is ``<= 0``; discarded candidates do
    not consume an equation ID.

    Args:
        intercept: Value of every line at ``x = 0``.
        x_end: Kept for backward compatibility; it does not take part in the
            computation.
        y_end: Upper target used to derive the largest slope.
        num_lines: Number of candidate slopes to try.
        start_id: Equation ID given to the first retained line; each further
            retained line gets the next integer.
        x_values: Optional iterable of non-zero sample positions. Defaults to
            ``[80, 160, 240, 320, 400]``.

    Returns:
        pandas.DataFrame with one row per (retained line, sample position)
        and the columns "Equations", "Slope", "Intercept", "Equation ID",
        "x (Thickness)" and "y (Thermal Conductivity Profile)".

    Raises:
        ValueError: If ``x_values`` is empty.
        ZeroDivisionError: If a sample position is zero.
    """
    sample_xs = [80, 160, 240, 320, 400] if x_values is None else list(x_values)

    lower_slope = max(-intercept / x for x in sample_xs)
    upper_slope = min((y_end - intercept) / x for x in sample_xs)

    records = []
    next_id = start_id

    for slope in np.linspace(lower_slope, upper_slope, num_lines):
        profile = [slope * x + intercept for x in sample_xs]

        # A line is only usable when it stays strictly positive at every
        # sample position.
        if any(y <= 0 for y in profile):
            continue

        label = f"y = {slope:.4f}x + {intercept}"
        records.extend(
            [label, slope, intercept, next_id, x, y]
            for x, y in zip(sample_xs, profile)
        )
        next_id += 1

    return pd.DataFrame(records, columns=[
        "Equations", "Slope", "Intercept", "Equation ID",
        "x (Thickness)", "y (Thermal Conductivity Profile)"
    ])

In [None]:

def process_output_file(output_file):
    """Extract the first and last columns of a whitespace-delimited text file.

    Blank lines are ignored and rows with fewer than two tokens are dropped.
    A short progress report is printed while the file is processed.

    Args:
        output_file: Path of the text file to read.

    Returns:
        Tuple ``(first_column, last_column)`` of two equally long float lists.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a kept row's first or last token is not a valid float.
    """
    print(f"Reading file: {output_file}")
    with open(output_file, 'r') as handle:
        raw_lines = list(handle)
    print(f"Number of lines in file: {len(raw_lines)}")

    # Tokenise every non-blank line on whitespace.
    tokenized = []
    for raw in raw_lines:
        stripped = raw.strip()
        if stripped:
            tokenized.append(stripped.split())
    print(f"Number of data lines: {len(tokenized)}")

    # Keep only rows that have at least a first and a last column.
    usable = [tokens for tokens in tokenized if len(tokens) >= 2]
    print(f"Number of filtered data points: {len(usable)}")
    if usable:
        print(f"Sample data point: {usable[0]}")

    first_column = [float(tokens[0]) for tokens in usable]
    last_column = [float(tokens[-1]) for tokens in usable]
    return first_column, last_column

In [None]:

def process_equations(start_id, end_id, intercept, y_end):
    """Generate a batch of line profiles, run each through MATLAB, export a CSV.

    Workflow:
      1. ``generate_lines`` builds up to 100 candidate lines (``x_end=400``)
         for the given ``intercept`` / ``y_end``.
      2. The profile values of each line are wrapped as
         ``[140, 0.06] + values + [0.1, 140]`` and passed to the MATLAB
         function ``TDTR_MAIN_V4`` together with the name ``Equation_<id>``.
      3. ``<base_dir>/Equation_<id>.txt`` is parsed with
         ``process_output_file``; every (first column, last column) pair is
         combined with every profile row of that equation and stored under
         the keys ``Tdelay`` / ``ModelRatio``.
      4. The combined table is written to
         ``<csv_output_dir>/Dataset_<start_id>_<end_id>.csv`` and the
         ``.txt`` files are deleted.

    Relies on the module-level names ``base_dir``, ``csv_output_dir`` and
    ``matlab.engine``.

    Args:
        start_id: Equation ID assigned to the first generated line.
        end_id: Only used in the progress message and the CSV file name.
        intercept: Intercept forwarded to ``generate_lines``.
        y_end: Upper target forwarded to ``generate_lines``.

    Returns:
        pandas.DataFrame holding the rows that were written to the CSV
        (empty when no output file could be read).
    """
    print(f"\nStarting process for equations {start_id} to {end_id}")

    df = generate_lines(
        intercept=intercept,
        x_end=400,
        y_end=y_end,
        num_lines=100,
        start_id=start_id
    )
    print(f"Generated DataFrame shape: {df.shape}")

    # Create equation mapping (Equation ID -> slope / intercept of that line)
    equation_mapping = {
        row["Equation ID"]: {"Slope": row["Slope"], "Intercept": row["Intercept"]}
        for _, row in df.iterrows()
    }
    print(f"Number of equations mapped: {len(equation_mapping)}")

    # Prepare arrays for MATLAB: one list of profile values per equation
    lambda_arrays = df.groupby('Equation ID')['y (Thermal Conductivity Profile)'].apply(list).values.tolist()
    file_names = df['Equation ID'].unique().tolist()
    print(f"Number of unique equations: {len(file_names)}")

    # Add fixed elements around every profile.
    # NOTE(review): the meaning of these constants is defined by TDTR_MAIN_V4,
    # which is not visible here - confirm before changing them.
    fixed_prefix = [140, 0.06]
    fixed_suffix = [0.1, 140]
    lambda_arrays_full = [fixed_prefix + arr + fixed_suffix for arr in lambda_arrays]
    file_names_full = ['Equation_' + str(name) for name in file_names]

    # Start MATLAB engine; the finally-clause guarantees it is shut down even
    # when the loop is interrupted (e.g. by a kernel interrupt).
    eng = matlab.engine.start_matlab()
    try:
        # Run MATLAB processing
        for lambda_array, file_name in zip(lambda_arrays_full, file_names_full):
            try:
                print(f"\nProcessing {file_name}")
                print(f"Lambda array: {lambda_array}")
                eng.TDTR_MAIN_V4(matlab.double(lambda_array), file_name, nargout=0)

                # NOTE(review): assumes TDTR_MAIN_V4 writes <file_name>.txt
                # into base_dir - confirm the MATLAB working directory.
                output_file = os.path.join(base_dir, f"{file_name}.txt")
                if os.path.exists(output_file):
                    print(f"Successfully created: {output_file}")
                else:
                    print(f"File not created: {output_file}")

                # Pause kept from the original implementation.
                # TODO confirm whether it is still required.
                time.sleep(1)
            except Exception as e:
                # Best effort: report the failure and move on to the next equation.
                print(f"Error with {file_name}: {e}")
    finally:
        eng.quit()

    # Process results with updated file paths
    results = []
    print("\nProcessing output files:")
    for file_name in file_names_full:
        output_file = os.path.join(base_dir, f"{file_name}.txt")
        print(f"\nChecking file: {output_file}")

        if not os.path.exists(output_file):
            print(f"File not found: {output_file}")
            continue

        print(f"Found file: {output_file}")
        first_column, last_column = process_output_file(output_file)
        print(f"Data points found: {len(first_column)}")

        equation_id = int(file_name.split('_')[1])
        if equation_id not in equation_mapping:
            continue

        slope = equation_mapping[equation_id]['Slope']
        # Separate name so the `intercept` argument is not overwritten.
        equation_intercept = equation_mapping[equation_id]['Intercept']
        original_rows = df[df['Equation ID'] == equation_id]
        print(f"Original rows for equation {equation_id}: {len(original_rows)}")

        # The profile rows do not depend on the output sample, so extract
        # them once instead of re-iterating the frame for every sample.
        profile_points = [
            (
                row['Equations'],
                row['x (Thickness)'],
                row['y (Thermal Conductivity Profile)'],
            )
            for _, row in original_rows.iterrows()
        ]

        for tdelay, model_ratio in zip(first_column, last_column):
            for equation_label, thickness, conductivity in profile_points:
                results.append({
                    'Equations': equation_label,
                    'Slope': slope,
                    'Intercept': equation_intercept,
                    'Equation ID': equation_id,
                    'x (Thickness)': thickness,
                    'y (Thermal Conductivity Profile)': conductivity,
                    'Tdelay': tdelay,
                    'ModelRatio': model_ratio
                })

    print(f"\nTotal results collected: {len(results)}")

    # Create final DataFrame
    final_df = pd.DataFrame(results)
    print(f"Final DataFrame shape: {final_df.shape}")

    # Save to CSV
    csv_filename = f'Dataset_{start_id}_{end_id}.csv'
    csv_path = os.path.join(csv_output_dir, csv_filename)
    final_df.to_csv(csv_path, index=False)
    print(f"Saved CSV to: {csv_path}")

    # Clean up temporary txt files
    for file_name in file_names_full:
        txt_file = os.path.join(base_dir, f"{file_name}.txt")
        if os.path.exists(txt_file):
            os.remove(txt_file)
            print(f"Cleaned up: {txt_file}")

    return final_df

In [None]:
# # Cell 5: Generate datasets
# # First batch (1001-1011)
# df_1 = process_equations(
#     start_id=1001,
#     end_id=1011,
#     intercept=5.1,
#     y_end=5.2
# )

# # Optional: Generate more batches
# # Second batch (1012-1022)
# df_2 = process_equations(
#     start_id=1012,
#     end_id=1022,
#     intercept=5.2,
#     y_end=5.3
# )

# # Third batch (1023-1033)
# df_3 = process_equations(
#     start_id=1023,
#     end_id=1033,
#     intercept=5.3,
#     y_end=5.4
# )

In [None]:
# # start_id = 2034  # Start from next available ID

# # Define physically meaningful parameter ranges
# # parameter_sets = [
# #     # Format: (intercept, y_end) pairs
# #     # Higher thermal conductivity range (metals, high conductivity materials)
# #     (8.5, 8.7),    # Like copper (~8.5 W/mK)
# #     (7.8, 8.0),    # Like aluminum (~7.9 W/mK)
# #     (4.5, 4.7),    # Like brass (~4.6 W/mK)
    
# #     # Medium thermal conductivity range (semiconductors, ceramics)
# #     (3.8, 4.0),    # Like silicon carbide (~3.9 W/mK)
# #     (3.2, 3.4),    # Like silicon (~3.3 W/mK)
# #     (2.8, 3.0),    # Like aluminum oxide (~2.9 W/mK)
    
# #     # Lower thermal conductivity range (insulators, polymers)
# #     (2.2, 2.4),    # Like zirconia (~2.3 W/mK)
# #     (1.8, 2.0),    # Like glass (~1.9 W/mK)
# #     (1.4, 1.6),    # Like polymers (~1.5 W/mK)
# #     (1.0, 1.2)     # Like ceramics (~1.1 W/mK)
# # ]

# # for batch in range(10):  # 10 batches
# #     batch_start = start_id + (batch * 100)
# #     batch_end = batch_start + 100
    
# #     # Get parameters for this batch
# #     intercept, y_end = parameter_sets[batch]
    
# #     print(f"\nProcessing Batch {batch + 1}/10")
# #     print(f"IDs: {batch_start} to {batch_end}")
# #     print(f"Intercept: {intercept} (W/mK), y_end: {y_end} (W/mK)")
    
# #     df = process_equations(
# #         start_id=batch_start,
# #         end_id=batch_end,
# #         intercept=intercept,
# #         y_end=y_end
# #     )

# # Alternative version with more focused mid-range materials

# # Mid-range focused parameters for better learning
# # parameter_sets = [
# #     (4.2, 4.4),
# #     (4.0, 4.2),
# #     (3.8, 4.0),
# #     (3.6, 3.8),
# #     (3.4, 3.6),
# #     (3.2, 3.4),
# #     (3.0, 3.2),
# #     (2.8, 3.0),
# #     (2.6, 2.8),
# #     (2.4, 2.6)
# # ]

# # for batch in range(10):
# #     batch_start = start_id + (batch * 100)
# #     batch_end = batch_start + 100
    
# #     intercept, y_end = parameter_sets[batch]
    
# #     print(f"\nProcessing Batch {batch + 1}/10")
# #     print(f"IDs: {batch_start} to {batch_end}")
# #     print(f"Intercept: {intercept} (W/mK), y_end: {y_end} (W/mK)")
    
# #     df = process_equations(
# #         start_id=batch_start,
# #         end_id=batch_end,
# #         intercept=intercept,
# #         y_end=y_end
# #     )





# # Start ID after your last equation
# start_id = 3034 

# parameter_sets = [
#     # Higher conductivity materials
#     (6.8, 7.0),    
#     (6.4, 6.6),    
#     (6.0, 6.2),    
#     (5.6, 5.8),    
#     (5.2, 5.4),    

#     # Mid-range materials
#     (4.8, 5.0),
#     (4.4, 4.6),
#     (4.0, 4.2),
#     (3.6, 3.8),
#     (3.2, 3.4)
# ]


# """
# parameter_sets = [
#     (7.2, 7.4),
#     (6.6, 6.8),
#     (6.0, 6.2),
#     (5.4, 5.6),
#     (4.8, 5.0),
#     (4.2, 4.4),
#     (3.6, 3.8),
#     (3.0, 3.2),
#     (2.4, 2.6),
#     (1.8, 2.0)
# ]
# """

# # Generate equations
# for batch in range(10):
#     batch_start = start_id + (batch * 100)
#     batch_end = batch_start + 100
    
#     intercept, y_end = parameter_sets[batch]
    
#     print(f"\nProcessing Batch {batch + 1}/10")
#     print(f"IDs: {batch_start} to {batch_end}")
#     print(f"Intercept: {intercept} (W/mK), y_end: {y_end} (W/mK)")
    
#     df = process_equations(
#         start_id=batch_start,
#         end_id=batch_end,
#         intercept=intercept,
#         y_end=y_end
#     )








# Start ID after your last batch
start_id = 4034

# (intercept, y_end) pairs, one per batch, targeting different ranges and
# filling gaps left by earlier batches.
# NOTE(review): the material names are the original author's informal labels;
# confirm them (and the intended units/scale) against reference data before
# relying on them.
parameter_sets = [
    # Very high conductivity materials (9-10 W/mK range)
    (9.2, 9.4),    # Like silver (~9.3 W/mK)
    (8.8, 9.0),    # Transition range

    # Fill gaps in high range (7-8 W/mK)
    (7.6, 7.8),    # Between aluminum and copper ranges
    (7.2, 7.4),    # High-end alloy range

    # Fill gaps in medium-high range (5-6 W/mK)
    (5.8, 6.0),    # Like bronze (~5.9 W/mK)
    (5.4, 5.6),    # Medium-high transition

    # Fill gaps in medium-low range (2-3 W/mK)
    (2.6, 2.8),    # Like boron nitride (~2.7 W/mK)
    (2.2, 2.4),    # Like magnesia (~2.3 W/mK)

    # Low conductivity materials (< 1 W/mK)
    (0.8, 1.0),    # Like some ceramics (~0.9 W/mK)
    (0.4, 0.6)     # Like some polymers (~0.5 W/mK)
]

# Iterate over the list itself so the number of batches always matches the
# number of parameter pairs (no IndexError / silently skipped entries when
# the list is edited).
total_batches = len(parameter_sets)

for batch, (intercept, y_end) in enumerate(parameter_sets):
    # Each batch gets its own block of 100 equation IDs.
    batch_start = start_id + (batch * 100)
    # batch_end is only a label: it is printed and embedded in the CSV name.
    batch_end = batch_start + 100

    print(f"\nProcessing Batch {batch + 1}/{total_batches}")
    print(f"IDs: {batch_start} to {batch_end}")
    print(f"Intercept: {intercept} (W/mK), y_end: {y_end} (W/mK)")

    # The returned frame is overwritten on every pass; the persistent result
    # of each batch is the CSV written by process_equations().
    df = process_equations(
        start_id=batch_start,
        end_id=batch_end,
        intercept=intercept,
        y_end=y_end
    )



# First equation ID reserved for the alternative parameter sets below.
start_id = 5034


# (intercept, y_end) pairs, one per batch.
alternative_parameter_sets = [
    # Fine gradations in high range
    (8.4, 8.6),
    (8.0, 8.2),
    (7.8, 8.0),

    # Fine gradations in medium range
    (6.6, 6.8),
    (6.2, 6.4),
    (5.0, 5.2),

    # Fine gradations in lower range
    (1.6, 1.8),
    (1.2, 1.4),
    (0.6, 0.8),
    (0.2, 0.4)
]


# The alternative run is disabled by default. It used to be parked inside a
# bare triple-quoted string, which hides the code from tooling and is echoed
# as cell output when it is the last expression of a cell. Set the flag to
# True to generate these batches.
run_alternative_batches = False

if run_alternative_batches:
    total_alternative_batches = len(alternative_parameter_sets)

    for batch, (intercept, y_end) in enumerate(alternative_parameter_sets):
        # Each batch gets its own block of 100 equation IDs.
        batch_start = start_id + (batch * 100)
        batch_end = batch_start + 100

        print(f"\nProcessing Batch {batch + 1}/{total_alternative_batches}")
        print(f"IDs: {batch_start} to {batch_end}")
        print(f"Intercept: {intercept} (W/mK), y_end: {y_end} (W/mK)")

        df = process_equations(
            start_id=batch_start,
            end_id=batch_end,
            intercept=intercept,
            y_end=y_end
        )