In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so that files stored there can be accessed
# from this notebook (google.colab is only importable inside a Colab runtime).
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Define the path to your file in Google Drive
file_path = ''  # Replace with your actual file path

# Load the dataset; the file is decoded as ISO-8859-1 (Latin-1)
dataset = pd.read_csv(file_path, encoding='ISO-8859-1')

# Remove any leading or trailing spaces from column headers and values
dataset.columns = dataset.columns.str.strip()
dataset = dataset.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

# Select relevant columns: four process parameters followed by the two measured outcomes.
# The explicit .copy() makes the selection an independent frame, so the column assignment
# below is not a write into a slice of the full table (which pandas reports as
# SettingWithCopyWarning).
dataset = dataset[['Spincoating Speed',
                   'Substrates preheated Temperature',
                   'Antisolvent Used',
                   'Solution preheated temperature',
                   'Coverage Percentage',
                   'Average Size um']].copy()

# read_csv parses the literal text 'NA' as a missing value by default; restore it so that
# 'NA' in 'Antisolvent Used' is treated as a category, not as missing data.
# Note that any genuinely empty cell in this column also becomes 'NA'.
dataset['Antisolvent Used'] = dataset['Antisolvent Used'].fillna('NA')
print("Dataset loaded successfully:")
print(dataset.head())

In [None]:
# Observed range of each outcome column; these bounds drive the min-max scaling
# performed inside calculate_weighted_reward.
defect_min, defect_max = dataset['Coverage Percentage'].min(), dataset['Coverage Percentage'].max()
grain_size_min, grain_size_max = dataset['Average Size um'].min(), dataset['Average Size um'].max()


In [None]:
def calculate_weighted_reward(defect_percentage, average_grain_size):
    """
    Combine the two measured outcomes into a single scalar reward.

    Both inputs are min-max scaled with the module-level bounds
    (defect_min / defect_max and grain_size_min / grain_size_max). The reward is
    0.7 times the scaled grain size minus 0.3 times the scaled defect percentage,
    so a larger grain size raises the reward and a larger defect value lowers it.

    If a bound pair has zero width (max == min, e.g. a constant column) the
    corresponding scaled term is taken as 0.0 instead of dividing by zero, which
    would otherwise yield NaN/inf rewards.

    Parameters:
        defect_percentage: value on the scale of the 'Coverage Percentage' column.
        average_grain_size: value on the scale of the 'Average Size um' column.

    Returns:
        The weighted reward: 0.7 * scaled grain size - 0.3 * scaled defect value.
    """
    defect_span = defect_max - defect_min
    grain_span = grain_size_max - grain_size_min

    # A zero span means the column carries no information; let it contribute nothing.
    normalized_defect = (defect_percentage - defect_min) / defect_span if defect_span else 0.0
    normalized_grain_size = (average_grain_size - grain_size_min) / grain_span if grain_span else 0.0

    reward = (0.7 * normalized_grain_size) - (0.3 * normalized_defect)
    return reward

# Every tunable parameter starts with the same share of the total weight (4 x 0.25 = 1)
parameter_weights = dict.fromkeys(
    (
        'Spincoating Speed',
        'Substrates preheated Temperature',
        'Antisolvent Used',
        'Solution preheated temperature',
    ),
    0.25,
)

def adjust_parameter_weights(parameter_effects, parameter_weights, learning_rate=0.1):
    """
    Adjust weights for each parameter based on their relative effects on the total reward.

    Each effect is divided by the sum of all effects to obtain that parameter's
    contribution, and the parameter's weight is moved a fraction `learning_rate` of
    the way towards that contribution. The weights are then rescaled to sum to 1.

    The update is skipped (weights returned untouched) when the effects sum to zero
    or when the sum is NaN/inf. Without the second check a single non-finite effect
    would overwrite every weight with NaN, and every later update would stay NaN.

    Parameters:
        parameter_effects: dict mapping parameter name -> effect magnitude. Every key
            must already exist in `parameter_weights` (a missing key raises KeyError).
        parameter_weights: dict mapping parameter name -> weight. Modified in place.
        learning_rate: fraction of the gap to the contribution closed per call.

    Returns:
        The same `parameter_weights` dict object that was passed in.
    """
    import math

    total_effect = sum(parameter_effects.values())
    if total_effect == 0 or not math.isfinite(total_effect):
        return parameter_weights  # Skip adjustment if there is nothing usable to learn from

    # Adjust each parameter weight
    for param, effect in parameter_effects.items():
        contribution = effect / total_effect  # Normalize effect
        parameter_weights[param] += learning_rate * (contribution - parameter_weights[param])

    # Normalize weights to sum to 1
    weight_sum = sum(parameter_weights.values())
    for param in parameter_weights:
        parameter_weights[param] /= weight_sum

    return parameter_weights

def get_parameter_row(param, new_value):
    """
    Pick one random row of the module-level `dataset` whose `param` column equals `new_value`.

    Only the `param` column is filtered on; the other columns of the returned row
    are whatever the sampled row contains.

    Parameters:
        param: name of the dataset column to match on.
        new_value: value the column must be equal to.

    Returns:
        The sampled row as a pandas Series.

    Raises:
        ValueError: if no row has `param` equal to `new_value`.
    """
    matches = dataset[dataset[param] == new_value]
    if matches.empty:
        raise ValueError(f"No rows found with {param} = {new_value}.")
    return matches.sample(n=1).iloc[0]

def calculate_individual_rewards(current_row, action):
    """
    Compute one reward per process parameter plus their weighted total.

    For the parameter named by `action`, the outcome values come from a dataset row
    sampled by get_parameter_row using current_row's value for that parameter. For
    every other parameter the outcome values of `current_row` itself are used.

    Parameters:
        current_row: row (pandas Series) holding the four parameter columns and the
            'Coverage Percentage' / 'Average Size um' outcome columns.
        action: name of the parameter selected for this step.

    Returns:
        (total_reward, individual_rewards) where individual_rewards maps each
        parameter name to its reward and total_reward is the sum of those rewards
        multiplied by the module-level parameter_weights.
    """
    tunable_parameters = (
        'Spincoating Speed',
        'Substrates preheated Temperature',
        'Antisolvent Used',
        'Solution preheated temperature',
    )

    individual_rewards = {}
    for name in tunable_parameters:
        # Only the acted-on parameter draws its outcomes from a sampled row
        outcome_row = get_parameter_row(name, current_row[name]) if action == name else current_row
        individual_rewards[name] = calculate_weighted_reward(
            outcome_row['Coverage Percentage'],
            outcome_row['Average Size um'],
        )

    # Weighted sum of the per-parameter rewards
    total_reward = sum(
        individual_rewards[name] * parameter_weights[name] for name in individual_rewards
    )
    return total_reward, individual_rewards

In [None]:
# Q-Learning hyperparameters
# NOTE(review): only alpha is referenced by the training loop as written; gamma and
# epsilon are defined here but not used there - confirm whether that is intended.
alpha = 0.1    # Learning rate
gamma = 0.9    # Discount factor
epsilon = 0.1  # Exploration factor

# Training bookkeeping
num_episodes = 1000
Q_table: dict = {}          # value estimate per (state..., action) key
episode_rewards: list = []  # total reward collected in each episode

In [None]:
# Training loop: every episode starts from a randomly drawn dataset row and performs
# 100 steps. Each step picks a parameter at random, scores it, adapts the parameter
# weights and blends the step reward into the Q-table entry of the current state/action.
for episode in range(num_episodes):
    # Start with a random initial state.
    # .copy() gives the episode its own row: the row is modified further down, and
    # writing into the object returned by .iloc triggers SettingWithCopyWarning.
    initial_index = np.random.choice(len(dataset))
    current_row = dataset.iloc[initial_index].copy()
    total_episode_reward = 0

    for step in range(100):
        # Choose a random parameter (action) to change
        action = np.random.choice(['Spincoating Speed', 'Substrates preheated Temperature', 'Antisolvent Used', 'Solution preheated temperature'])

        # Calculate rewards
        total_reward, individual_rewards = calculate_individual_rewards(current_row, action)
        total_episode_reward += total_reward

        # Track parameter effects based on individual rewards
        # (how far each parameter's own reward lies from the weighted total)
        parameter_effects = {param: abs(individual_rewards[param] - total_reward) for param in individual_rewards}

        # Adjust weights for parameters based on their effects
        parameter_weights = adjust_parameter_weights(parameter_effects, parameter_weights)

        # Q-Table update: the key is the four parameter values of the current row plus the action
        state_action = (current_row['Spincoating Speed'], current_row['Substrates preheated Temperature'],
                        current_row['Antisolvent Used'], current_row['Solution preheated temperature'], action)

        if state_action not in Q_table:
            Q_table[state_action] = 0.0

        # Update rule for Q-table: move the stored value a fraction alpha towards the
        # step reward (no next-state term is involved)
        Q_table[state_action] += alpha * (total_reward - Q_table[state_action])

        # Move to the next state by updating the chosen parameter only
        # NOTE(review): the row is looked up by the CURRENT value of `action`, so
        # next_state_row[action] equals current_row[action] and this assignment leaves
        # the state unchanged for the whole episode. If the intent is to explore other
        # values of the parameter, a new value has to be chosen first - confirm.
        next_state_row = get_parameter_row(action, current_row[action])
        current_row[action] = next_state_row[action]  # Update the selected parameter

    # Store the episode's total reward
    episode_rewards.append(total_episode_reward)

# Save the episode rewards to a CSV file (single 'Reward' column, one row per episode)
# for later plotting
rewards_file_path = '' #replace with actual path
pd.DataFrame(episode_rewards, columns=['Reward']).to_csv(rewards_file_path, index=False)

# Print the final parameter weights (the values left in parameter_weights after training)
print(f"Final parameter weights: {parameter_weights}")

print(f"Episode rewards saved successfully at {rewards_file_path}")

In [None]:
# pyplot is used throughout this cell but is not imported by any earlier cell shown in
# this notebook; without this import the cell fails with NameError on a fresh kernel.
import matplotlib.pyplot as plt

# Define the path to the episode rewards file
# (should point to the CSV with the 'Reward' column written after training)
rewards_file_path = ''

# Load the rewards data
rewards_data = pd.read_csv(rewards_file_path)

# Plot 1: Rewards vs. Episodes (one point per episode, in file order)
plt.figure(figsize=(10, 5))
plt.plot(rewards_data['Reward'], label='Reward per Episode')
plt.xlabel('Episodes')
plt.ylabel('Reward')
plt.title('Rewards vs. Episodes')
plt.legend()
plt.show()

# Plot 2: Rewards Distribution (histogram of the per-episode totals, 20 bins)
plt.figure(figsize=(10, 5))
plt.hist(rewards_data['Reward'], bins=20, edgecolor='k', alpha=0.7)
plt.xlabel('Reward')
plt.ylabel('Frequency')
plt.title('Rewards Distribution')
plt.show()

In [None]:
from collections import Counter

# Q-values at or above the 85th percentile count as "high reward" (the top 15%)
reward_values = [*Q_table.values()]
threshold = np.percentile(reward_values, 85)

# Keep the (state_action, value) pairs that reach the threshold
high_reward_entries = [entry for entry in Q_table.items() if entry[1] >= threshold]

# Tally the qualifying entries per parameter combination; the last element of each
# key is the action and is dropped so that only the four parameter values remain
parameter_counts = Counter(key[:-1] for key, _ in high_reward_entries)

# The six parameter combinations with the most qualifying entries
most_frequent_high_reward_params = parameter_counts.most_common(6)

# Display the results, one dict per parameter combination
print("Parameter sets that yielded high rewards multiple times:")
for params, count in most_frequent_high_reward_params:
    spin_speed, substrate_temp, antisolvent, sol_temp = params
    print({
        'Spincoating Speed': spin_speed,
        'Substrates preheated Temperature': substrate_temp,
        'Antisolvent Used': antisolvent,
        'Solution preheated Temperature': sol_temp,
        'High Reward Count': count,
    })