In [1]:
import os
import glob
import time
from datetime import datetime

from tqdm import tqdm
from shutil import copyfile
from ansys.mapdl.core import launch_mapdl, launcher, Mapdl

import torch
import numpy as np
import random

from Ansys_env import Env

from PPO_test import PPO

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
################################### Testing ####################################

def test():
    """Roll out a pretrained PPO policy on a handful of test input files.

    The function builds the Ansys environment, loads the checkpoint named by
    ``checkpoint_timestamp`` / ``checkpoint_name``, and then runs one episode
    per entry of ``test_file_list.npy`` (at most ``num_test_episodes``).  For
    every environment step it prints the step index, the largest per-node
    norm of the first three state features, and the reward, and appends an
    ``action2,timestep,reward`` row to a text log.

    No policy update is performed here; rewards and terminal flags are only
    appended to ``ppo_agent.buffer``.

    Side effects:
        * creates ``PPO_logs/<env_name>/<checkpoint_timestamp>/test_input/``
          and ``PPO_preTrained/<env_name>/`` if they do not exist;
        * copies the input decks it uses into the log directory;
        * (over)writes the test log file.
    """
    heavy_rule = "============================================================================================"
    light_rule = "--------------------------------------------------------------------------------------------"

    print(heavy_rule)

    ################################## set device ##################################

    # default to cpu, switch to cuda when a GPU is visible
    device = torch.device('cpu')

    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.cuda.empty_cache()
        print("Device set to : " + str(torch.cuda.get_device_name(device)))
    else:
        print("Device set to : cpu")

    print(heavy_rule)

    ####### initialize environment hyperparameters ######

    env_name = "Ansys_assembly"

    quant_lim = 8                       # forwarded unchanged to env.step()
    max_ep_len = 20                     # max timesteps in one episode
    max_training_timesteps = int(100)   # stop starting new episodes once time_step exceeds this
    num_test_episodes = 5               # upper bound on the number of test files evaluated
    thresh = 0.42                       # controls the minimum distance between two fixtures

    #########################
    data_dir = '../Deformed_inputs_2mm/'
    test_file_list = np.load('test_file_list.npy')
    # the step1 input is used to initialize for the bottom surface nodes
    original_input_filename = data_dir + 'input_step1.inp'
    # only used to construct the environment; replaced per episode below
    initial_deform_file = data_dir + 'deform_step1_dp1.npz'
    initial_deform = np.load(initial_deform_file)['data']
    initial_fixture_locations = [2552, 2578, 2628]
    #########################

    # connection parameters handed to Env
    ip = 'xx.3.127.xxx'
    port = 8800

    print("training environment name : " + env_name)

    ################ PPO hyperparameters ################
    # These are required by the PPO constructor even though no update is run here.

    K_epochs = 20           # update policy for K epochs in one PPO update
    eps_clip = 0.2          # clip parameter for PPO
    gamma = 0.99            # discount factor

    lr_actor = 0.0003       # learning rate for actor network
    lr_critic = 0.001       # learning rate for critic network

    random_seed = 0         # set random seed if required (0 = no random seed)

    ############### Network Hyperparameters #############

    shared_channel_list = [1, 8, 16, 16]  # this may not be used when the shared module is GCN
    actor_arm_dim_list = [1024, 512, 256]
    critic_arm_dim_list = [512, 128, 64]
    emb_dims = 512
    feature_dims = 3
    k = 8

    ###################### logging ######################

    checkpoint_timestamp = 'deformed_inputs_nofix_metal'

    log_dir = "PPO_logs" + '/' + env_name + '/'
    # Per-run folders: the log file lives in run_log_dir and the copied input
    # decks in test_input_dir.  makedirs creates every missing parent, so one
    # call covers PPO_logs/, the env folder, the run folder and test_input/.
    run_log_dir = log_dir + checkpoint_timestamp + '/'
    test_input_dir = run_log_dir + 'test_input/'
    os.makedirs(test_input_dir, exist_ok=True)

    input_filename = log_dir + original_input_filename.split('/')[-1]
    copyfile(original_input_filename, input_filename)

    ################### checkpointing ###################

    directory = "PPO_preTrained" + '/' + env_name + '/'
    os.makedirs(directory, exist_ok=True)

    checkpoint_name = "deformed_inputs_nofix_metal_pretrained"
    checkpoint_path = directory + checkpoint_timestamp + '/' + checkpoint_name + ".pth"
    print("load the model from : " + checkpoint_path)

    #####################################################

    #### log file for this test (overwritten on every run)
    log_f_name = run_log_dir + 'PPO_test_' + checkpoint_name + ".txt"

    print("current logging run number for " + env_name + " : ", checkpoint_name)
    print("logging at : " + log_f_name)

    #####################################################

    print("initialize environment")

    env = Env(original_input_filename, input_filename, initial_fixture_locations, max_ep_len, thresh, ip, port, initial_deform)

    # state space dimension
    state_dim = env.get_state_shape()

    # action space dimension
    action2_dim = env.get_action_shape()

    ############# print all hyperparameters #############

    print(light_rule)

    print("max training timesteps : ", max_training_timesteps)
    print("max timesteps per episode : ", max_ep_len)

    print(light_rule)

    print("state space dimension : ", state_dim)

    print("action2 space dimension : ", action2_dim)

    print(light_rule)

    print("Initializing a discrete action space policy")

    if random_seed:
        print(light_rule)
        print("setting random seed to ", random_seed)
        torch.manual_seed(random_seed)
        env.seed(random_seed)
        np.random.seed(random_seed)
        random.seed(random_seed)

    #####################################################

    print(heavy_rule)

    ################# testing procedure #################

    # initialize a PPO agent; the checkpoint path is passed to the constructor
    ppo_agent = PPO(state_dim, action2_dim, shared_channel_list, actor_arm_dim_list, critic_arm_dim_list,
                    emb_dims, feature_dims, k, lr_actor, lr_critic, gamma, K_epochs,
                    eps_clip, device, checkpoint_path)

    # track total run time
    # NOTE: datetime.now() returns local time although the message says GMT.
    start_time = datetime.now().replace(microsecond=0)
    print("Started training at (GMT) : ", start_time)

    print(heavy_rule)

    time_step = 0
    idx = 0
    # never index past the end of the test list
    episode_limit = min(num_test_episodes, len(test_file_list))

    # The context manager guarantees the log is closed even if the
    # environment raises in the middle of an episode.
    with open(log_f_name, "w") as log_f:
        log_f.write('action2,timestep,reward\n')

        while time_step <= max_training_timesteps and idx < episode_limit:

            original_input_filename = str(test_file_list[idx])
            input_basename = original_input_filename.split('/')[-1]
            # e.g. ".../input_step2_dp17.inp" -> "dp17"
            design_point = input_basename.split('.')[0].split('_')[-1]
            initial_deform_file = data_dir + 'deform_step1_' + design_point + '.npz'
            initial_deform = np.load(initial_deform_file)['data']
            input_filename = test_input_dir + input_basename
            copyfile(original_input_filename, input_filename)
            print('current using ' + input_basename)

            x, state, mask = env.reset(initial_fixture_locations, original_input_filename, input_filename, initial_deform.copy())
            idx += 1

            for t in tqdm(range(1, max_ep_len+1)):

                # select action with policy
                action2 = ppo_agent.select_action(x, state, mask)

                x, state, reward, done, mask = env.step(action2, original_input_filename, input_filename, quant_lim, initial_deform.copy())
                print(t)
                # largest per-node norm over the first three state features
                print(np.linalg.norm(state[0,:,:3], axis = 1).max())
                print(reward)

                # saving reward and is_terminals
                ppo_agent.buffer.rewards.append(reward)
                ppo_agent.buffer.is_terminals.append(done)

                time_step += 1

                log_f.write('{},{},{}\n'.format(action2, time_step, reward))
                log_f.flush()

                # break; if the episode is over
                if done:
                    print('done')
                    break

    # print total run time
    print(heavy_rule)
    end_time = datetime.now().replace(microsecond=0)
    print("Started testing at (GMT) : ", start_time)
    print("Finished testing at (GMT) : ", end_time)
    print("Total training time  : ", end_time - start_time)
    print(heavy_rule)




# Entry point: run the evaluation when this cell/script is executed directly.
if "__main__" == __name__:
    test()

Device set to : cpu
training environment name : Ansys_assembly
load the model from : PPO_preTrained/Ansys_assembly/deformed_inputs_nofix_metal/deformed_inputs_nofix_metal_pretrained.pth
current logging run number for Ansys_assembly :  deformed_inputs_nofix_metal_pretrained
logging at : PPO_logs/Ansys_assembly/deformed_inputs_nofix_metal/PPO_test_deformed_inputs_nofix_metal_pretrained.txt
initialize environment


  return torch._C._cuda_getDeviceCount() > 0


--------------------------------------------------------------------------------------------
max training timesteps :  100
max timesteps per episode :  20
--------------------------------------------------------------------------------------------
state space dimension :  (3901, 3)
action2 space dimension :  3901
--------------------------------------------------------------------------------------------
Initializing a discrete action space policy
Let's use 0 GPUs!
Started training at (GMT) :  2024-08-31 20:47:16
current using input_step2_dp17.inp


  5%|██▏                                         | 1/20 [00:02<00:42,  2.21s/it]

1
0.0019200022075111737
0


 10%|████▍                                       | 2/20 [00:04<00:39,  2.18s/it]

2
0.0019200022075111737
0


 15%|██████▌                                     | 3/20 [00:06<00:39,  2.31s/it]

3
0.418490339918628
0


 20%|████████▊                                   | 4/20 [00:09<00:38,  2.42s/it]

4
0.23381141742343373
0


 25%|███████████                                 | 5/20 [00:11<00:37,  2.48s/it]

5
0.016571957829166005
0


 30%|█████████████▏                              | 6/20 [00:14<00:35,  2.51s/it]

6
0.0027952539351551392
0


 35%|███████████████▍                            | 7/20 [00:17<00:32,  2.52s/it]

7
0.0027669970863630155
0


 35%|███████████████▍                            | 7/20 [00:19<00:36,  2.81s/it]

8
0.0027484750465591955
0.2560908201645619
done
current using input_step2_dp9.inp



  5%|██▏                                         | 1/20 [00:02<00:40,  2.14s/it]

1
0.0016000037151328431
0


 10%|████▍                                       | 2/20 [00:04<00:38,  2.14s/it]

2
0.0016000037151328431
0


 15%|██████▌                                     | 3/20 [00:06<00:39,  2.32s/it]

3
0.4171016139941147
0


 20%|████████▊                                   | 4/20 [00:09<00:38,  2.42s/it]

4
0.19735114536134757
0


 25%|███████████                                 | 5/20 [00:11<00:36,  2.46s/it]

5
0.0090763923870693
0


 30%|█████████████▏                              | 6/20 [00:14<00:35,  2.51s/it]

6
0.010855376881617454
0


 35%|███████████████▍                            | 7/20 [00:17<00:33,  2.54s/it]

7
0.002612940508705734
0


 35%|███████████████▍                            | 7/20 [00:19<00:36,  2.82s/it]

8
0.002702340527266007
0.25682599255603716
done
current using input_step2_dp2.inp



  5%|██▏                                         | 1/20 [00:02<00:39,  2.07s/it]

1
0.0019200027638692945
0


 10%|████▍                                       | 2/20 [00:04<00:37,  2.10s/it]

2
0.0019200027638692945
0


 15%|██████▌                                     | 3/20 [00:06<00:38,  2.29s/it]

3
0.4263883516418265
0


 20%|████████▊                                   | 4/20 [00:09<00:38,  2.41s/it]

4
0.19466159400366995
0


 25%|███████████                                 | 5/20 [00:11<00:37,  2.47s/it]

5
0.008666347059811338
0


 30%|█████████████▏                              | 6/20 [00:14<00:34,  2.49s/it]

6
0.010482266221924478
0


 35%|███████████████▍                            | 7/20 [00:17<00:32,  2.54s/it]

7
0.002931203870698229
0


 35%|███████████████▍                            | 7/20 [00:19<00:36,  2.80s/it]

8
0.0025023445725900896
0.26016528882193773
done
current using input_step2_dp14.inp



  5%|██▏                                         | 1/20 [00:02<00:41,  2.21s/it]

1
0.0017600026126732456
0


 10%|████▍                                       | 2/20 [00:04<00:42,  2.37s/it]

2
0.0017600026126732456
0


 15%|██████▌                                     | 3/20 [00:07<00:41,  2.45s/it]

3
0.4227875658584083
0


 20%|████████▊                                   | 4/20 [00:09<00:40,  2.51s/it]

4
0.21315338032065786
0


 25%|███████████                                 | 5/20 [00:12<00:37,  2.53s/it]

5
0.01792655707509028
0


 30%|█████████████▏                              | 6/20 [00:14<00:35,  2.55s/it]

6
0.0034354073849087396
0


 35%|███████████████▍                            | 7/20 [00:17<00:33,  2.56s/it]

7
0.002458218467467919
0


 35%|███████████████▍                            | 7/20 [00:20<00:37,  2.88s/it]

8
0.002508277370598918
0.260062443998649
done
current using input_step2_dp13.inp



  5%|██▏                                         | 1/20 [00:02<00:40,  2.16s/it]

1
0.0019200027062920223
0


 10%|████▍                                       | 2/20 [00:04<00:40,  2.25s/it]

2
0.0019200027062920223
0


 15%|██████▌                                     | 3/20 [00:07<00:40,  2.38s/it]

3
0.43221945358758207
0


 20%|████████▊                                   | 4/20 [00:09<00:38,  2.44s/it]

4
0.23892884720733443
0


 25%|███████████                                 | 5/20 [00:12<00:37,  2.49s/it]

5
0.015484092389303147
0


 30%|█████████████▏                              | 6/20 [00:14<00:35,  2.52s/it]

6
0.0024915956358066605
0


 35%|███████████████▍                            | 7/20 [00:17<00:32,  2.53s/it]

7
0.0024169650392758943
0


 35%|███████████████▍                            | 7/20 [00:19<00:36,  2.83s/it]

8
0.0027491058759117723
0.25608085339146974
done
Started testing at (GMT) :  2024-08-31 20:47:16
Finished testing at (GMT) :  2024-08-31 20:49:05
Total training time  :  0:01:49



