In [85]:
# Fine-tune starting from an existing (pre-trained) model
# config=[
#     "--logdir", "./runs/model_test/main_model",
#     "--model_suffix", "free",
# ]
import os
os.environ['NOTEBOOK']="1"

from params import parser

from model.PPO import PPO
from train.base import *
import numpy.ma as ma

In [67]:

class PPOT(PPO):
    """PPO agent whose ``update`` can optionally freeze the ``feature_exact`` parameters."""

    def __init__(self, config):
        super().__init__(config)

    def update(self, memory, freeze_feature_exact=False):
        """
        Run ``k_epochs`` passes of clipped-PPO minibatch updates over the collected
        rollout, then soft-update ``policy_old`` towards ``policy``.

        :param memory: data used for PPO training; must provide ``transpose_data()``
            and ``get_gae_advantages()``
        :param freeze_feature_exact: when True, ``requires_grad`` is set to False on
            every policy parameter whose name starts with ``'feature_exact'``.
            NOTE: the flag is not restored afterwards, so the freeze persists on
            the policy after this call returns.
        :return: ``(total_loss, critic_loss)`` as Python floats; each is the sum of
            the per-minibatch mean losses divided by ``k_epochs``
        """
        # Transposed rollout data. Indices used below: 0 fea_j, 1 op_mask, 2 fea_m,
        # 3 mch_mask, 4 dynamic_pair_mask, 5 comp_idx, 6 candidate, 7 fea_pairs,
        # 8 actions, 12 log-probabilities recorded at sampling time.
        t_data = memory.transpose_data()
        # Generalized advantage estimates and value targets (A_t, G_t)
        t_advantage_seq, v_target_seq = memory.get_gae_advantages()

        full_batch_size = len(t_data[-1])
        # Integer ceiling division: number of minibatches needed to cover the rollout
        num_batch = -(-full_batch_size // self.minibatch_size)

        loss_epochs = 0
        v_loss_epochs = 0

        if freeze_feature_exact:
            for name, param in self.policy.named_parameters():
                if name.startswith('feature_exact'):
                    param.requires_grad = False

        for _ in range(self.k_epochs):
            # Split into multiple minibatches because of memory limitations
            for i in range(num_batch):
                start_idx = i * self.minibatch_size
                # The last minibatch may be shorter than minibatch_size
                end_idx = min(start_idx + self.minibatch_size, full_batch_size)

                # Action distribution and value estimates from the current policy
                pis, vals = self.policy(fea_j=t_data[0][start_idx:end_idx],
                                        op_mask=t_data[1][start_idx:end_idx],
                                        candidate=t_data[6][start_idx:end_idx],
                                        fea_m=t_data[2][start_idx:end_idx],
                                        mch_mask=t_data[3][start_idx:end_idx],
                                        comp_idx=t_data[5][start_idx:end_idx],
                                        dynamic_pair_mask=t_data[4][start_idx:end_idx],
                                        fea_pairs=t_data[7][start_idx:end_idx])

                action_batch = t_data[8][start_idx:end_idx]
                logprobs, ent_loss = eval_actions(pis, action_batch)
                # Importance-sampling ratio between the current policy and the sampling policy
                ratios = torch.exp(logprobs - t_data[12][start_idx:end_idx].detach())

                advantages = t_advantage_seq[start_idx:end_idx]
                surr1 = ratios * advantages
                surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages

                v_loss = self.V_loss_2(vals.squeeze(1), v_target_seq[start_idx:end_idx])
                p_loss = - torch.min(surr1, surr2)  # clipped surrogate objective, negated
                ent_loss = - ent_loss.clone()
                loss = self.vloss_coef * v_loss + self.ploss_coef * p_loss + self.entloss_coef * ent_loss

                # Reduce once and reuse the scalar for both logging and backprop
                mean_loss = loss.mean()

                self.optimizer.zero_grad()
                loss_epochs += mean_loss.detach()
                v_loss_epochs += v_loss.mean().detach()
                mean_loss.backward()
                self.optimizer.step()

        # Soft update: policy_old <- tau * policy_old + (1 - tau) * policy
        for policy_old_params, policy_params in zip(self.policy_old.parameters(), self.policy.parameters()):
            policy_old_params.data.copy_(self.tau * policy_old_params.data + (1 - self.tau) * policy_params.data)

        # float() accepts both a 0-dim tensor and the initial int 0, so the return
        # no longer fails when no minibatch was processed.
        n_epochs = max(self.k_epochs, 1)
        return float(loss_epochs) / n_epochs, float(v_loss_epochs) / n_epochs


In [68]:

def PPO_initialize(config=None):
    """
    Build the PPOT agent used in this notebook.

    :param config: parsed argument namespace to build the agent from. When omitted,
        the module-level ``configs`` is used, which keeps existing zero-argument
        calls working unchanged.
    :return: a new ``PPOT`` instance
    """
    if config is None:
        # Fall back to the notebook-level configuration object
        config = configs
    return PPOT(config)


In [1]:
from fjsp_env_same_op_nums import FJSPEnvForSameOpNums

class FJSPEnvForSameOpNums_test(FJSPEnvForSameOpNums):
    """
    Environment subclass that overrides ``step`` with an annotated implementation.

    NOTE(review): presumably this mirrors the parent's ``step`` and exists only for
    inspection in the notebook - confirm against ``fjsp_env_same_op_nums``.
    """

    def __init__(self, n_j, n_m):
        super().__init__(n_j, n_m)

    def step(self, actions):
        """
        Perform the state transition and return the next state and reward.

        :param actions: the action list with shape [E]; each entry encodes
            ``job * number_of_machines + machine``
        :return: the next state, the reward array and the done flag
        """
        # Decode the flat action index into (job, machine), e.g. [1, 6] -> jobs [0, 2], machines [1, 0]
        chosen_job, chosen_mch = divmod(actions, self.number_of_machines)
        # The operation to schedule is the current candidate operation of the chosen job
        chosen_op = self.candidate[self.env_idxs, chosen_job]

        # A truthy entry in reverse_process_relation marks an (operation, machine) pair that is not allowed
        infeasible = self.reverse_process_relation[self.env_idxs, chosen_op, chosen_mch]
        if infeasible.any():
            print(
                f'FJSP_Env.py Error from choosing action: Op {chosen_op} can\'t be processed by Mch {chosen_mch}')
            sys.exit()

        self.step_count += 1

        # ---- candidate bookkeeping ----
        # True where the chosen operation is not the last operation of its job;
        # in that case the job's candidate pointer moves on by one.
        has_successor = chosen_op != self.job_last_op_id[self.env_idxs, chosen_job]
        no_successor = ~has_successor
        self.candidate[self.env_idxs, chosen_job] += has_successor
        self.mask[self.env_idxs, chosen_job] = 1 - has_successor

        # ---- completion times ----
        # An operation starts once both its job and its machine are free
        start_time = np.maximum(self.candidate_free_time[self.env_idxs, chosen_job],
                                self.mch_free_time[self.env_idxs, chosen_mch])
        self.op_ct[self.env_idxs, chosen_op] = \
            start_time + self.op_pt[self.env_idxs, chosen_op, chosen_mch]
        finish_time = self.op_ct[self.env_idxs, chosen_op]
        self.candidate_free_time[self.env_idxs, chosen_job] = finish_time
        self.mch_free_time[self.env_idxs, chosen_mch] = finish_time

        # Same bookkeeping with the "true_*" arrays, which also drive the makespan
        true_start_time = np.maximum(self.true_candidate_free_time[self.env_idxs, chosen_job],
                                     self.true_mch_free_time[self.env_idxs, chosen_mch])
        self.true_op_ct[self.env_idxs, chosen_op] = \
            true_start_time + self.true_op_pt[self.env_idxs, chosen_op, chosen_mch]
        true_finish_time = self.true_op_ct[self.env_idxs, chosen_op]
        self.true_candidate_free_time[self.env_idxs, chosen_job] = true_finish_time
        self.true_mch_free_time[self.env_idxs, chosen_mch] = true_finish_time
        self.current_makespan = np.maximum(self.current_makespan, true_finish_time)

        # ---- candidate message ----
        # Jobs that advanced take the data of their next operation; jobs that
        # have no successor get every machine marked as not allowed.
        next_op = chosen_op[has_successor] + 1
        self.candidate_pt[has_successor, chosen_job[has_successor]] = \
            self.unmasked_op_pt[has_successor, next_op]
        self.candidate_process_relation[has_successor, chosen_job[has_successor]] = \
            self.reverse_process_relation[has_successor, next_op]
        self.candidate_process_relation[no_successor, chosen_job[no_successor]] = 1

        # ---- next schedule time ----
        # Broadcast job free times [E, J, 1] against machine free times [E, 1, M] -> [E, J, M]
        job_free = np.expand_dims(self.candidate_free_time, axis=2)
        mch_free = np.expand_dims(self.mch_free_time, axis=1)
        self.pair_free_time = np.maximum(job_free, mch_free)
        # Ignore disallowed pairs when searching for the earliest free pair per environment
        schedule_matrix = ma.array(self.pair_free_time, mask=self.candidate_process_relation)
        flat_schedule = schedule_matrix.reshape(self.number_of_envs, -1)
        self.next_schedule_time = np.min(flat_schedule, axis=1).data

        self.remain_process_relation[self.env_idxs, chosen_op] = 0
        self.op_scheduled_flag[self.env_idxs, chosen_op] = 1

        # ---- mask for deleting nodes ----
        # Scheduled operations that complete no later than the next schedule time
        self.deleted_op_nodes = np.logical_and(
            self.op_ct <= self.next_schedule_time[:, np.newaxis],
            self.op_scheduled_flag)
        self.delete_mask_fea_j = np.tile(self.deleted_op_nodes[:, :, np.newaxis],
                                         (1, 1, self.op_fea_dim))

        # ---- state update ----
        self.update_op_mask()

        # Operation raw features: shift the completion-time lower bound of the chosen
        # operation and of the later operations in the same job
        job_end = (self.job_last_op_id[self.env_idxs, chosen_job] + 1)[:, np.newaxis]
        diff = self.op_ct[self.env_idxs, chosen_op] - self.op_ct_lb[self.env_idxs, chosen_op]

        from_chosen_op = (self.op_idx >= chosen_op[:, np.newaxis]) & (self.op_idx < job_end)
        self.op_ct_lb[from_chosen_op] += \
            np.tile(diff[:, np.newaxis], (1, self.number_of_ops))[from_chosen_op]

        # Every operation of the chosen job has one fewer operation left and less remaining work
        job_start = self.job_first_op_id[self.env_idxs, chosen_job][:, np.newaxis]
        whole_job = (self.op_idx >= job_start) & (self.op_idx < job_end)
        self.op_match_job_left_op_nums[whole_job] -= 1
        self.op_match_job_remain_work[whole_job] -= \
            np.tile(self.op_mean_pt[self.env_idxs, chosen_op][:, np.newaxis],
                    (1, self.number_of_ops))[whole_job]

        next_time_col = np.expand_dims(self.next_schedule_time, axis=1)
        self.op_waiting_time = np.zeros((self.number_of_envs, self.number_of_ops))
        unmasked_wait = (1 - self.mask) * np.maximum(next_time_col - self.candidate_free_time, 0)
        masked_wait = self.mask * self.op_waiting_time[self.env_job_idx, self.candidate]
        self.op_waiting_time[self.env_job_idx, self.candidate] = unmasked_wait + masked_wait

        self.op_remain_work = np.maximum(self.op_ct - next_time_col, 0)

        self.construct_op_features()

        # Dynamic pair mask: a pair is masked when it is disallowed or not yet free
        # at the next schedule time
        self.unavailable_pairs = \
            self.pair_free_time > self.next_schedule_time[:, np.newaxis, np.newaxis]
        self.dynamic_pair_mask = np.logical_or(np.copy(self.candidate_process_relation),
                                               self.unavailable_pairs)

        # comp_idx is derived from the pairs that are not masked
        self.comp_idx = self.logic_operator(x=~self.dynamic_pair_mask)

        self.update_mch_mask()

        # Machine raw features
        self.mch_current_available_jc_nums = np.sum(~self.dynamic_pair_mask, axis=1)
        self.mch_current_available_op_nums -= self.process_relation[self.env_idxs, chosen_op]

        # Negative duration: the machine is still occupied at the next schedule time
        mch_free_duration = np.expand_dims(self.next_schedule_time, axis=1) - self.mch_free_time
        still_busy = mch_free_duration < 0
        self.mch_working_flag = still_busy + 0
        self.mch_waiting_time = (1 - still_busy) * mch_free_duration
        self.mch_remain_work = np.maximum(-mch_free_duration, 0)

        self.construct_mch_features()
        self.construct_pair_features()

        # Reward: R_t = C_LB(s_t) - C_LB(s_{t+1})
        new_max_end_time = np.max(self.op_ct_lb, axis=1)
        reward = self.max_endTime - new_max_end_time
        self.max_endTime = new_max_end_time

        # Push the refreshed features into the state object
        self.state.update(self.fea_j, self.op_mask, self.fea_m, self.mch_mask,
                          self.dynamic_pair_mask, self.comp_idx, self.candidate,
                          self.fea_pairs)

        return self.state, np.array(reward), self.done()


usage: ipykernel_launcher.py [-h] [--device DEVICE] [--device_id DEVICE_ID]
                             [--model_suffix MODEL_SUFFIX]
                             [--data_suffix DATA_SUFFIX]
                             [--cover_flag COVER_FLAG]
                             [--cover_data_flag COVER_DATA_FLAG]
                             [--cover_heu_flag COVER_HEU_FLAG]
                             [--cover_train_flag COVER_TRAIN_FLAG]
                             [--model_source MODEL_SOURCE]
                             [--data_source DATA_SOURCE]
                             [--op_per_job OP_PER_JOB]
                             [--op_per_mch_min OP_PER_MCH_MIN]
                             [--op_per_mch_max OP_PER_MCH_MAX]
                             [--data_size DATA_SIZE] [--data_type DATA_TYPE]
                             [--sort_flag SORT_FLAG]
                             [--max_solve_time MAX_SOLVE_TIME]
                             [--seed_datagen SEED_DATAGEN]
         

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [72]:
from fjsp_env_same_op_nums import FJSPEnvForSameOpNums

In [108]:

class DANTrainer(Trainer):
    """Trainer that fine-tunes a saved policy checkpoint inside the annotated test environment."""

    def __init__(self, config):
        """
        :param config: parsed argument namespace forwarded to ``Trainer.__init__``
        """
        super().__init__(config)
        self.env = FJSPEnvForSameOpNums_test(self.n_j, self.n_m)
        # Checkpoint whose weights are loaded as the starting point for fine-tuning
        self.finetuning_model = '../trained_network/SD2/10x5+mix.pth'
        self.ppo = PPO_initialize()
        # Map the checkpoint onto the configured device instead of a hard-coded
        # 'cuda', so loading also works on hosts without a GPU.
        self.ppo.policy.load_state_dict(torch.load(self.finetuning_model, map_location=device))
        self.ppo.policy_old = deepcopy(self.ppo.policy)
        print(self.finetuning_model)

    def train(self):
        """
            train the model following the config

        Each update rolls the environments forward with ``policy_old`` until every
        environment is done, runs one PPO update on the collected transitions,
        and logs reward / makespan / loss through ``iter_log``.
        """
        setup_seed(self.seed_train)
        self.log = []
        self.validation_log = []
        self.record = float('inf')
        print("-" * 25 + "Training Setting" + "-" * 25)
        print(f"source : {self.data_source}")

        print(f"model name :{self.finetuning_model}")
        print(f"vali data :{self.vali_data_path}")
        print("\n")

        self.train_st = time.time()

        for i_update in tqdm(range(self.max_updates), file=sys.stdout, desc="progress", colour='blue'):
            ep_st = time.time()

            # Training instances are sampled once, on the first update; every
            # later update resets the environment instead.
            if i_update == 0:
                dataset_job_length, dataset_op_pt = self.sample_training_instances()
                state = self.env.set_initial_data(dataset_job_length, dataset_op_pt)
                state.print_shape()
                # NOTE: the labels below say "shape" but the values of the first
                # environment are what is printed.
                print(f"EnvState(\n"
                f"  fea_j_tensor 形状: {state.fea_j_tensor[0]},\n"
                f"  op_mask_tensor 形状: {state.op_mask_tensor[0]},\n"
                f"  candidate_tensor 形状: {state.candidate_tensor[0]},\n"
                f"  fea_m_tensor 形状: {state.fea_m_tensor[0]},\n"
                f"  mch_mask_tensor 形状: {state.mch_mask_tensor[0]},\n"
                f"  comp_idx_tensor 形状: {state.comp_idx_tensor[0]},\n"
                f"  dynamic_pair_mask_tensor 形状: {state.dynamic_pair_mask_tensor[0]},\n"
                f"  fea_pairs_tensor 形状: {state.fea_pairs_tensor[0]}\n"
                f")")
            else:
                state = self.env.reset()

            ep_rewards = - deepcopy(self.env.init_quality)

            while True:
                # Store the state before acting on it
                self.memory.push(state)
                with torch.no_grad():
                    pi_envs, vals_envs = self.ppo.policy_old(fea_j=state.fea_j_tensor,  # [sz_b, N, 8]
                                                            op_mask=state.op_mask_tensor,  # [sz_b, N, N]
                                                            candidate=state.candidate_tensor,  # [sz_b, J]
                                                            fea_m=state.fea_m_tensor,  # [sz_b, M, 6]
                                                            mch_mask=state.mch_mask_tensor,  # [sz_b, M, M]
                                                            comp_idx=state.comp_idx_tensor,  # [sz_b, M, M, J]
                                                            dynamic_pair_mask=state.dynamic_pair_mask_tensor,  # [sz_b, J, M]
                                                            fea_pairs=state.fea_pairs_tensor)  # [sz_b, J, M]

                # Sample the action
                action_envs, action_logprob_envs = sample_action(pi_envs)

                # State transition
                state, reward, done = self.env.step(actions=action_envs.cpu().numpy())
                ep_rewards += reward
                reward = torch.from_numpy(reward).to(device)

                # Collect the transition
                self.memory.done_seq.append(torch.from_numpy(done).to(device))
                self.memory.reward_seq.append(reward)
                self.memory.action_seq.append(action_envs)
                self.memory.log_probs.append(action_logprob_envs)
                self.memory.val_seq.append(vals_envs.squeeze(1))

                if done.all():
                    break

            # Only the total loss is reported; the critic loss is not used here
            loss, _ = self.ppo.update(self.memory)
            self.memory.clear_memory()

            mean_rewards_all_env = np.mean(ep_rewards)
            mean_makespan_all_env = np.mean(self.env.current_makespan)
            # No validation pass is run here: 'makespan_validate' is a stand-in
            # taken from the training makespan of the first two updates and is
            # left unchanged for all later updates.
            if i_update < 2:
                vali_result = mean_makespan_all_env

            # Save the mean rewards of all instances in the current training data
            self.log.append([i_update, mean_rewards_all_env])

            ep_et = time.time()
            # Print the reward, makespan, loss and training time of the current episode
            tqdm.write(
                'Episode {}\t reward: {:.2f}\t makespan: {:.2f}\t Mean_loss: {:.8f},  training time: {:.2f}'.format(
                    i_update + 1, mean_rewards_all_env, mean_makespan_all_env, loss, ep_et - ep_st))
            scalars = {f"makespan_{i}": m for i, m in zip(range(self.num_envs), self.env.current_makespan)}
            scalars.update({
                'Loss/train': loss,
                'makespan_train': mean_makespan_all_env,
                'makespan_validate': vali_result,
            })

            self.iter_log(i_update, scalars)

        self.train_et = time.time()

        # Log results
        self.save_training_log()




In [110]:
# Overrides for this fine-tuning run, expressed as argparse flag -> value.
# Insertion order is preserved, so the flattened list matches the flag order below.
run_overrides = {
    "--logdir": "./runs/model_test/main_model",
    "--model_suffix": "free",
    "--max_updates": "21",
    "--n_j": "5",
    "--n_m": "3",
    "--num_envs": "2",
}
configs = parser.parse_args(
    args=[token for flag_and_value in run_overrides.items() for token in flag_and_value]
)

# Build the trainer from the parsed configuration and start training
trainer = DANTrainer(configs)
trainer.train()


vali_data = ./data/data_train_vali/SD2/5x3+mix
save model name:  5x3+mix+free
../trained_network/SD2/10x5+mix.pth
-------------------------Training Setting-------------------------
source : SD2
model name :../trained_network/SD2/10x5+mix.pth
vali data :./data/data_train_vali/SD2/5x3+mix


progress:   0%|[34m          [0m| 0/21 [00:00<?, ?it/s]torch.Size([2, 15, 10])
torch.Size([2, 15, 3])
torch.Size([2, 5])
torch.Size([2, 3, 8])
torch.Size([2, 3, 3])
torch.Size([2, 3, 3, 5])
torch.Size([2, 5, 3])
torch.Size([2, 5, 3, 8])
EnvState(
  fea_j_tensor 形状: tensor([[ 0.0000, -1.4330, -0.2150,  0.0595, -0.3171,  0.0000,  0.0000,  0.0000,
          0.6641,  0.8429],
        [ 0.0000, -0.0337,  0.4589,  0.4611,  0.7945,  0.0000,  0.0000,  0.0000,
          0.6641, -0.5620],
        [ 0.0000,  1.0678, -0.0225,  0.3272,  0.5030,  0.0000,  0.0000,  0.0000,
          0.6641,  0.8429],
        [ 0.0000, -0.3612,  1.5179, -0.5206,  1.3595,  0.0000,  0.0000,  0.0000,
          0.7629,  0.8429],
      

-------------------------Training Setting-------------------------
source : SD2
model name :../trained_network/SD2/10x5+mix.pth
vali data :./data/data_train_vali/SD2/10x5+mix


progress:   0%|[34m          [0m| 0/21 [00:00<?, ?it/s][[ 0  0 71 27 81]
 [ 7 15 76 55 72]
 [ 0 44  0 56 26]
 [ 0 85  0  0 13]
 [19 82  0 52 45]
 [ 0 57 92 50  0]
 [ 4  0 12 22 90]
 [ 0  0 12  0 95]
 [ 0  0 88 15  0]
 [ 0 13  0  0  0]
 [ 0 62  0  0 48]
 [ 0  0 87 47  0]
 [ 0  0 26  0  0]
 [ 1  0  0  0  0]
 [69  0  0  0 61]
 [66 77 68 45  0]
 [ 8 89 71 14 29]
 [64  0  0  0  0]
 [ 0  0 59  0  0]
 [79  7 66 95 71]
 [ 0  0 77 77  0]
 [ 0 14 45  2 42]
 [79  0 88 64  0]
 [ 4 18 89 88 70]
 [98  0  3 19  0]
 [46 58 36 19 92]
 [47  0  0  0  0]
 [ 0  0 27  0  0]
 [11  0  0 59  0]
 [ 0 26 28 58  4]
 [79 86 60 49 57]
 [88 88  5 70 57]
 [61 10 67 34 70]
 [ 0 80 29  0  0]
 [ 4 49  4  0 92]
 [56  0  0 50 88]
 [86  0  0  0 67]
 [46 11  9 97 26]
 [ 0  0  0 71  0]
 [40  0 93 39  6]
 [10 41  0  0 22]
 [ 0  0  0 45  0]
 [ 0 82 62