In [1]:
import os
# suppress silly log messages from tensorflow
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import numpy as np
import random
import structure_prediction_utils as utils
from tensorflow import keras

In [2]:
# Directory holding the preprocessed `.tfr` record files. The default is the
# original author's local path; set the PROTEIN_DATA_DIR environment variable
# to run the notebook on another machine without editing this cell.
# NOTE(review): the default ends with a trailing slash; how
# utils.load_preprocessed_data joins folder and file name is not visible here,
# so keep the trailing slash when overriding.
data_folder = os.environ.get(
    'PROTEIN_DATA_DIR',
    'd:/UC_Master/COSC440/assignment_all/final_project/',
)

# Load the three dataset splits (each call prints debug info about one record).
training_records = utils.load_preprocessed_data(data_folder, 'training.tfr')
validate_records = utils.load_preprocessed_data(data_folder, 'validation.tfr')
test_records = utils.load_preprocessed_data(data_folder, 'testing.tfr')

Debug info, shape of one data record:
evolutionary shape= (5376,)
id shape= (1,)
primary shape= (256,)
tertiary shape= (256, 3)
primary_onehot shape= (256, 21)
mask shape= (256,)
true_distances shape= (256, 256)
distance_mask shape= (256, 256)
Value info of one data record, structure: tf.Tensor([1806.5  322.7 6882.9], shape=(3,), dtype=float32)
Debug info, shape of one data record:
evolutionary shape= (5376,)
id shape= (1,)
primary shape= (256,)
tertiary shape= (256, 3)
primary_onehot shape= (256, 21)
mask shape= (256,)
true_distances shape= (256, 256)
distance_mask shape= (256, 256)
Value info of one data record, structure: tf.Tensor([0. 0. 0.], shape=(3,), dtype=float32)
Debug info, shape of one data record:
evolutionary shape= (5376,)
id shape= (1,)
primary shape= (256,)
tertiary shape= (256, 3)
primary_onehot shape= (256, 21)
mask shape= (256,)
true_distances shape= (256, 256)
distance_mask shape= (256, 256)
Value info of one data record, structure: tf.Tensor([0. 0. 0.], shape=(3,)

In [26]:
class ProteinStructurePredictor0(keras.Model):
    """Experimental baseline that predicts a pairwise map from sequence offsets.

    The forward pass currently feeds only the |i - j| residue-index offset
    matrix through a 1x1 convolution. The one-hot pairwise embedding is still
    computed by ``layer0`` but its result is not used for the output (see the
    NOTE in ``call``).
    """

    def __init__(self):
        super().__init__()
        # 5 filters, 5x5 kernel over the pairwise (residue x residue) grid.
        self.layer0 = keras.layers.Conv2D(5, 5, activation='gelu', padding="same")
        # 1 filter, 1x1 kernel: collapses the channel axis to a single value per pair.
        self.layer1 = keras.layers.Conv2D(1, 1, activation='gelu', padding="same")

    #@tf.function
    def call(self, inputs, mask=None):
        """Run the forward pass.

        Args:
            inputs: mapping with key ``'primary_onehot'``; the recorded debug
                output shows a tensor of shape (batch, 256, 21).
            mask: accepted for interface compatibility; not used in this method.

        Returns:
            Output of ``layer1`` applied to the broadcast offset matrix, with
            shape (batch, NUM_RESIDUES, NUM_RESIDUES, 1).
        """
        print("start call", '-'*20)
        primary_one_hot = inputs['primary_onehot']
        print(f"inputs x shape is: {primary_one_hot.shape}")
        # outer sum to get a NUM_RESIDUES x NUM_RESIDUES x embedding size
        x = tf.expand_dims(primary_one_hot, -2) + tf.expand_dims(primary_one_hot, -3)
        print(f"expand_dims x shape is: {x.shape}")

        # filter the initial representation into an embedded representation
        x = self.layer0(x)
        print(f"filter layer0 x shape is: {x.shape}")

        # add positional distance information
        r = tf.range(0, utils.NUM_RESIDUES, dtype=tf.float32)
        distances = tf.abs(tf.expand_dims(r, -1) - tf.expand_dims(r, -2))
        # Use the dynamic batch size: the static `.shape[0]` is None whenever
        # the batch dimension is unknown (symbolic build, tf.function tracing,
        # model.fit), which makes tf.broadcast_to fail.
        batch_size = tf.shape(primary_one_hot)[0]
        distances_bc = tf.expand_dims(
            tf.broadcast_to(distances, [batch_size, utils.NUM_RESIDUES, utils.NUM_RESIDUES]), -1)
        print(f"distances_bc shape is: {distances_bc.shape}")
        # NOTE(review): the layer0 embedding computed above is discarded here, so
        # the output depends only on index offsets. The concat below looks like
        # the intended combination; confirm before re-enabling.
        # x = tf.concat([x, x * distances_bc, distances_bc], axis=-1)
        x = distances_bc
        # generate result
        x = self.layer1(x)
        print("End call", '-'*20)
        return x

In [27]:
# Instantiate the baseline predictor and hang the training hyper-parameters
# directly on the model object so later cells can read them from one place.
model = ProteinStructurePredictor0()
model.batch_size = 128
model.optimizer = keras.optimizers.Adam(learning_rate=1e-2)

# Number of passes over the training set.
epochs = 5
def get_n_records(batch):
    """Return the size of the leading axis of ``batch['primary_onehot']``."""
    onehot = batch['primary_onehot']
    return onehot.shape[0]
def get_input_output_masks(batch):
    """Split a batch mapping into ``(inputs, outputs, masks)``.

    ``inputs`` is a new dict holding only the ``'primary_onehot'`` entry;
    ``outputs`` is ``batch['true_distances']`` and ``masks`` is
    ``batch['distance_mask']``.
    """
    return (
        {'primary_onehot': batch['primary_onehot']},
        batch['true_distances'],
        batch['distance_mask'],
    )

In [28]:
# Smoke-test the forward pass: shuffle the training records, batch them, and
# push every batch through the model while printing the tensor shapes.
epoch_training_records = (
    training_records
    .shuffle(buffer_size=256)
    .batch(model.batch_size, drop_remainder=False)
)
for batch in epoch_training_records:
    inputs, labels, masks = get_input_output_masks(batch)
    print(*(t.shape for t in (inputs['primary_onehot'], labels, masks)))
    outputs = model(inputs, masks)

(128, 256, 21) (128, 256, 256) (128, 256, 256)
start call --------------------
inputs x shape is: (128, 256, 21)
expand_dims x shape is: (128, 256, 256, 21)
filter layer0 x shape is: (128, 256, 256, 5)
distances_bc shape is: (128, 256, 256, 1)
End call --------------------
(128, 256, 21) (128, 256, 256) (128, 256, 256)
start call --------------------
inputs x shape is: (128, 256, 21)
expand_dims x shape is: (128, 256, 256, 21)
filter layer0 x shape is: (128, 256, 256, 5)
distances_bc shape is: (128, 256, 256, 1)
End call --------------------
(128, 256, 21) (128, 256, 256) (128, 256, 256)
start call --------------------
inputs x shape is: (128, 256, 21)
expand_dims x shape is: (128, 256, 256, 21)
filter layer0 x shape is: (128, 256, 256, 5)
distances_bc shape is: (128, 256, 256, 1)
End call --------------------
(128, 256, 21) (128, 256, 256) (128, 256, 256)
start call --------------------
inputs x shape is: (128, 256, 21)
expand_dims x shape is: (128, 256, 256, 21)
filter layer0 x shape

In [26]:
import tensorflow as tf

# List all physical GPU devices visible to TensorFlow.
gpus = tf.config.list_physical_devices(device_type='GPU')
print("Available GPUs:", gpus)


Available GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
