In [1]:
# Bone-loss related medical terms, keyed by MeSH identifier.
# The value is the human-readable term for each identifier.
_BONE_LOSS_TERMS = {
    'MESH:D001851': 'Bone Diseases, Metabolic',
    'MESH:D010024': 'Osteoporosis',
    'MESH:D015663': 'Osteoporosis, Postmenopausal',
    'MESH:C537700': 'Juvenile osteoporosis',
}

# Later cells only need the identifiers, in declaration order.
bone_loss_list = list(_BONE_LOSS_TERMS)

In [2]:
# All chemicals as candidate drugs
import pandas as pd

# Root folder of every triplet file; edit this single line to relocate the data.
DATA_DIR = "D:/projects/MGKG/data"


def _read_triplets(relative_path):
    """Read one headerless, tab-separated triplet file stored under DATA_DIR.

    The columns are left unnamed, so callers address them by position (0, 1, 2, ...).
    """
    return pd.read_csv(f"{DATA_DIR}/{relative_path}", sep='\t', header=None)


# general drug
DDinter_df = _read_triplets("DDinter/DDinter_triplets.csv")
DrugBank_df = _read_triplets("DrugBank/DrugBank_triplets.csv")
GNBR_df1 = _read_triplets("GNBR/chemical_disease_triplets2.csv")
GNBR_df2 = _read_triplets("GNBR/chemical_gene_triplets2.csv")
STITCH_df = _read_triplets("STITCH/STITCH_triplets2.csv")
FreeText_df1 = _read_triplets("FreeText/chemical_disease_triplets.csv")
FreeText_df2 = _read_triplets("FreeText/chemical_gene_triplets.csv")

# Column 0 of every source is taken as a chemical; DDinter additionally
# contributes column 2 (presumably the second drug of an interaction pair -
# TODO confirm against the DDinter triplet layout).
# The chain builds a fresh Series instead of mutating in place, and resets the
# index because the concatenated inputs repeat the same row labels.
general_drug_df = (
    pd.concat([
        DDinter_df[0], DDinter_df[2],
        DrugBank_df[0],
        GNBR_df1[0], GNBR_df2[0],
        STITCH_df[0],
        FreeText_df1[0], FreeText_df2[0],
    ])
    .drop_duplicates()
    .rename('entity')
    .reset_index(drop=True)
)  # 36133 (count noted by the original author)
general_drug_list = general_drug_df.tolist()  # 36133

# traditional chinese medicine
TCMID_df = _read_triplets("TCMID/TCMID_triplets.csv")
# Work on a derived Series so TCMID_df itself is never touched.
tcm_drug_df = (
    TCMID_df[0]
    .drop_duplicates()
    .rename('entity')
    .reset_index(drop=True)
)  # 3004 (count noted by the original author)
tcm_drug_list = tcm_drug_df.tolist()  # 3004

In [3]:
# Relation names that are treated as "treatment" edges between a chemical and a disease.
_TREATMENT_RELATIONS = (
    'chemical:disease:T',
    'chemical:disease:Pr',
    'chemical:disease:Pa',
)
treatment = list(_TREATMENT_RELATIONS)

In [4]:
# Load the entity and relation vocabularies of the pretrained model.
# Both files are headerless TSVs; the first column is named `id` and the second
# holds the entity / relation name.
_tsv_options = dict(sep='\t', header=None)
entitiy_df = pd.read_csv(
    "D:/projects/MGKG/code/Train/TransE_l2/entities.tsv",
    names=['id', 'entity'],
    **_tsv_options,
)
relation_df = pd.read_csv(
    "D:/projects/MGKG/code/Train/TransE_l2/relations.tsv",
    names=['id', 'rel'],
    **_tsv_options,
)

In [5]:
# ID mapping
# Each inner merge attaches the model's `id` to a name and drops every name that
# has no match in the model vocabulary.
#
# The drug tables are rebuilt from the plain name lists rather than from
# general_drug_df / tcm_drug_df themselves, so this cell can be re-run safely:
# merging the already-merged frame again would produce `id_x` / `id_y` columns
# and break the later `['id']` lookups.
general_drug_df = pd.merge(
    pd.DataFrame({'entity': general_drug_list}), entitiy_df, on='entity'
)
tcm_drug_df = pd.merge(
    pd.DataFrame({'entity': tcm_drug_list}), entitiy_df, on='entity'
)

# Merge keys are spelled out so the join column does not depend on whichever
# column names the two frames happen to share.
treatment_df = pd.merge(
    pd.DataFrame(treatment, columns=['rel']), relation_df, on='rel'
)

disease_df = pd.merge(
    pd.DataFrame(bone_loss_list, columns=['entity']), entitiy_df, on='entity'
)

In [None]:
# Load embeddings
import torch
import numpy as np

entity_emb = np.load("D:/projects/MGKG/code/ckpts/TransE_l2_MGKG_0/MGKG_TransE_l2_entity.npy")
rel_emb = np.load("D:/projects/MGKG/code/ckpts/TransE_l2_MGKG_0/MGKG_TransE_l2_relation.npy")


def _id_tensor(frame):
    """Return the `id` column of `frame` as a torch tensor."""
    return torch.tensor(frame['id'].values.tolist())


# Ids of the candidate drugs, the treatment relations and the target diseases.
general_drug_ids, tcm_drug_ids, treatment_ids, disease_ids = (
    _id_tensor(frame)
    for frame in (general_drug_df, tcm_drug_df, treatment_df, disease_df)
)

# The ids are used directly as row indices into the embedding matrices.
general_drug_embs = torch.tensor(entity_emb[general_drug_ids])
tcm_drug_embs = torch.tensor(entity_emb[tcm_drug_ids])
# One embedding tensor per treatment relation, kept as a list.
treatment_embs = []
for rid in treatment_ids:
    treatment_embs.append(torch.tensor(rel_emb[rid]))

## Drug Repurposing Based on Edge Score
We use the following algorithm to calculate the edge score. Note that we apply logsigmoid so that all scores are < 0. The larger the score, the more likely it is that relation $r$ holds between $h$ and $t$.

$\mathbf{d} = \gamma - ||\mathbf{h}+\mathbf{r}-\mathbf{t}||_{2}$

$\mathbf{score} = \log\left(\frac{1}{1+\exp(\mathbf{-d})}\right)$

When doing drug repurposing, we only use the treatment related relations.

In [None]:
import torch.nn.functional as F

gamma = 12.0  # margin (gamma) that was used when the model was trained


def transE_l2(head, rel, tail):
    """Return the TransE (L2) edge score ``gamma - ||head + rel - tail||_2``.

    Args:
        head: Head embedding(s); a torch tensor or anything ``torch.as_tensor``
            accepts (for example a NumPy array).
        rel: Relation embedding, broadcastable against ``head``.
        tail: Tail embedding, broadcastable against ``head``.

    Returns:
        A torch tensor with the last dimension reduced by the L2 norm. Larger
        values mean ``head + rel`` lies closer to ``tail``.
    """
    # Coerce every operand to a tensor up front so the arithmetic never depends
    # on implicit torch/NumPy mixing (callers pass raw NumPy rows as `tail`).
    head, rel, tail = (torch.as_tensor(part) for part in (head, rel, tail))
    offset = head + rel - tail
    return gamma - torch.norm(offset, p=2, dim=-1)


def _score_candidates(drug_ids, drug_embs):
    """Score every candidate drug against every (treatment relation, disease) pair.

    Args:
        drug_ids: Tensor of candidate drug ids, aligned row-for-row with ``drug_embs``.
        drug_embs: Tensor holding one embedding row per candidate drug.

    Returns:
        ``(ids, scores)``: two 1-D tensors of equal length. The id tensor repeats
        ``drug_ids`` once per (relation, disease) pair, relations in the outer
        loop and diseases in the inner loop; ``scores`` holds the matching
        log-sigmoid edge scores.

    Raises:
        ValueError: If there is no treatment relation or no disease to score.
    """
    id_chunks, score_chunks = [], []
    for treatment_emb in treatment_embs:
        for disease_id in disease_ids:
            # entity_emb is a NumPy array; convert the row so that all the
            # arithmetic inside transE_l2 happens on tensors.
            disease_emb = torch.as_tensor(entity_emb[int(disease_id)])
            edge_score = transE_l2(drug_embs, treatment_emb, disease_emb)
            score_chunks.append(F.logsigmoid(edge_score))
            id_chunks.append(drug_ids)
    if not score_chunks:
        # torch.cat on an empty list fails with an opaque message; explain why.
        raise ValueError(
            "nothing to score: no treatment relation or disease was found "
            "in the pretrained model vocabulary"
        )
    return torch.cat(id_chunks), torch.cat(score_chunks)


# general_drug is mostly Western medicine, but may include some TCM ingredients.
general_drugs, general_drugs_scores = _score_candidates(general_drug_ids, general_drug_embs)

# Traditional Chinese medicine
tcm_drugs, tcm_drugs_scores = _score_candidates(tcm_drug_ids, tcm_drug_embs)

In [None]:
# Sort scores in descending order.
def _rank_descending(ids, scores):
    """Order ``ids`` and ``scores`` by score, best first.

    Accepts torch tensors or NumPy arrays. The second case occurs when this cell
    is re-run after it has already replaced its inputs with NumPy arrays.

    Returns:
        ``(order, ids_sorted, scores_sorted)``: the permutation as a torch
        tensor, followed by the reordered ids and scores as NumPy arrays.
    """
    ids, scores = torch.as_tensor(ids), torch.as_tensor(scores)
    order = torch.argsort(scores, descending=True)
    return order, ids[order].numpy(), scores[order].numpy()


general_drugs_idx, general_drugs, general_drugs_scores = _rank_descending(
    general_drugs, general_drugs_scores
)

tcm_drugs_idx, tcm_drugs, tcm_drugs_scores = _rank_descending(
    tcm_drugs, tcm_drugs_scores
)

In [None]:
# Select the top 100 distinct drugs.
# np.unique reports the first position of every id. Provided the arrays are
# already sorted best-first (the preceding cell does this), that position is the
# drug's best score. Sorting those positions restores the best-first order
# before the cut.
topk = 100

general_drugs_unique_idx = np.unique(general_drugs, return_index=True)[1]
general_drugs_topk_idx = np.sort(general_drugs_unique_idx)[:topk]
proposed_general_drugs_ids, proposed_general_drugs_scores = (
    general_drugs[general_drugs_topk_idx],
    general_drugs_scores[general_drugs_topk_idx],
)

tcm_drugs_unique_idx = np.unique(tcm_drugs, return_index=True)[1]
tcm_drugs_topk_idx = np.sort(tcm_drugs_unique_idx)[:topk]
proposed_tcm_drugs_ids, proposed_tcm_drugs_scores = (
    tcm_drugs[tcm_drugs_topk_idx],
    tcm_drugs_scores[tcm_drugs_topk_idx],
)

In [None]:
# Save prediction results
def _ranked_table(id_frame, scores, names_df):
    """Return an (entity, score) table for the proposed drugs.

    The score is attached to its id BEFORE the merge, so every score stays on
    its own drug whatever the merge does to row order or row count. Assigning
    the scores by position after the merge would depend on both.

    Args:
        id_frame: DataFrame with one `id` column, best drug first.
        scores: Scores aligned row-for-row with ``id_frame``.
        names_df: DataFrame with `id` and `entity` columns.
    """
    ranked = id_frame.assign(score=scores)
    return ranked.merge(names_df, on='id', how='inner')[['entity', 'score']]


wmd_id = pd.DataFrame(proposed_general_drugs_ids, columns=['id'])
tcm_id = pd.DataFrame(proposed_tcm_drugs_ids, columns=['id'])
modern_medicine = _ranked_table(wmd_id, proposed_general_drugs_scores, general_drug_df)
traditional_Chinese_medicine = _ranked_table(tcm_id, proposed_tcm_drugs_scores, tcm_drug_df)

# Tab-separated output without header row and without index column.
modern_medicine.to_csv('D:/projects/MGKG/code/Predict/modern_medicine.csv', sep='\t', header=False, index=False)
traditional_Chinese_medicine.to_csv('D:/projects/MGKG/code/Predict/traditional_Chinese_medicine.csv', sep='\t', header=False, index=False)