# Semantic Network metadata
This notebook investigates the relationships represented in the Semantic Network files:
- SRSTR: distance-1 relations
- SRSTRE2: fully inherited relations

In [1]:
import pickle
import pandas as pd

In [2]:
# Directory holding the Semantic Network files; change this one line to run the notebook elsewhere.
DATA_DIR = '/Users/qinyilong/Desktop/ScAi'

# Both files are pipe-delimited with no header row; dtype=object keeps every field exactly as read.
srstr = pd.read_csv(f'{DATA_DIR}/SRSTR', sep='|', header=None, dtype=object)
srstre2 = pd.read_csv(f'{DATA_DIR}/SRSTRE2', sep='|', header=None, dtype=object)

In [3]:
# Drop the last positional column of each table (label 4 in SRSTR, label 3 in SRSTRE2).
# NOTE(review): presumably an empty field produced by a trailing '|' on every line — confirm against the raw files.
# errors="ignore" keeps the cell safe to re-run: once the column is gone (or the
# columns have been renamed) a plain drop would raise KeyError.
srstr = srstr.drop(columns=4, errors="ignore")
srstre2 = srstre2.drop(columns=3, errors="ignore")

In [4]:
# Label the fields: first argument, relation, second argument and, for SRSTR only, the LS code.
srstr = srstr.set_axis(["STY/RL1", "RL", "STY/RL2", "LS"], axis=1)
srstre2 = srstre2.set_axis(["STY/RL1", "RL", "STY/RL2"], axis=1)

## SRSTR

In [5]:
# Preview the labelled SRSTR table.
srstr

Unnamed: 0,STY/RL1,RL,STY/RL2,LS
0,Acquired Abnormality,co-occurs_with,Injury or Poisoning,D
1,Acquired Abnormality,isa,Anatomical Abnormality,D
2,Acquired Abnormality,result_of,Behavior,D
3,Activity,isa,Event,D
4,Age Group,isa,Group,D
...,...,...,...,...
592,temporally_related_to,isa,associated_with,D
593,traverses,isa,spatially_related_to,D
594,treats,isa,affects,D
595,tributary_of,isa,physically_related_to,D


### Partition data
- Relations among semantic types
    - D: defined
    - B: blocked
    - DNI: defined not inherited
- Relations among relations

In [6]:
# Semantic-type rows occupy index labels 0 through 542 (.loc slicing includes the end label);
# from label 543 onward the table relates relations to one another.
srstr_sty = srstr.loc[:542].set_axis(["STY1", "RL", "STY2", "LS"], axis=1)
srstr_sty

Unnamed: 0,STY1,RL,STY2,LS
0,Acquired Abnormality,co-occurs_with,Injury or Poisoning,D
1,Acquired Abnormality,isa,Anatomical Abnormality,D
2,Acquired Abnormality,result_of,Behavior,D
3,Activity,isa,Event,D
4,Age Group,isa,Group,D
...,...,...,...,...
538,Vertebrate,isa,Animal,D
539,Virus,causes,Pathologic Function,D
540,Virus,isa,Organism,D
541,Virus,location_of,Biologically Active Substance,D


In [7]:
# Semantic-type relations whose LS code is "D" (defined)
srstr_sty_d = srstr_sty.loc[srstr_sty["LS"].eq("D")]
srstr_sty_d

Unnamed: 0,STY1,RL,STY2,LS
0,Acquired Abnormality,co-occurs_with,Injury or Poisoning,D
1,Acquired Abnormality,isa,Anatomical Abnormality,D
2,Acquired Abnormality,result_of,Behavior,D
3,Activity,isa,Event,D
4,Age Group,isa,Group,D
...,...,...,...,...
538,Vertebrate,isa,Animal,D
539,Virus,causes,Pathologic Function,D
540,Virus,isa,Organism,D
541,Virus,location_of,Biologically Active Substance,D


In [8]:
# Semantic-type relations whose LS code is "B" (blocked)
srstr_sty_b = srstr_sty.loc[srstr_sty["LS"].eq("B")]
srstr_sty_b

Unnamed: 0,STY1,RL,STY2,LS
359,Mental Process,process_of,Bacterium,B
360,Mental Process,process_of,Fungus,B
361,Mental Process,process_of,Plant,B
362,Mental Process,process_of,Virus,B
366,Mental or Behavioral Dysfunction,process_of,Bacterium,B
367,Mental or Behavioral Dysfunction,process_of,Fungus,B
368,Mental or Behavioral Dysfunction,process_of,Plant,B
369,Mental or Behavioral Dysfunction,process_of,Virus,B


In [9]:
# Semantic-type relations whose LS code is "DNI" (defined not inherited)
srstr_sty_dni = srstr_sty.loc[srstr_sty["LS"].eq("DNI")]
srstr_sty_dni

Unnamed: 0,STY1,RL,STY2,LS
65,Body Location or Region,conceptual_part_of,Body Location or Region,DNI
66,Body Location or Region,conceptual_part_of,Body System,DNI
67,Body Location or Region,conceptual_part_of,Fully Formed Anatomical Structure,DNI
84,"Body Part, Organ, or Organ Component",conceptual_part_of,Body System,DNI
104,Body Space or Junction,conceptual_part_of,Body System,DNI
105,Body Space or Junction,conceptual_part_of,Fully Formed Anatomical Structure,DNI
123,Body Substance,conceptual_part_of,Body System,DNI
131,Body System,conceptual_part_of,Body System,DNI
132,Body System,conceptual_part_of,Fully Formed Anatomical Structure,DNI
138,Cell Component,conceptual_part_of,Body System,DNI


In [10]:
# Relation-to-relation rows: everything from index label 543 to the end of SRSTR
srstr_rl = srstr.loc[543:].set_axis(["RL1", "RL", "RL2", "LS"], axis=1)
srstr_rl

Unnamed: 0,RL1,RL,RL2,LS
543,adjacent_to,isa,spatially_related_to,D
544,affects,isa,functionally_related_to,D
545,analyzes,isa,conceptually_related_to,D
546,assesses_effect_of,isa,analyzes,D
547,associated_with,isa,,D
548,branch_of,isa,physically_related_to,D
549,brings_about,isa,functionally_related_to,D
550,carries_out,isa,performs,D
551,causes,isa,brings_about,D
552,co-occurs_with,isa,temporally_related_to,D


### Metadata
- Semantic type frequencies
- Reflexive (xRx) relations and their frequencies
- Symmetric (xRy yRx) relations and their frequencies

In [11]:
# Semantic type frequencies: how often each type appears as either argument
# (STY1 or STY2) of a defined relation.
srstr_sty_freq_1 = srstr_sty_d["STY1"].value_counts()
srstr_sty_freq_2 = srstr_sty_d["STY2"].value_counts()
# fill_value=0 covers types that occur on one side only. Aligning the two count
# series upcasts the sum to float, so cast back to int: the values are whole counts.
srstr_sty_freq = srstr_sty_freq_1.add(srstr_sty_freq_2, fill_value=0).astype(int)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(srstr_sty_freq)

Acquired Abnormality                        3.0
Activity                                    7.0
Age Group                                   2.0
Amino Acid Sequence                         3.0
Amino Acid, Peptide, or Protein             4.0
Amphibian                                   1.0
Anatomical Abnormality                     43.0
Anatomical Structure                        8.0
Animal                                      3.0
Antibiotic                                  1.0
Archaeon                                    1.0
Bacterium                                   5.0
Behavior                                   22.0
Biologic Function                          21.0
Biologically Active Substance              19.0
Biomedical Occupation or Discipline         1.0
Biomedical or Dental Material               1.0
Bird                                        1.0
Body Location or Region                    19.0
Body Part, Organ, or Organ Component       43.0
Body Space or Junction                  

In [12]:
# Cross-tabulate the defined relations: one row per STY1, one column per STY2,
# each cell holding the number of (STY1 R STY2) triples; pairs that never occur are 0.
srstr_sty_d_pivot = (
    srstr_sty_d
    .groupby(['STY1', 'STY2'])
    .size()
    .unstack('STY2', fill_value=0)
)

In [13]:
# The pivot table is mostly zeros, so reshape it to long form and keep only the
# (STY1, STY2) pairs that occur at least once.
# reset_index() exposes STY1 as a column on a temporary frame, so srstr_sty_d_pivot
# itself is left untouched (no string column injected into the count matrix).
srstr_sty_d_pivot_melted = pd.melt(
    srstr_sty_d_pivot.reset_index(),
    id_vars=['STY1'],
    var_name='STY2',
    value_name='total',
)
existing_rl = srstr_sty_d_pivot_melted[srstr_sty_d_pivot_melted.total >= 1]

In [14]:
# Reflexive relations: both arguments are the same semantic type
srstr_sty_d.loc[srstr_sty_d["STY1"].eq(srstr_sty_d["STY2"])]

Unnamed: 0,STY1,RL,STY2,LS
8,"Amino Acid, Peptide, or Protein",consists_of,"Amino Acid, Peptide, or Protein",D
14,Anatomical Abnormality,co-occurs_with,Anatomical Abnormality,D
15,Anatomical Abnormality,complicates,Anatomical Abnormality,D
20,Anatomical Abnormality,result_of,Anatomical Abnormality,D
35,Behavior,affects,Behavior,D
...,...,...,...,...
522,Tissue,adjacent_to,Tissue,D
524,Tissue,consists_of,Tissue,D
526,Tissue,developmental_form_of,Tissue,D
532,Tissue,part_of,Tissue,D


In [15]:
# Reflexive relation frequencies: number of relations linking each semantic type to itself
existing_rl.loc[existing_rl["STY1"].eq(existing_rl["STY2"])]

Unnamed: 0,STY1,STY2,total
258,"Amino Acid, Peptide, or Protein","Amino Acid, Peptide, or Protein",1
387,Anatomical Abnormality,Anatomical Abnormality,3
901,Behavior,Behavior,1
1288,Body Location or Region,Body Location or Region,4
1416,"Body Part, Organ, or Organ Component","Body Part, Organ, or Organ Component",9
1544,Body Space or Junction,Body Space or Junction,3
1672,Body Substance,Body Substance,2
1802,Cell,Cell,4
1930,Cell Component,Cell Component,3
2060,Chemical,Chemical,1


In [16]:
# Symmetric relations: pairs of distinct semantic types that are related in both
# directions. Only the two types are compared; the relation names may differ.
srstr_sty_d_non_relfex = srstr_sty_d[srstr_sty_d.STY1 != srstr_sty_d.STY2]
# Gather the matches per row and concatenate once at the end: concatenating inside
# the loop re-copies the accumulated frame on every iteration.
reversed_matches = []
for row in srstr_sty_d_non_relfex.itertuples(index=False):
    # Rows pointing the opposite way, i.e. from row.STY2 to row.STY1
    is_reverse = (
        (srstr_sty_d_non_relfex.STY1 == row.STY2)
        & (srstr_sty_d_non_relfex.STY2 == row.STY1)
    )
    if is_reverse.any():
        reversed_matches.append(srstr_sty_d_non_relfex[is_reverse])
if reversed_matches:
    srstr_sty_d_symmetric = pd.concat(reversed_matches)
else:
    # No symmetric pair at all: keep an empty frame with the expected columns.
    srstr_sty_d_symmetric = pd.DataFrame(columns=["STY1", "RL", "STY2", "LS"])
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    # A reverse row is collected once per matching forward row, so de-duplicate for display.
    display(srstr_sty_d_symmetric.drop_duplicates())

Unnamed: 0,STY1,RL,STY2,LS
416,Pathologic Function,co-occurs_with,Anatomical Abnormality,D
419,Pathologic Function,complicates,Anatomical Abnormality,D
448,Phenomenon or Process,result_of,Anatomical Abnormality,D
357,Mental Process,affects,Behavior,D
363,Mental Process,result_of,Behavior,D
263,Group,exhibits,Behavior,D
364,Mental or Behavioral Dysfunction,affects,Behavior,D
53,Biologically Active Substance,complicates,Biologic Function,D
541,Virus,location_of,Biologically Active Substance,D
49,Biologic Function,produces,Biologically Active Substance,D


In [17]:
# Symmetric relation frequencies: for every non-reflexive (STY1, STY2) pair that
# occurs, collect the reversed (STY2, STY1) pair when it occurs as well.
existing_non_relfex_rl = existing_rl[existing_rl.STY1 != existing_rl.STY2]
# Gather the matches per row and concatenate once at the end: concatenating inside
# the loop re-copies the accumulated frame on every iteration.
reversed_pairs = []
for row in existing_non_relfex_rl.itertuples(index=False):
    # Rows pointing the opposite way, i.e. from row.STY2 to row.STY1
    is_reverse = (
        (existing_non_relfex_rl.STY1 == row.STY2)
        & (existing_non_relfex_rl.STY2 == row.STY1)
    )
    if is_reverse.any():
        reversed_pairs.append(existing_non_relfex_rl[is_reverse])
if reversed_pairs:
    symmetric_rl = pd.concat(reversed_pairs)
else:
    # No symmetric pair at all: keep an empty frame with the expected columns.
    symmetric_rl = pd.DataFrame(columns=["STY1", "STY2", "total"])
display(symmetric_rl)

Unnamed: 0,STY1,STY2,total
7753,Anatomical Abnormality,Pathologic Function,1
8134,Anatomical Abnormality,Phenomenon or Process,1
4457,Behavior,Group,1
5854,Behavior,Mental Process,3
5981,Behavior,Mental or Behavioral Dysfunction,1
1156,Biologic Function,Biologically Active Substance,1
1030,Biologically Active Substance,Biologic Function,1
3824,Biologically Active Substance,Fully Formed Anatomical Structure,1
9920,Biologically Active Substance,Virus,1
1415,Body Location or Region,"Body Part, Organ, or Organ Component",2


## SRSTRE2

In [18]:
# Preview the labelled SRSTRE2 table.
srstre2

Unnamed: 0,STY/RL1,RL,STY/RL2
0,Acquired Abnormality,affects,Amphibian
1,Acquired Abnormality,affects,Animal
2,Acquired Abnormality,affects,Archaeon
3,Acquired Abnormality,affects,Bacterium
4,Acquired Abnormality,affects,Bird
...,...,...,...
6212,treats,isa,functionally_related_to
6213,tributary_of,isa,associated_with
6214,tributary_of,isa,physically_related_to
6215,uses,isa,associated_with


### Partition data
- Relations among semantic types
- Relations among relations

In [19]:
# Semantic-type rows occupy index labels 0 through 6104 (.loc slicing includes the end label)
srstre2_sty = srstre2.loc[:6104].set_axis(["STY1", "RL", "STY2"], axis=1)
srstre2_sty

Unnamed: 0,STY1,RL,STY2
0,Acquired Abnormality,affects,Amphibian
1,Acquired Abnormality,affects,Animal
2,Acquired Abnormality,affects,Archaeon
3,Acquired Abnormality,affects,Bacterium
4,Acquired Abnormality,affects,Bird
...,...,...,...
6100,Vitamin,isa,Entity
6101,Vitamin,isa,Physical Object
6102,Vitamin,isa,Substance
6103,Vitamin,issue_in,Biomedical Occupation or Discipline


In [20]:
# Relation-to-relation rows: everything from index label 6105 to the end of SRSTRE2
srstre2_rl = srstre2.loc[6105:].set_axis(["RL1", "RL", "RL2"], axis=1)
srstre2_rl

Unnamed: 0,RL1,RL,RL2
6105,adjacent_to,isa,associated_with
6106,adjacent_to,isa,spatially_related_to
6107,affects,isa,associated_with
6108,affects,isa,functionally_related_to
6109,analyzes,isa,associated_with
...,...,...,...
6212,treats,isa,functionally_related_to
6213,tributary_of,isa,associated_with
6214,tributary_of,isa,physically_related_to
6215,uses,isa,associated_with


### Metadata
- relation frequencies

In [21]:
# Number of semantic-type triples per relation, most frequent first
srstre2_sty.RL.value_counts()

affects                    916
result_of                  608
isa                        464
process_of                 428
interacts_with             331
location_of                292
causes                     288
complicates                254
issue_in                   254
associated_with            246
produces                   226
manifestation_of           200
part_of                    185
measures                   156
disrupts                   140
occurs_in                   93
performs                    90
co-occurs_with              87
precedes                    86
uses                        65
evaluation_of               63
measurement_of              58
treats                      56
assesses_effect_of          53
diagnoses                   48
degree_of                   46
exhibits                    42
property_of                 40
analyzes                    40
carries_out                 38
prevents                    32
method_of                   28
indicate

## Compare SRSTR and SRSTRE2
- The semantic types are the same in the two files

In [22]:
# Distinct values seen in either argument position of SRSTR's semantic-type rows
semantic_types_srstr = (
    pd.concat([srstr_sty["STY1"], srstr_sty["STY2"]])
    .drop_duplicates()
)
semantic_types_srstr

0                 Acquired Abnormality
3                             Activity
4                            Age Group
5                  Amino Acid Sequence
8      Amino Acid, Peptide, or Protein
                    ...               
521                             Tissue
538                         Vertebrate
539                              Virus
542                            Vitamin
212                                NaN
Length: 128, dtype: object

In [23]:
# Distinct values seen in either argument position of SRSTRE2's semantic-type rows
semantic_types_srstre2 = (
    pd.concat([srstre2_sty["STY1"], srstre2_sty["STY2"]])
    .drop_duplicates()
)
semantic_types_srstre2

0                      Acquired Abnormality
131                                Activity
134                               Age Group
175                     Amino Acid Sequence
185         Amino Acid, Peptide, or Protein
                       ...                 
5842    Therapeutic or Preventive Procedure
5907                                 Tissue
5977                             Vertebrate
5996                                  Virus
6031                                Vitamin
Length: 127, dtype: object

In [24]:
# Symmetric difference: each input is already de-duplicated, so keep=False removes
# every value present in both files and leaves only those unique to one of them.
(
    pd.concat([semantic_types_srstr, semantic_types_srstre2])
    .drop_duplicates(keep=False)
)

212    NaN
dtype: object

#### Explanation:
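For SRSTR, the list has 128 entries rather than 127 because the topmost nodes such as "Event" and "Entity" are connected to an empty string (""), which is read as NaN. SRSTR therefore contains one extra NaN entry compared with SRSTRE2; the 127 actual semantic types are the same in both files.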