# Scenario : 

All metadata are stored on the same MongoDB server (one database per metadata model, loaded after a file-format transformation) to simplify the experiments. The scenario searches on the following fields:
- Title (contains : "Summary","Register","Note","Example","Test","Sample")
- Spatial extents
- Unit of measure
- language (usage of vocabularies / semantics models)
- Contact name

In [206]:
import pandas as pd
import warnings
from pprint import pprint
from pymongo import MongoClient
import ast 
import re
warnings.filterwarnings('ignore')


# Table of structural matchings between metadata models. Its "model_A" and
# "model_B" columns are read as text and parsed later with ast.literal_eval.
MATCHINGS_CSV = "struct_matchings_list.csv"
struct_matches = pd.read_csv(MATCHINGS_CSV)



# Title

In [207]:
# Starting point: the AERIS title field ("resourceTitle.en").
# Goal: collect every (model, key) pair that the matchings table links to
# that key, so the same search can be run against each metadata model.

model = "AERIS"
key = "resourceTitle.en"

# Every model_A / model_B cell is the text of a Python literal whose item 0
# is a model name and item 1 a key path. Parse each column once instead of
# re-parsing it for every filter.
pairs_a = struct_matches["model_A"].apply(ast.literal_eval)
pairs_b = struct_matches["model_B"].apply(ast.literal_eval)

# Rows that involve the starting model on either side. Not used by the
# query; kept so the rows can be inspected.
model_all_lines = struct_matches[
    pairs_a.apply(lambda pair: pair[0] == model)
    | pairs_b.apply(lambda pair: pair[0] == model)
]

# Rows whose key path on either side equals the starting key.
# NOTE(review): the filter looks at the key path only, not at the
# (model, key) pair, so a same-named key of another model also matches.
key_rows = (
    pairs_a.apply(lambda pair: pair[1] == key)
    | pairs_b.apply(lambda pair: pair[1] == key)
)

# .copy() so the assignments below write to an independent frame rather than
# to a slice of struct_matches.
matches = struct_matches[key_rows].copy()
matches["model_A"] = pairs_a[key_rows]
matches["model_B"] = pairs_b[key_rows]

# Distinct pairs found on either side of the matched rows.
list_of_matches = set(matches["model_A"]) | set(matches["model_B"])

In [208]:



# Title search: keep the documents whose title field contains at least one of
# the words Summary, Register, Note, Example, Test or Sample.

title_pattern = "Summary|Register|Note|Example|Test|Sample"

client = MongoClient("localhost:27017")
results = {}
for pair in list_of_matches:
    model, key = pair[0], pair[1]
    # One database per model; the projection returns only the searched field.
    cursor = client[model].interop_metadata.find(
        {key: {"$regex": title_pattern}},
        {key: 1, "_id": 0},
    )
    # NOTE(review): results is keyed by model, so a second key for the same
    # model replaces the first one's documents.
    results[model] = list(cursor)

print(results)

{'SDMX': [{'data': {'structures': [{'annotations': [{'title': 'Sample observation annotation title'}]}]}}, {'data': {'structures': [{'annotations': [{'title': 'Sample series annotation title'}]}]}}, {'data': {'structures': [{'annotations': [{'title': 'Sample series annotation title'}, {'title': 'Sample observation annotation title'}]}]}}], 'EngMeta': [{'dataset': {'title': {'@value': 'This Is An English Testtitle'}}}], 'FHIR': [{'meta': {'extension': [{'valueString': 'HL7East Example'}, {}]}}], 'SensorML': [{'SimpleProcess': {'name': {'@value': 'Sensor Model Test'}}}], 'C-CDA': [{'ClinicalDocument': {'title': {'@value': 'Consultation Note'}}}, {'ClinicalDocument': {'title': {'@value': 'Community Health and Hospitals: Health Summary'}}}], 'DDI': [], 'PREMIS': [], 'TEI': [], 'DublinCore': [], 'e-GMS': [], 'DataCite': [{'resource': {'titles': {'title': {'@value': ['Full DataCite XML Example', 'Demonstration of DataCite Properties.']}}}}], 'PDB': [], 'AERIS': [], 'DarwinCore': [{'datasetNa

# Spatial extents

In [209]:


# Starting point: the DarwinCore latitude field ("decimalLatitude").
model = "DarwinCore"
key = "decimalLatitude"

# Every model_A / model_B cell is the text of a Python literal whose item 0
# is a model name and item 1 a key path. Parse each column once instead of
# re-parsing it for every filter.
pairs_a = struct_matches["model_A"].apply(ast.literal_eval)
pairs_b = struct_matches["model_B"].apply(ast.literal_eval)

# Rows that involve the starting model on either side. Not used by the
# query; kept so the rows can be inspected.
model_all_lines = struct_matches[
    pairs_a.apply(lambda pair: pair[0] == model)
    | pairs_b.apply(lambda pair: pair[0] == model)
]

# Rows whose key path on either side equals the starting key.
# NOTE(review): the filter looks at the key path only, not at the
# (model, key) pair, so a same-named key of another model also matches.
key_rows = (
    pairs_a.apply(lambda pair: pair[1] == key)
    | pairs_b.apply(lambda pair: pair[1] == key)
)

# .copy() so the assignments below write to an independent frame rather than
# to a slice of struct_matches.
matches = struct_matches[key_rows].copy()
matches["model_A"] = pairs_a[key_rows]
matches["model_B"] = pairs_b[key_rows]

# Distinct pairs found on either side of the matched rows.
list_of_matches_lati = set(matches["model_A"]) | set(matches["model_B"])


# Position search: documents whose latitude field holds a value of at most 50
# (a deliberately wide bound for the example). When the field sits inside an
# array, one matching element is enough and the projection returns the whole
# array, so values above 50 can appear in the output.

client = MongoClient("localhost:27017")
results = {}
for pair in list_of_matches_lati:
    model, key = pair[0], pair[1]
    cursor = client[model].interop_metadata.find(
        {key: {"$lte": 50}},
        {key: 1, "_id": 0},
    )
    # NOTE(review): results is keyed by model, so a second key for the same
    # model replaces the first one's documents.
    results[model] = list(cursor)

pprint(results)

{'AERIS': [{'spatialExtents': [{'area': {'latitude': 43.6855}}]},
           {'spatialExtents': [{'area': {'latitude': 14.394}}]},
           {'spatialExtents': [{'area': {'latitude': 48.709}}]},
           {'spatialExtents': [{'area': {'latitude': 48.709}}]},
           {'spatialExtents': [{'area': {'latitude': 43.93}}]},
           {'spatialExtents': [{'area': {}},
                               {'area': {'latitude': 35.86}},
                               {'area': {'latitude': 37.96}},
                               {'area': {'latitude': 41.393}},
                               {'area': {'latitude': 51.834999}},
                               {'area': {'latitude': 38.5678}},
                               {'area': {'latitude': 37.164}},
                               {'area': {'latitude': 47.8019}},
                               {'area': {'latitude': 44.348}},
                               {'area': {'latitude': 62.7333}},
                               {'area': {'latitude': 51.35}

# Unit of measure

Search for temperature datasets

In [210]:

# Starting point: the EngMeta unit field of a measured variable.
model = "EngMeta"
key = "dataset.measuredVariable.unit.@value"

# Every model_A / model_B cell is the text of a Python literal whose item 0
# is a model name and item 1 a key path. Parse each column once instead of
# re-parsing it for every filter.
pairs_a = struct_matches["model_A"].apply(ast.literal_eval)
pairs_b = struct_matches["model_B"].apply(ast.literal_eval)

# Rows that involve the starting model on either side. Not used by the
# query; kept so the rows can be inspected.
model_all_lines = struct_matches[
    pairs_a.apply(lambda pair: pair[0] == model)
    | pairs_b.apply(lambda pair: pair[0] == model)
]

# Rows whose key path on either side equals the starting key.
# NOTE(review): the filter looks at the key path only, not at the
# (model, key) pair, so a same-named key of another model also matches.
key_rows = (
    pairs_a.apply(lambda pair: pair[1] == key)
    | pairs_b.apply(lambda pair: pair[1] == key)
)

# .copy() so the assignments below write to an independent frame rather than
# to a slice of struct_matches.
matches = struct_matches[key_rows].copy()
matches["model_A"] = pairs_a[key_rows]
matches["model_B"] = pairs_b[key_rows]

# Distinct pairs found on either side of the matched rows. The "_lati" suffix
# is inherited from the latitude cell; these pairs are unit-of-measure fields.
list_of_matches_lati = set(matches["model_A"]) | set(matches["model_B"])


# Unit search: documents whose unit field mentions "Celsius", ignoring case.
# The pattern is the same for every model, so it is compiled once.
celsius_pattern = re.compile('Celsius', re.IGNORECASE)

client = MongoClient("localhost:27017")
results = {}
for pair in list_of_matches_lati:
    model, key = pair[0], pair[1]
    cursor = client[model].interop_metadata.find(
        {key: {"$regex": celsius_pattern}},
        {key: 1, "_id": 0},
    )
    # NOTE(review): results is keyed by model, so a second key for the same
    # model replaces the first one's documents.
    results[model] = list(cursor)
pprint(results)

{'AERIS': [{'parameterSet': [{'uom': 'meters per second'},
                             {'uom': 'Degrees Celsius'},
                             {'uom': 'grams per kilogram'},
                             {'uom': 'millibars'},
                             {'uom': 'degrees'}]},
           {'parameterSet': [{'uom': 'meter per second'},
                             {'uom': 'percent'},
                             {'uom': 'meter per second'},
                             {'uom': 'Watt per square meter'},
                             {'uom': 'Watt per square meter'},
                             {'uom': 'Watt per square meter'},
                             {'uom': 'cubic meter per cubic meter'},
                             {'uom': 'meter per second'},
                             {'uom': ''},
                             {'uom': 'Degrees Celsius'},
                             {'uom': 'Degrees Celsius'},
                             {'uom': 'meter per second'},
                           

# Language

Two languages: English and French

In [211]:
# Language names and codes (ISO and IETF standards) used to translate a
# language label into the set of codes to search for.
LANGUAGE_CODES_CSV = "language_sem_models.csv"
language_codes = pd.read_csv(LANGUAGE_CODES_CSV)



In [217]:


# Starting point: the ISO 19115 metadata language code field.
model = "ISO19115"
key = "MD_Metadata.language.LanguageCode.codeListValue"

# Every model_A / model_B cell is the text of a Python literal whose item 0
# is a model name and item 1 a key path. Parse each column once instead of
# re-parsing it for every filter.
pairs_a = struct_matches["model_A"].apply(ast.literal_eval)
pairs_b = struct_matches["model_B"].apply(ast.literal_eval)

# Rows that involve the starting model on either side. Not used by the
# query; kept so the rows can be inspected.
model_all_lines = struct_matches[
    pairs_a.apply(lambda pair: pair[0] == model)
    | pairs_b.apply(lambda pair: pair[0] == model)
]

# Rows whose key path on either side equals the starting key.
# NOTE(review): the filter looks at the key path only, not at the
# (model, key) pair, so a same-named key of another model also matches.
key_rows = (
    pairs_a.apply(lambda pair: pair[1] == key)
    | pairs_b.apply(lambda pair: pair[1] == key)
)

# .copy() so the assignments below write to an independent frame rather than
# to a slice of struct_matches.
matches = struct_matches[key_rows].copy()
matches["model_A"] = pairs_a[key_rows]
matches["model_B"] = pairs_b[key_rows]

# Distinct pairs found on either side of the matched rows. The "_lati" suffix
# is inherited from the latitude cell; these pairs are language fields.
list_of_matches_lati = set(matches["model_A"]) | set(matches["model_B"])

# The language is designated by its English name ("French"): keep the rows of
# the code table that contain that word in any column. A boolean row mask is
# used so the selection does not depend on the frame having a default index.
val = language_codes[(language_codes == "French").any(axis=1)]

# Every distinct value of those rows, column by column, without missing cells.
list_of_code = []
for column in val:
    list_of_code.extend(val[column].unique())
list_of_code = [code for code in list_of_code if not pd.isna(code)]

# Same codes as a regex alternation; the query below uses "$in" with
# list_of_code instead.
operand = "|".join(list_of_code)


client = MongoClient("localhost:27017")
results = {}
for pair in list_of_matches_lati:
    model, key = pair[0], pair[1]
    cursor = client[model].interop_metadata.find(
        {key: {"$in": list_of_code}},
        {key: 1, "_id": 0},
    )
    # NOTE(review): results is keyed by model, so a second key for the same
    # model replaces the first one's documents.
    results[model] = list(cursor)

results


{'EngMeta': [],
 'DublinCore': [{'metadata': {'description': {'lang': 'fr'}}}],
 'ISO19115': [{'MD_Metadata': {'language': {'LanguageCode': {'codeListValue': 'fre'}}}}],
 'OLAC': [],
 'AERIS': [],
 'e-GMS': [],
 'C-CDA': [],
 'DDI': [],
 'DataCite': [],
 'SDMX': [],
 'DarwinCore': []}

In [213]:
# Bare expression: echoes the codes left by a previously run cell, and only
# when it is the last statement of a notebook cell.
list_of_code

# Starting point: the SDMX content-languages field.
model = "SDMX"
key = "meta.contentLanguages"

# Every model_A / model_B cell is the text of a Python literal whose item 0
# is a model name and item 1 a key path. Parse each column once instead of
# re-parsing it for every filter.
pairs_a = struct_matches["model_A"].apply(ast.literal_eval)
pairs_b = struct_matches["model_B"].apply(ast.literal_eval)

# Rows that involve the starting model on either side. Not used by the
# query; kept so the rows can be inspected.
model_all_lines = struct_matches[
    pairs_a.apply(lambda pair: pair[0] == model)
    | pairs_b.apply(lambda pair: pair[0] == model)
]

# Rows whose key path on either side equals the starting key.
# NOTE(review): the filter looks at the key path only, not at the
# (model, key) pair, so a same-named key of another model also matches.
key_rows = (
    pairs_a.apply(lambda pair: pair[1] == key)
    | pairs_b.apply(lambda pair: pair[1] == key)
)

# .copy() so the assignments below write to an independent frame rather than
# to a slice of struct_matches.
matches = struct_matches[key_rows].copy()
matches["model_A"] = pairs_a[key_rows]
matches["model_B"] = pairs_b[key_rows]

# Distinct pairs found on either side of the matched rows. The "_lati" suffix
# is inherited from the latitude cell; these pairs are language fields.
list_of_matches_lati = set(matches["model_A"]) | set(matches["model_B"])

# The language is designated by its French name ("anglais", i.e. English):
# keep the rows of the code table that contain that word in any column. A
# boolean row mask is used so the selection does not depend on the frame
# having a default index.
val = language_codes[(language_codes == "anglais").any(axis=1)]

# Every distinct value of those rows, column by column, without missing cells.
list_of_code = []
for column in val:
    list_of_code.extend(val[column].unique())
list_of_code = [code for code in list_of_code if not pd.isna(code)]

# Same codes as a regex alternation; the query below uses "$in" with
# list_of_code instead.
operand = "|".join(list_of_code)


client = MongoClient("localhost:27017")
results = {}
for pair in list_of_matches_lati:
    model, key = pair[0], pair[1]
    cursor = client[model].interop_metadata.find(
        {key: {"$in": list_of_code}},
        {key: 1, "_id": 0},
    )
    # NOTE(review): results is keyed by model, so a second key for the same
    # model replaces the first one's documents.
    results[model] = list(cursor)

results



{'EngMeta': [{'dataset': {'description': {'lang': 'en'}}}],
 'DublinCore': [],
 'ISO19115': [],
 'OLAC': [{'olac': {'language': {'code': 'en'}}}],
 'AERIS': [{'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'},
  {'language': 'en'}],
 'e-GMS': [],
 'C-CDA': [{'ClinicalDocument': {'languageCode': {'code': 'en-US'}}},
  {'ClinicalDocument': {'languageCode': {'code': 'en-US'}}}],
 'DDI': [{'FragmentInstance': {'Fragment': {'DataCollection': {'DataCollectionModuleName': {'String': {'lang': 'en-US'}}}}}}],
 'DataCite': [{'resource': {'language': {'@value': 'en-US'}}}],
 'SDMX': [{'meta': {'contentLanguages': ['en']}},
  {'meta': {'con

# Contact name
Search for "François" 

In [216]:

# Starting point: the e-GMS "Creator" field.
model = "e-GMS"
key = "Creator"

# Every model_A / model_B cell is the text of a Python literal whose item 0
# is a model name and item 1 a key path. Parse each column once instead of
# re-parsing it for every filter.
pairs_a = struct_matches["model_A"].apply(ast.literal_eval)
pairs_b = struct_matches["model_B"].apply(ast.literal_eval)

# Rows that involve the starting model on either side. Not used by the
# query; kept so the rows can be inspected.
model_all_lines = struct_matches[
    pairs_a.apply(lambda pair: pair[0] == model)
    | pairs_b.apply(lambda pair: pair[0] == model)
]

# Rows whose key path on either side equals the starting key.
# NOTE(review): the filter looks at the key path only, not at the
# (model, key) pair, so a same-named key of another model also matches.
key_rows = (
    pairs_a.apply(lambda pair: pair[1] == key)
    | pairs_b.apply(lambda pair: pair[1] == key)
)

# .copy() so the assignments below write to an independent frame rather than
# to a slice of struct_matches.
matches = struct_matches[key_rows].copy()
matches["model_A"] = pairs_a[key_rows]
matches["model_B"] = pairs_b[key_rows]

# Distinct pairs found on either side of the matched rows. The "_lati" suffix
# is inherited from the latitude cell; these pairs are contact-name fields.
list_of_matches_lati = set(matches["model_A"]) | set(matches["model_B"])

# Contact search: both spellings are listed because the regex is matched
# literally, so "Francois" does not match "François".
name_pattern = "Francois|François"

client = MongoClient("localhost:27017")
results = {}
for pair in list_of_matches_lati:
    model, key = pair[0], pair[1]
    cursor = client[model].interop_metadata.find(
        {key: {"$regex": name_pattern}},
        {key: 1, "_id": 0},
    )
    # NOTE(review): results is keyed by model, so a second key for the same
    # model replaces the first one's documents.
    results[model] = list(cursor)

pprint(results)

{'AERIS': [{'contacts': [{'name': 'Nicolas PASCAL'},
                         {'name': 'AERIS/ICARE Helpdesk'},
                         {'name': 'Jean Sciare'},
                         {'name': 'Valerie Gros'},
                         {'name': 'Francois Truong'}]},
           {'contacts': [{'name': 'Nicolas PASCAL'},
                         {'name': 'AERIS/ICARE Helpdesk'},
                         {'name': 'Jean Sciare'},
                         {'name': 'Valerie Gros'},
                         {'name': 'Francois Truong'}]},
           {'contacts': [{'name': 'François Gheusi\xa0'},
                         {'name': 'NDACC contact'}]},
           {'contacts': [{'name': 'Nicolas PASCAL'},
                         {'name': 'AERIS/ICARE Helpdesk'},
                         {'name': 'Jean Sciare'},
                         {'name': 'Valerie Gros'},
                         {'name': 'Francois Truong'}]},
           {'contacts': [{'name': 'Nicolas PASCAL'},
                         {'n