# Concept table to EMERSE
This notebook transforms a concept table in the MedCAT format (see https://github.com/CogStack/MedCAT/tree/master/examples) into the synonyms format for EMERSE (see https://project-emerse.org/documentation/administrator_guide.html#truepreparing-a-synonyms-file).

In [None]:
import pandas as pd
from pathlib import Path

# Source concept table (MedCAT format) and destination synonyms file (EMERSE format).
# Both live in the same directory, relative to the working directory.
concept_db_dir = Path('04_ConceptDB')
input_file = concept_db_dir / 'umls-dutch_v1.11-beta_with_drugs.csv'
output_file = concept_db_dir / 'emerse-synonyms-umls-dutch_v1.11-beta_with_drugs.csv'

In [None]:
# Read every column as a string. Only empty fields are treated as missing:
# with pandas' default NA markers, a concept name that is literally "NA",
# "N/A", "null", "None" or "nan" would be parsed as NaN instead of text.
input_df = pd.read_csv(
    input_file,
    dtype='str',
    keep_default_na=False,
    na_values=[''],
)

In [None]:
# Keep the concept identifier and name columns, and add a constant `type`
# column set to 1 for every row.
output_df = input_df.loc[:, ['cui', 'name']].assign(type=1)
print(len(output_df))

# Drop rows whose name is longer than 150 characters. Rows with a missing
# name fail the comparison and are dropped as well.
name_lengths = output_df['name'].str.len()
output_df = output_df.loc[name_lengths <= 150].copy()
print(len(output_df))
output_df.head()

In [None]:
# Header rows placed at the top of the synonyms file. Each row has three
# tab-separated fields: the fixed marker, a key and its value.
metadata_rows = [
    ("name", "Dutch UMLS names"),
    ("description", "Contains Dutch MeSH, MedDRA, ICD10 and ICPC names from UMLS, as well as Dutch SNOMED-CT and English drug names from RXNORM, DRUGBANK and ATC."),
    ("url", "https://github.com/umcu/dutch-medical-entities/"),
    ("last_updated", "02/11/2022"),
]
metadata = "".join(
    f"emerse_synonyms_metadata\t{key}\t{value}\n" for key, value in metadata_rows
)
print(metadata)

In [None]:
# Write the metadata header first, then append the synonym rows below it.
# The encoding is pinned to UTF-8 on both writes: open() otherwise uses the
# platform's locale encoding, which can differ from the UTF-8 that to_csv
# uses by default and would leave the file with mixed encodings.
with open(output_file, 'w', encoding='utf-8') as f:
    f.write(metadata)

# Rows are appended tab-separated, without a header row or index column.
output_df.to_csv(
    output_file,
    mode='a',
    index=False,
    header=False,
    sep='\t',
    encoding='utf-8',
)