**Comparing OHDSI intermediate _temp files to ML Silver for Premier**
#### 5_write_procedures.py
person_source_value==medrec_key
visit_source_value==pat_key
```mermaid
graph
   procedure_occurrence_temp
   subgraph stage
      stage_procedures_temp-->procedure_occurrence_temp
      person--person_source_value-->procedure_occurrence_temp
      visit_occurrence--visit_source_value-->procedure_occurrence_temp
      concept_procedure--procedure_source_value procedure_source_code_type-->procedure_occurrence_temp
      concept_modifier--code_modifier-->procedure_occurrence_temp
   end


```
* OHDSI Domains  
 - Procedure:   ML Fact_Person where domain_id=='Procedure' to cdh_premier_omop.procedure_occurrence_temp  
 - Condition:   ML Fact_Person where domain_id=='Condition' to cdh_premier_omop.condition_occurrence_temp  
 - Measurement: ML Fact_Person where domain_id=='Measurement' to cdh_premier_omop.measurement_temp  
 - Observation: ML Fact_Person where domain_id=='Observation' to cdh_premier_omop.observation_temp  


In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import *

In [0]:
# Catalog and base schema used to build fully qualified table names,
# e.g. {CATALOG}.{SCHEMA}_omop.<table> and {CATALOG}.{SCHEMA}_ra.<table>.
CATALOG = "edav_prd_cdh"
SCHEMA = "cdh_premier"

In [0]:
# Source table: edav_prd_cdh.cdh_premier_omop_etl.stage_visit
# Exposes the stage_visit id as occurrence_id and casts the two source-value
# columns to LONG so they can serve as person_id / observation_period_id.
stage_visit_query=f"select id as occurrence_id,CAST(person_source_value as LONG) as person_id,CAST(visit_source_value as LONG) as observation_period_id from {CATALOG}.{SCHEMA}_omop_etl.stage_visit "
vo_df=spark.sql(stage_visit_query)
display(vo_df)

In [0]:
""" map fact_person.domain_id to _OMOP.{table} """
# Keys are fact_person.domain_id values; values are the matching *_temp table
# names. Insertion order is kept because the keys are iterated in this order
# when the per-domain frames are built.
DOMAIN_TABLE_DICT=dict(
    Procedure="procedure_occurrence_temp",
    Condition="condition_occurrence_temp",
    Observation="observation_temp",
    Measurement="measurement_temp",
)
# Alternative (inactive) mapping onto the stage_* tables:
#   "Procedure":"stage_procedure_temp",
#   "Condition":"stage_condition_temp",
#   "Measurement":"stage_measurement_temp",
#   "Observation":"stage_observation_temp",


"""
OMOP:Premier:FACT
visit_source_value:patkey:observation_period_id
--visit_detail_source_value:
person_source_value:medrec_key:person_id
FACT_KEYS_DICT={"observation_period_id":"visit_source_value",
                "person_id":"person_source_value",
}
"""

In [0]:
# Domain names, in DOMAIN_TABLE_DICT insertion order.
domain_list=[*DOMAIN_TABLE_DICT]
print(domain_list)


```python
TABLE_DATE_DICT={"Procedure":"procedure_datetime",
                   "Condition":"condition_start_datetime",
                   "Measurement":"measurement_datetime",
                   "Observation":"observation_datetime",
#                    "Procedure":"stage_procedure_temp",
#                   "Condition":"stage_condition_temp",
#                   "Measurement":"stage_measurement_temp",
#                   "Observation":"stage_observation_temp",
                   }
```

In [0]:
from pyspark.sql.types import *

#['observation_period_id', 'person_id', 'observation_datetime', 'type_concept_id', 'concept_code', 'source_concept_id']
# (column name, Spark type, nullable) for every column of the combined frame.
# The order here is the positional column order of silver_df.
_fact_fields = [
    ('observation_period_id', IntegerType(), False),
    ('person_id', IntegerType(), False),
    ('observation_datetime', DateType(), True),
    ('type_concept_id', IntegerType(), False),
    ('concept_code', StringType(), True),
    ('source_concept_id', IntegerType(), False),
    ('source_tbl', StringType(), True),
]
factSchema = StructType(
    [StructField(col_name, col_type, is_nullable)
     for col_name, col_type, is_nullable in _fact_fields]
)
# Empty frame with the target schema; per-domain frames are unioned onto it.
silver_df=spark.createDataFrame([], factSchema)

In [0]:
# Build one normalized frame per OMOP domain and stack them into silver_df.
#
# For every domain in domain_list the matching *_temp table is read,
# de-duplicated, inner-joined to vo_df, reduced to a common column set,
# renamed to the factSchema column names and tagged with its domain in
# "source_tbl".

# Column suffixes shared by all domain tables; real columns are "<domain>_<suffix>".
suffix_list=["type_concept_id","source_value","source_concept_id"]

# Start from an empty frame so re-running this cell does not append the same
# rows a second time.
silver_df=spark.createDataFrame([], factSchema)

for domain in domain_list:
    print(domain)
    lower_domain=domain.lower()

    # Date column that becomes the common "observation_datetime" column.
    # NOTE(review): condition/observation use a *_datetime column while
    # procedure/measurement use *_date -- confirm this is intended.
    if lower_domain=='condition':
        date_var='condition_start_datetime'
    elif lower_domain=='observation':
        date_var=f"{lower_domain}_datetime"
    else:
        date_var=f"{lower_domain}_date"
    print(date_var)

    # Identifier column that is renamed to "occurrence_id" for the join with vo_df.
    if lower_domain in ('measurement','observation'):
        occurrence_var=f"{lower_domain}_id"
    else:
        occurrence_var=f"{lower_domain}_occurrence_id"
    print(occurrence_var)

    # Columns kept from the joined frame, in factSchema order.
    suffixed=[f"{lower_domain}_{suffix}" for suffix in suffix_list]
    keep_list=['observation_period_id','person_id',date_var]+suffixed
    print(keep_list)

    # Strip the domain prefix; the source value and the date column get the
    # domain-independent names used by factSchema.
    renamed_dict={f"{lower_domain}_{suffix}":suffix for suffix in suffix_list}
    renamed_dict[f"{lower_domain}_source_value"]="concept_code"
    renamed_dict[date_var]="observation_datetime"
    print(renamed_dict)

    # NOTE(review): the domain table's own identifier is matched against
    # vo_df.occurrence_id (stage_visit.id) -- confirm these share a key space.
    omop_tbl=DOMAIN_TABLE_DICT[domain]
    omop_df=(
        spark.table(f"{CATALOG}.{SCHEMA}_omop.{omop_tbl}")
        .distinct()
        .withColumnRenamed(occurrence_var,"occurrence_id")
        .join(vo_df,["occurrence_id","person_id"],'inner')
        .select(*keep_list)
        .withColumnsRenamed(renamed_dict)
        .withColumn("source_tbl",F.lit(domain))
    )
    # Show the frame that was just built (previously the table-name string
    # was passed to display by mistake).
    display(omop_df)

    # The comparison against {SCHEMA}_ra.fact_person (left join on the visit /
    # person keys) is not active, so fact_person is no longer loaded here.

    # union() is positional: keep_list + "source_tbl" matches factSchema order.
    silver_df=silver_df.union(omop_df)

In [0]:
# Persist the combined per-domain frame as a Delta table, replacing any
# existing contents.
fact_ohdsi_table=f"{CATALOG}.{SCHEMA}_ra.fact_ohdsi"
(
    silver_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(fact_ohdsi_table)
)