# In [1]:
import pandas as pd
import json

# Columns kept from the enrollment event rows of the REDCap export.
enrollment_columns = ['record_id', 'apple_pid_1']

# REDCap symptom checkbox field -> symptom label used as the key in the
# exported symptom sets. Insertion order is the order labels are emitted in.
symptoms_in_redcap = {
    'symptoms___fever': 'Fever',
    'symptoms___breathe': 'Shortness of breath/increased trouble breathing',
    'symptoms___chills': 'Chills or shivering',
    'symptoms___cough': 'New or worsening cough',
    'symptoms___tired': 'Fatigue',
    'symptoms___ache': 'Muscle or body aches',
    'symptoms___headache': 'Headache',
    'symptoms___taste': 'New loss of taste',
    'symptoms___smell': 'New loss of smell',
    'symptoms___throat': 'Sore throat or itchy/scratchy throat',
    'symptoms___nose': 'Runny or stuffy nose/congestion',
    'symptoms___nausea': 'Nausea or vomiting',
    'symptoms___diarrhea': 'Diarrhea (3 loose stools in 24 hours)',
    'symptoms___sneeze': 'Sneezing',
    'symptoms___none': 'None of the above',
}

# Columns kept from the symptom event rows: the fixed fields first, followed
# by one column per symptom checkbox field (the keys of the mapping above).
symptom_columns = [
    'record_id',
    'symptom_duration',
    'collection_barcode',
    'rejection_reason',
    *symptoms_in_redcap,
]

    
# Load the Apple REDCap data from the CSV generated by the REDCap report.
# REDCap report at https://redcap.iths.org/redcap_v11.1.2/DataExport/index.php?pid=24499&report_id=109255
# dtype='string' keeps every column as text, so no values are type-inferred.
redcap = pd.read_csv('apple-redcap-data.csv', dtype='string')

# Enrollment fields and symptom fields are taken from different event rows of
# the export; select each event's rows and columns in one .loc call.
event_names = redcap['redcap_event_name']
enrollment = redcap.loc[event_names == 'enrollment_arm_1', enrollment_columns]
symptoms = redcap.loc[
    event_names.isin(['baseline_test_arm_1', 'illness_episode_arm_1']),
    symptom_columns,
]

# One output row per symptom row, carrying the matching enrollment fields.
# The inner join drops records that lack either an enrollment or a symptom row.
combined_data = enrollment.merge(symptoms, on='record_id', how='inner')

# Build one entry per REDCap record: the participant's external ID plus one
# symptom set for every symptom row merged onto that record.
records = {}
for _, row in combined_data.iterrows():
    # Map symptom fields to symptoms. The per-row dict is deliberately NOT
    # named `symptoms`, so it does not clobber the `symptoms` DataFrame.
    # Values go through int() before bool() because the CSV is read as
    # strings and bool('0') would be True.
    symptom_set = {
        symptom: bool(int(row[redcap_field]))
        for redcap_field, symptom in symptoms_in_redcap.items()
    }

    # A missing start date is exported as the literal 'N/A'.
    start_date = row['symptom_duration']
    symptom_set['SymptomStartDate'] = 'N/A' if pd.isna(start_date) else start_date

    # Create the record entry on first sight, then append to it every time.
    record = records.setdefault(row.record_id, {
        'ParticipantExternalID': row.apple_pid_1,
        'SymptomSets': [],
    })
    record['SymptomSets'].append(symptom_set)

# Write JSON that follows format outlined in https://trello.com/c/jaWjb30Z/876-apple-jsons-data-sharing
# The output is a list of record entries; the record_id keys are not exported.
with open('apple-symptoms.json', 'w') as symptoms_handle:
    json.dump(list(records.values()), symptoms_handle)


# Read in Apple results from CSV generated by download from Metabase
# Metabase query at https://backoffice.seattleflu.org/metabase/question/733
results_df = pd.read_csv('apple-results.csv', dtype='string')

# Attach record_id and participant ID to each result via the sample barcode.
# REDCap rows without a barcode are removed from the lookup first: pandas
# merge matches null keys against each other, so a result with a missing
# barcode would otherwise be joined to every barcode-less REDCap row.
barcode_lookup = combined_data[['record_id', 'collection_barcode', 'apple_pid_1']].dropna(
    subset=['collection_barcode']
)
results_df = results_df.merge(barcode_lookup, on=['collection_barcode'], how='inner')

# Build one entry per REDCap record: the participant's external ID plus every
# diagnosis set found across that record's result rows.
results = {}
for _, row in results_df.iterrows():
    record_results = []

    # NOTE(review): hcov19_result is parsed unconditionally, so a missing
    # value raises here - presumably the query only returns rows that have
    # an hCoV-19 result; confirm.
    hcov19 = json.loads(row.hcov19_result)
    hcov19['CollectionDate'] = row.collection_date
    hcov19['ResultDate'] = row.hcov19_result_date
    record_results.append(hcov19)

    # Only include OA results if available
    if not pd.isna(row.oa_results):
        open_array = json.loads(row.oa_results)
        open_array['CollectionDate'] = row.collection_date
        open_array['ResultDate'] = row.oa_result_date
        record_results.append(open_array)

    # Create the record entry on first sight, then extend it every time.
    record = results.setdefault(row.record_id, {
        'ParticipantExternalID': row.apple_pid_1,
        'DiagnosisSets': [],
    })
    record['DiagnosisSets'].extend(record_results)

# Write JSON that follows format outlined in https://trello.com/c/jaWjb30Z/876-apple-jsons-data-sharing
# The output is a list of record entries; the record_id keys are not exported.
with open('apple-results.json', 'w') as results_handle:
    json.dump(list(results.values()), results_handle)