# Solution

Returns a history of all data entered in a REDCap project for specified subjects via REDCap logs. Exported to an Excel file.

In [7]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import AMBRA_Backups
from AMBRA_Backups.redcap_funcs import extract_details

db = AMBRA_Backups.database.Database("STEP")


def q(query, record=None):
    if any(substring in query for substring in ["UPDATE", "INSERT", "DELETE"]):
        return db.run_insert_query(query, record)
    elif "SELECT" in query:
        return pd.DataFrame(db.run_select_query(query, record, column_names=True))
    else:
        print("You dont have UPDATE, INSERT, DELETE or SELECT in your query my guy")

In [13]:
def rc_data_history(project_name, patients, excel_out=None):
    """
    project_name - str,
    patients - list of strs, pass empty list for all patients
    excel_out - str

    returns a history of all data entered in a redcap project
    for specified patients via redcap logs
    """

    project = AMBRA_Backups.redcap_funcs.get_redcap_project(project_name)
    if patients:
        log_df = project.export_logging(
            format_type="df", log_type="record", record=[patients]
        )
    else:
        log_df = project.export_logging(format_type="df", log_type="record")

    master_df = pd.DataFrame()
    pat_group = log_df.groupby("record")
    for _, pat in tqdm(pat_group):
        pat = pat.sort_values(by="timestamp")
        det = pat.apply(
            lambda x: extract_details(x["details"] + ",")
            if isinstance(x["details"], str)
            else None,
            axis=1,
        ).dropna()
        det = pd.DataFrame(det.tolist(), index=det.index)
        pat = pat[["timestamp", "username", "action", "record"]]
        pat_hist_df = pd.concat([pat, det], axis=1).reset_index(drop=True)
        master_df = pd.concat([master_df, pat_hist_df])

    # move instance column
    ins_idx = master_df.columns.get_loc("[instance]")
    master_df = master_df[
        master_df.columns[:4].to_list()
        + ["[instance]"]
        + master_df.columns[4:ins_idx].to_list()
        + master_df.columns[ins_idx + 1 :].to_list()
    ]

    # making ['timestamp', 'username', 'action', 'record', '[instance]'] a multi-index'd index
    # adding all the form_names, redcap_variables, and field_labels as multi-index'd columns
    md_idx = pd.MultiIndex.from_frame(
        master_df[["timestamp", "username", "action", "record", "[instance]"]]
    )
    master_df.index = md_idx
    master_df = master_df[master_df.columns[5:]]
    project = AMBRA_Backups.redcap_funcs.get_redcap_project("TESTED DC")
    md = pd.DataFrame(project.metadata)
    master_cols = pd.MultiIndex.from_frame(
        md[["form_name", "field_name", "field_label"]]
    )

    df_reindexed = pd.DataFrame(np.nan, index=master_df.index, columns=master_cols)
    for col in master_df.columns:
        matching_cols = [multi_col for multi_col in master_cols if multi_col[1] == col]
        for multi_col in matching_cols:
            df_reindexed[multi_col] = master_df[col]

    if excel_out:
        df_reindexed.to_excel(excel_out)
    return master_df


pats = ["12005", "13006", "14008", "21001"]
rc_data_history("TESTED DC", pats, "TESTED_data_history_1.xlsx")

100%|██████████| 1/1 [00:00<00:00, 245.58it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,q1001,q1002(1),q1002(2),q1002(3),q1004,q1005,q1006,q1007,q7001,q1009,...,mr_init,q2129,ct_comments,q1012_rmca(1),q1015(1),q1015(2),q1017_other,q1032(4),acu_hem_loc(3),f_u_cr_sign
timestamp,username,action,record,[instance],Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
2024-09-04 13:28,zjorj1lr,Create record 21001,21001,,'1',checked,checked,checked,'2024-08-14','02:03','0','1','0','0',...,,,,,,,,,,
2024-09-04 13:31,zjorj1lr,Update record 21001,21001,,,,,,,,,,,,...,,,,,,,,,,
2024-09-04 13:32,zjorj1lr,Update record 21001,21001,,,,,,,,,,,,...,,,,,,,,,,
2024-09-04 13:32,zjorj1lr,Update record 21001,21001,2.0,,,,,,,,,,,...,,,,,,,,,,
2024-09-04 13:33,zjorj1lr,Update record 21001,21001,3.0,,,,,,,,,,,...,,,,,,,,,,
2024-09-04 13:34,zjorj1lr,Update record 21001,21001,4.0,,,,,,,,,,,...,,,,,,,,,,
2024-11-06 11:24,zjorj1lr,Update record 21001,21001,,,,,,,,,,,,...,,,,,,,,,,
2024-11-06 11:24,zjorj1lr,Update record 21001,21001,2.0,,,,,,,,,,,...,,,,,,,,,,
2024-11-06 11:25,zjorj1lr,Update record 21001,21001,3.0,,,,,,,,,,,...,,,,,,,,,,
2024-11-06 11:25,zjorj1lr,Update record 21001,21001,4.0,,,,,,,,,,,...,,,,,,,,,,
