In [1]:
import os, sys
import django
from dateutil import parser

# Make the Django project importable and point Django at its settings module
# before django.setup() is called further down.
sys.path.append('../..') # add path to project root dir
os.environ["DJANGO_SETTINGS_MODULE"] = "tshilo_dikotla.settings"
import pytz, datetime
TIME_ZONE = 'Africa/Gaborone'
tz = pytz.timezone(TIME_ZONE)
# pytz zones must be attached with localize(): datetime.replace(tzinfo=tz)
# silently uses the zone's historical LMT offset instead of the real UTC
# offset for that date.
d = tz.localize(datetime.datetime(2019, 5, 13))

# for more sophisticated setups, if you need to change connection settings (e.g. when using django-environ):
#os.environ["DATABASE_URL"] = "postgres://myuser:mypassword@localhost:54324/mydb"

# Connect to Django ORM
django.setup()
import uuid
from django.contrib.sites.models import Site
from django.core.exceptions import ValidationError
import pandas as pd

In [None]:
from django.apps import apps as django_apps
from td_maternal.models import AntenatalEnrollment, MaternalRando
from edc_appointment.models import Appointment
from edc_registration.models import RegisteredSubject


def maternal_crf_data_dict(crf_obj):
    """Build a flat export row for one maternal CRF instance.

    The row starts as a copy of the instance's ``__dict__`` and is extended
    with values read from the related maternal visit and its appointment,
    the subject's ``AntenatalEnrollment`` and ``RegisteredSubject`` records,
    and, when one exists, the subject's ``MaternalRando`` record.

    Args:
        crf_obj: a model instance exposing ``maternal_visit``.

    Returns:
        dict: a new dict on every call; the instance itself is not modified.

    Raises:
        ValidationError: if no ``AntenatalEnrollment`` or no
            ``RegisteredSubject`` exists for the visit's subject identifier.
    """
    visit = crf_obj.maternal_visit
    appointment = visit.appointment
    subject_identifier = visit.subject_identifier

    # Work on a copy: updating crf_obj.__dict__ in place would add the export
    # keys to the model instance and make every call for the same instance
    # return the very same dict object.
    data = dict(crf_obj.__dict__)
    data.update(
        subject_identifier=subject_identifier,
        visit_datetime=visit.report_datetime,
        last_alive_date=visit.last_alive_date,
        reason=visit.reason,
        survival_status=visit.survival_status,
        visit_code=visit.visit_code,
        study_status=visit.study_status,
        appt_status=appointment.appt_status,
        appt_datetime=appointment.appt_datetime,
    )

    try:
        ae = AntenatalEnrollment.objects.get(subject_identifier=subject_identifier)
    except AntenatalEnrollment.DoesNotExist:
        raise ValidationError('AntenatalEnrollment can not be missing')
    data.update(enrollment_hiv_status=ae.current_hiv_status)

    try:
        rs = RegisteredSubject.objects.get(subject_identifier=subject_identifier)
    except RegisteredSubject.DoesNotExist:
        raise ValidationError('RegisteredSubject can not be missing')
    data.update(
        screening_age_in_years=rs.screening_age_in_years,
        registration_status=rs.registration_status,
        dob=rs.dob,
        gender=rs.gender,
        subject_type=rs.subject_type,
        registration_datetime=rs.registration_datetime,
    )

    try:
        maternal_rando = MaternalRando.objects.get(
            maternal_visit__subject_identifier=subject_identifier)
    except MaternalRando.DoesNotExist:
        # NOTE(review): this also blanks the registration_datetime that was
        # just copied from RegisteredSubject. Kept as-is to preserve the
        # existing export; confirm whether that overwrite is intended.
        data.update(
            rx=None,
            registration_datetime=None,
            randomization_datetime=None
        )
    else:
        data.update(
            rx=maternal_rando.rx,
            randomization_datetime=maternal_rando.randomization_datetime
        )
    return data

In [3]:
# Export common data
# Writes one CSV per maternal CRF model. CRFs listed in `inline_list` get one
# row per related inline record (or a single row when they have none).

# NOTE(review): `not_necessary` is defined here but not applied in this cell.
not_necessary = ['visit_title', 'time_point', 'timeppoint_datetime', 'best_appt_datetime']
crf_list = [
    'maternalultrasoundinitial',
    'maternalobstericalhistory',
    'maternalmedicalhistory',
    'maternaldemographics',
    'maternallifetimearvhistory',
    'maternalarvpreg',
    'maternalclinicalmeasurementsone',
    'maternalrando',
    'maternalinterimidcc',
    'maternalclinicalmeasurementstwo',
    'rapidtestresult',  # was listed twice, which exported the same CRF twice
    'maternaldiagnoses',
    'maternalsubstanceusepriorpreg',
    'maternalpostpartumdep',
    'maternalpostpartumfu',
    'maternalarvpost',
    'maternalcontraception',
    'maternalsrh',
    ]

# CRF model name -> [inline model name, field on the inline used to filter by CRF]
inlines_dict = {
    'maternalarvpreg': ['maternalarv', 'maternal_arv_preg'],
    'maternalarvpost': ['maternalarvpostmed', 'maternal_arv_post'],
}
inline_list = ['maternalarvpreg', 'maternalarvpost']

export_path = '/Users/coulsonkgathi/source/td_migration_export/20190627/maternal/'

for crf_name in crf_list:
    crf_cls = django_apps.get_model('td_maternal', crf_name)
    objs = crf_cls.objects.all()
    crf_data = []

    # Resolve the inline model once per CRF instead of once per row.
    inline_model_cls = None
    field_name = None
    if crf_name in inline_list:
        inline_mdl, field_name = inlines_dict[crf_name]
        # Pass the variable (not the literal 'inline_mdl') plus an app label.
        # NOTE(review): assumes the inline models live in 'td_maternal' - confirm.
        inline_model_cls = django_apps.get_model('td_maternal', inline_mdl)

    for obj in objs:
        # Copy so every exported row is an independent dict.
        base_row = dict(maternal_crf_data_dict(obj))

        inline_rows = []
        if inline_model_cls is not None:
            # NOTE(review): `field_name` looks like the inline's FK back to
            # this CRF; fall back to the CRF instance itself when the CRF has
            # no attribute of that name - confirm against the models.
            parent = getattr(obj, field_name, obj)
            for in_obj in inline_model_cls.objects.filter(**{field_name: parent}):
                in_data = dict(in_obj.__dict__)  # do not mutate the instance
                in_data.pop('id', None)          # keep the CRF's own id
                inline_rows.append(in_data)

        if inline_rows:
            for in_data in inline_rows:
                row = dict(base_row)
                row.update(in_data)
                crf_data.append(row)
        else:
            crf_data.append(base_row)

    count = len(crf_data)
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    fname = 'td_maternal_' + crf_name + '_' + timestamp + '.csv'
    final_path = export_path + fname
    df_crf = pd.DataFrame(crf_data)
    df_crf.to_csv(final_path, encoding='utf-8')
    print('****************', crf_name, '****************')
    print('Total items: ', count)
    # For CRFs with inlines the total can exceed this: it counts CRF rows only.
    print('Expected total items: ', objs.count())
    print('##############################################')
print('Export completed')

**************** maternalultrasoundinitial ****************
Total items:  500
Expected total items:  500
##############################################
**************** maternalobstericalhistory ****************
Total items:  484
Expected total items:  484
##############################################
**************** maternalmedicalhistory ****************
Total items:  493
Expected total items:  493
##############################################
**************** maternaldemographics ****************
Total items:  494
Expected total items:  494
##############################################
**************** maternallifetimearvhistory ****************
Total items:  322
Expected total items:  322
##############################################
**************** maternalarvpreg ****************
Total items:  917
Expected total items:  917
##############################################
**************** maternalclinicalmeasurementsone ****************
Total items:  494
Expected total items:

In [3]:
# Export common data
# Writes one CSV per infant CRF model, each row extended with visit,
# appointment, registration, enrollment and randomization values.
from td_maternal.models import AntenatalEnrollment, MaternalRando
from edc_registration.models import RegisteredSubject

crf_list = [
    'infantbirthdata',
    'infantbirthexam',
    'infantbirthfeedingvaccine',
    'infantbirtharv',
    'infantcongenitalanomalies',
    'infantnvpdispensing',
    'infantfu',
    'infantfuphysical',
    'infantfudx',
    'infantfunewmed',
    'infantarvproph',
    'infantfeeding',
    'solidfoodassessment',
    'infantnvpadjustment',
    'infantfuimmunizations',
]

# NOTE(review): neither list below is applied in this cell.
not_necessary = ['visit_title', 'time_point', 'reason', 'timeppoint_datetime']
infant_crf_required_fields = [
    'subject_identifier', 'maternal_identifier', 'dob', 'gender', 'subject_type', 'rx', 'enrollment_hiv_status',
    'visit_datetime', 'appt_status', 'study_status', 'visit_code', 'appt_datetime', 'best_appt_datetime',
    'screening_age_in_years', 'registration_status', 'registration_datetime', 'randomization_datetime',
    'survival_status', 'last_alive_date',
]

# NOTE(review): assumes the infant CRF models are registered under the
# 'td_infant' app label rather than 'td_maternal' - confirm.
app_label = 'td_infant'
# NOTE(review): infant files are still written to the maternal export folder.
export_path = '/Users/coulsonkgathi/source/td_migration_export/20190627/maternal/'

for crf_name in crf_list:
    crf_cls = django_apps.get_model(app_label, crf_name)
    objs = crf_cls.objects.all()
    crf_data = []
    for obj in objs:
        visit = obj.infant_visit
        appointment = visit.appointment
        infant_identifier = visit.subject_identifier

        # Copy so the model instance's own __dict__ is left untouched.
        data = dict(obj.__dict__)
        data.update(
            subject_identifier=infant_identifier,
            visit_datetime=visit.report_datetime,
            last_alive_date=visit.last_alive_date,
            reason=visit.reason,
            survival_status=visit.survival_status,
            visit_code=visit.visit_code,
            study_status=visit.study_status,
            appt_status=appointment.appt_status,
            appt_datetime=appointment.appt_datetime,
        )

        try:
            rs = RegisteredSubject.objects.get(subject_identifier=infant_identifier)
        except RegisteredSubject.DoesNotExist:
            raise ValidationError('RegisteredSubject can not be missing')
        # NOTE(review): assumes RegisteredSubject.relative_identifier holds the
        # mother's subject identifier for an infant - confirm. Enrollment and
        # randomization are maternal records, so they are looked up with it.
        maternal_identifier = rs.relative_identifier
        data.update(
            maternal_identifier=maternal_identifier,
            screening_age_in_years=rs.screening_age_in_years,
            registration_status=rs.registration_status,
            dob=rs.dob,
            gender=rs.gender,
            subject_type=rs.subject_type,
            registration_datetime=rs.registration_datetime,
        )

        try:
            ae = AntenatalEnrollment.objects.get(subject_identifier=maternal_identifier)
        except AntenatalEnrollment.DoesNotExist:
            raise ValidationError('AntenatalEnrollment can not be missing')
        data.update(enrollment_hiv_status=ae.current_hiv_status)

        try:
            # MaternalRando is reached through maternal_visit (as in the
            # maternal export), not through an infant_visit relation.
            maternal_rando = MaternalRando.objects.get(
                maternal_visit__subject_identifier=maternal_identifier)
        except MaternalRando.DoesNotExist:
            # NOTE(review): this also blanks the registration_datetime copied
            # from RegisteredSubject above; kept to match the maternal export.
            data.update(
                rx=None,
                registration_datetime=None,
                randomization_datetime=None
            )
        else:
            data.update(
                rx=maternal_rando.rx,
                randomization_datetime=maternal_rando.randomization_datetime
            )
        crf_data.append(data)

    count = len(crf_data)
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    fname = app_label + '_' + crf_name + '_' + timestamp + '.csv'
    final_path = export_path + fname
    df_crf = pd.DataFrame(crf_data)
    df_crf.to_csv(final_path, encoding='utf-8')
    print('****************', crf_name, '****************')
    print('Total items: ', count)
    print('Expected total items: ', objs.count())
    print('##############################################')
print('Export completed')

In [None]:
# Export common data
# Field names flagged as not needed. NOTE(review): 'timeppoint_datetime' looks
# like a misspelling of 'timepoint_datetime' - confirm before filtering on it.
# Not referenced in the visible part of this cell.
not_necessary = ['visit_title', 'time_point', 'reason', 'timeppoint_datetime']
# Field names presumably expected on every non-CRF export row - TODO confirm;
# not referenced in the visible part of this cell.
non_crf_required_fields = [
    'subject_identifier', 'dob', 'gender', 'subject_type', 'rx', 'enrollment_hiv_status',
    'visit_datetime', 'appt_status', 'study_status', 'visit_code', 'appt_datetime', 'best_appt_datetime',
    'screening_age_in_years', 'registration_status', 'registration_datetime', 'randomization_datetime',
    'survival_status', 'last_alive_date', 
]