In [1]:
# import necessary modules
import smtplib
import pandas as pd
import os
import glob
import sys
import sched
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

In [70]:
# set home directory so can be used on all OS
home = os.path.expanduser('~')

# Find most recently exported files from registry and repository
# (please note that one could also easily modify script to specify files you wish to use, but as we're trying to minimize RDRP data that's stored on machines, requiring download immediately before and then automating deletion within this script facilitates that end goal)
# reg_file should be report XX
# repo_file should be report XX
# link_file should be downloaded from XX as according to wiki
reg_file = max(glob.iglob(home+'/Downloads/RDRPRegistry-Emails_DATA_*'), key=os.path.getctime)
repo_file = max(glob.iglob(home+'/Downloads/RDRPRepository-SurveyStatus_DATA_*'), key=os.path.getctime)
link_file = max(glob.iglob(home+'/Downloads/RDRPRepository_Participants_*'), key=os.path.getctime)

In [71]:
# Load data from each using record_id as index
reg_data = pd.read_csv(reg_file, index_col='record_id')
repo_data = pd.read_csv(repo_file, index_col='record_id')
link_data = pd.read_csv(link_file, skiprows=1, header=None, names=['dummy', 'na',
    'record_id', 'd', 'e', 'f', 'survey', 'link'], index_col='record_id', dtype=object) # headers in the file are poorly written. na, d, e, f are all not used

# Extract links for survey queues via innerjoin
reg_data = reg_data.join(link_data, how='right')

# Use the repo_data to select which individuals will receive emails
subs = pd.DataFrame(reg_data.loc[repo_data.index])

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  # This is added back by InteractiveShellApp.init_path()


In [72]:
link_data

Unnamed: 0_level_0,dummy,na,d,e,f,survey,link
record_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1901,DD1901@red.cap,,No,No,EJWA9EYYJ,https://redcap.iths.org/surveys/?s=mpja4iZVvU,https://redcap.iths.org/surveys/?sq=JiwX3Jj9Wg
1903,DD1903@red.cap,,No,No,9KF9AYDCH,https://redcap.iths.org/surveys/?s=rWwXDCB4a4,https://redcap.iths.org/surveys/?sq=RE8UC2V7R9
1904,DD1904@red.cap,,No,No,AYXW93TP8,https://redcap.iths.org/surveys/?s=PrquQkWzee,https://redcap.iths.org/surveys/?sq=v635EmVqHf
1909,DD1909@red.cap,,No,No,LWN4M49WC,https://redcap.iths.org/surveys/?s=rGxEuGwFYI,https://redcap.iths.org/surveys/?sq=LgmXzcwSxu
1926,DD1926@red.cap,,No,No,PK8MDRL3K,https://redcap.iths.org/surveys/?s=beIe6u5dhy,https://redcap.iths.org/surveys/?sq=4cgIE7r2EE
...,...,...,...,...,...,...,...
1908,ED1908@red.cap,,No,No,RNDM44EDR,https://redcap.iths.org/surveys/?s=29JnGLHIeN,https://redcap.iths.org/surveys/?sq=ybYN4txq8h
1911,ED1911@red.cap,,No,No,MDX3P4AHE,https://redcap.iths.org/surveys/?s=qFtpnie3V6,https://redcap.iths.org/surveys/?sq=HhhbuG6RBb
1916,ED1916@red.cap,,No,No,98RYXT4H3,https://redcap.iths.org/surveys/?s=xgn9kdgFUm,https://redcap.iths.org/surveys/?sq=pduRoBUQeb
1920,ED1920@red.cap,,No,No,JREMAJDM9,https://redcap.iths.org/surveys/?s=msrbEKEmdF,https://redcap.iths.org/surveys/?sq=PsRHn89QCm


In [56]:
pd.notnull(subs['email'])

record_id
1       True
2       True
3       True
4       True
5       True
        ... 
2321    True
2324    True
2326    True
2330    True
2334    True
Name: email, Length: 1195, dtype: bool