In [56]:
import requests
import os
import csv

class UKBiobankHelper:
    """Download UK Biobank showcase schema files and cache them on disk.

    Downloaded files are written to ``<cachedir>/schemas``. Schema 999 is
    treated as the index: each of its rows (tab-separated columns
    ``schema_id`` and ``name``) names another schema to fetch.
    """

    # Endpoint that serves a schema as a tab-separated text download.
    SCHEMA_URL = "http://biobank.ctsu.ox.ac.uk/showcase/scdown.cgi"
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 60

    def __init__(self, cachedir=None):
        """Create the cache directories if they do not exist yet.

        Args:
            cachedir: Directory used as the cache root. Defaults to
                ``.cache`` inside the current working directory.
        """
        if cachedir is None:
            cachedir = os.path.join(os.getcwd(), '.cache')
        schemadir = os.path.join(cachedir, 'schemas')

        # makedirs(exist_ok=True) avoids the check-then-create race and also
        # works when the parent of ``cachedir`` does not exist yet.
        for directory in (cachedir, schemadir):
            os.makedirs(directory, mode=0o744, exist_ok=True)

        self.cachedir = cachedir
        self.schemadir = schemadir

    def updateSchemaDoc(self):
        """Download every schema listed in schema 999 that is not cached.

        The index file ``999-schema.txt`` is fetched first when missing.
        A schema counts as cached when ``<schema_id>-<name>.txt`` exists in
        the schema directory.
        """
        schemafilename = os.path.join(self.schemadir, '999-schema.txt')

        if not os.path.exists(schemafilename):
            self.getRemoteSchemaDoc(999)

        # newline='' lets the csv module do its own line-ending handling.
        with open(schemafilename, 'r', newline='') as f:
            for row in csv.DictReader(f, delimiter="\t"):
                basename = row['schema_id'] + '-' + row['name'] + '.txt'
                filename = os.path.join(self.schemadir, basename)
                if not os.path.exists(filename):
                    self.getRemoteSchemaDoc(row['schema_id'])

    def getRemoteSchemaDoc(self, num):
        """Download one schema and store it in the schema directory.

        The file is saved as ``<num>-<filename>``, where ``filename`` is
        taken from the response's ``Content-Disposition`` header.

        Args:
            num: Schema identifier (int or str).

        Raises:
            requests.exceptions.HTTPError: The server answered with an
                error status.
            ValueError: The response carries no usable download filename.
        """
        schema_id = str(num)
        response = requests.get(
            self.SCHEMA_URL,
            params={'fmt': 'txt', 'id': schema_id},
            timeout=self.REQUEST_TIMEOUT,
        )
        if not response.ok:
            raise requests.exceptions.HTTPError(
                str(response) + ' // ' + str(response.headers),
                response=response,
            )

        remote_name = self._filename_from_disposition(
            response.headers.get('Content-Disposition'))
        filename = os.path.join(self.schemadir, schema_id + '-' + remote_name)
        with open(filename, 'wb') as f:
            f.write(response.content)

    @staticmethod
    def _filename_from_disposition(disposition):
        """Return the ``filename`` parameter of a Content-Disposition value.

        Raises:
            ValueError: The header is missing or has no ``filename``.
        """
        for part in (disposition or '').split(';'):
            key, sep, value = part.strip().partition('=')
            if sep and key.strip().lower() == 'filename':
                # Drop optional quotes, and any directory components so a
                # server-supplied name cannot escape the schema directory.
                name = os.path.basename(value.strip().strip('"\''))
                if name:
                    return name
        raise ValueError(
            'No filename in Content-Disposition header: ' + repr(disposition))
        

In [57]:
# Build a helper with the default cache directory (<cwd>/.cache) and fetch
# any schema files that are not cached yet.
helper = UKBiobankHelper()
helper.updateSchemaDoc()

# TODO 
 - Make a `.cache` directory to hold the files downloaded from biobank.ctsu.ox.ac.uk