# Purpose:

2015-03-18 (Wednesday)

Query the database.

# Table of Searches:

[How-many-positives-did-we-find-after-Gisella-left-in-July?](#How-many-positives-did-we-find-after-Gisella-left-in-July?)

[Get-next-set-of-fly-IDs-for-Robert's-MicroSat-work](#Get-next-set-of-fly-IDs-for-Robert's-MicroSat-work.)

## Imports:

In [14]:
# imports
import datetime as dt

import csv
import itertools
import os
import re

import pandas as pd
import numpy as np
import datetime as dt

import scipy

import munch


In [15]:
# for plotting
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import ggplot as gp


import numpy as np
import pandas as pd


import ggplot as g


## File paths:

In [16]:
# define paths to files
# NOTE(review): these are absolute, machine-specific paths; the notebook will not
# run elsewhere without editing them.
# Excel workbooks, presumably one per 2014 collection period (spring/summer, fall, December).
spring_summer = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/2014_spring_summer_from_rob.xlsx"
fall =          "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/2014_fall_for_pandas.xlsx"
december = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/DEC_2014_survey_for_pandas.xlsx"

# Combined 2014 records in three serialisation formats. Only `pickle_out_path`
# is used in the visible part of this notebook (read with pd.read_pickle).
h5_out_path = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/hdf5/2014_collection_records.h5"
json_out_path = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/hdf5/2014_collection_records.json"
pickle_out_path = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/hdf5/2014_collection_records.pkl"

In [17]:
# CSV read by get_village_id_map(): column 0 of each row is the village code,
# column 1 the long-form village name.
village_id_map_path = "/home/gus/Dropbox/uganda_data/data_repos/field_data/locations/names/uganda_village_id_map.csv"

----
# Helper functions

In [18]:
def date_is_between(test_date, start, end):
    """
    Report whether `test_date` lies in the closed interval [`start`, `end`].

    A TypeError whose text contains "can't compare datetime" is treated as
    "not in range" instead of an error, so the function can be applied to a
    column that mixes dates with values datetime refuses to compare against.

    :param test_date: value to test.
    :param start: inclusive lower bound.
    :param end: inclusive upper bound.
    :return: result of `start <= test_date <= end`, or False for the
        incomparable case described above.
    :raises TypeError: any other TypeError raised by the comparison.
    """
    try:
        return start <= test_date <= end
    except TypeError as exc:
        # `exc.message` was deprecated in Python 2.6 and no longer exists on
        # Python 3; str(exc) gives the same text on both.
        if "can't compare datetime" in str(exc):
            return False
        raise

In [19]:
def get_village_id_map(village_id_map_path):
    """
    Generates and returns a `dict` mapping the long-form village names to the letter codes.
    Letter codes map back to themselves to ensure a one way mapping.
    Enforces both be all UPPERcase to allow case insensitivity as long as
    the map is used like: `map[text.upper()]`.

    Each CSV row is read as `code, long_name`; rows with fewer than two
    fields (e.g. blank lines) are skipped.

    :param village_id_map_path: path to the comma-delimited village id file.
    :return: `dict`
    """
    import sys  # local import: only used to choose the file mode below

    # `unicode` exists only on Python 2; on Python 3 the text type is `str`.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # The csv module expects a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csv_file = open(village_id_map_path, 'r', newline='')
    else:
        csv_file = open(village_id_map_path, 'rb')

    village_id_map = {}

    with csv_file:
        for pair in csv.reader(csv_file, delimiter=','):
            # csv.reader yields [] for blank lines; indexing those would
            # raise IndexError, so ignore anything without both fields.
            if len(pair) < 2:
                continue
            code = text_type(pair[0].upper())
            village_id_map[code] = code
            village_id_map[text_type(pair[1].upper())] = code

    return village_id_map

In [20]:
def load_xl_sheets(xl_path):
    """
    Parse the dissection worksheets of an Excel workbook into DataFrames.

    Only worksheets whose name starts with "DISSECT" (case-insensitive) are
    read. Every parsed frame gets two extra columns: `workbook` (base name of
    `xl_path`) and `worksheet` (the sheet name).

    :param xl_path: path to the Excel workbook.
    :return: `munch.Munch` mapping sheet name -> DataFrame.
    """
    # Parser settings shared by every sheet; "NA" cells are read as missing.
    parse_options = dict(
        header=0,
        skiprows=None, skip_footer=0,
        index_col=None, parse_cols=None,
        parse_dates=False, date_parser=None,
        na_values=['NA'],
        thousands=None, chunksize=None,
        convert_float=False,
        has_index_names=False, converters=None,
    )

    frames = munch.Munch()
    workbook = pd.ExcelFile(xl_path)
    source_name = os.path.basename(xl_path)

    for sheet_name in workbook.sheet_names:
        if not sheet_name.upper().startswith("DISSECT"):
            continue

        frame = workbook.parse(sheetname=sheet_name, **parse_options)
        # Record provenance so rows can be traced back to their sheet.
        frame['workbook'] = source_name
        frame['worksheet'] = sheet_name
        frames[sheet_name] = frame

    return frames

In [21]:
def recode_villages(df):
    """
    Replace every entry of `df.Village` with its village code, in place.

    Each value is upper-cased and looked up in the module-level
    `village_id_map`, which must exist when this function is called.
    A value missing from the map raises KeyError.

    :param df: DataFrame with a `Village` column; modified in place.
    """
    def to_village_code(name):
        return village_id_map[name.upper()]

    df.Village = df.Village.apply(to_village_code)

In [22]:
def recode_dead(df):
    """
    Recode the `Dead` column of `df` in place.

    Each cell is coerced to text and classified by its prefix:
      - "L" (any case) or "0"  -> False (live / not dead)
      - "D" (any case) or "1"  -> True  (dead)
      - "UN" (any case)        -> None  (unknown)
    Null cells (per `pd.isnull`) are passed through unchanged.

    :param df: DataFrame with a `Dead` column; modified in place.
    :raises ValueError: if a cell matches none of the cases above.
    """
    # `unicode` exists only on Python 2; on Python 3 the text type is `str`.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def recode_func(x):
        # this is treated as an unknown case
        if pd.isnull(x):
            return x

        # After this coercion the value is always text, so no AttributeError
        # guard is needed around the string methods below.
        x = text_type(x)
        label = x.upper()

        # deal with Live type cases
        if label.startswith(('L', '0')):
            return False

        # deal with Dead type cases
        if label.startswith(('D', '1')):
            return True

        # deal with unknown type cases
        if label.startswith('UN'):
            return None

        msg = "The value {x} was not expected and this function must be corrected to continue.".format(x=x)
        raise ValueError(msg)

    df.Dead = df.Dead.apply(recode_func)

##########################################

def recode_teneral(df):
    """
    Recode the `Teneral` column of `df` in place.

    Each cell is coerced to text and classified by its prefix:
      - "N" (any case) or "0"  -> False (not teneral)
      - "T" (any case) or "1"  -> True  (teneral)
      - "UN" (any case)        -> None  (unknown)
    Null cells (per `pd.isnull`) are passed through unchanged.

    :param df: DataFrame with a `Teneral` column; modified in place.
    :raises ValueError: if a cell matches none of the cases above.
    """
    # `unicode` exists only on Python 2; on Python 3 the text type is `str`.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def recode_func(x):
        # this is treated as an unknown case
        if pd.isnull(x):
            return x

        # After this coercion the value is always text, so no AttributeError
        # guard is needed around the string methods below.
        x = text_type(x)
        label = x.upper()

        # deal with NOT-teneral type cases
        if label.startswith(('N', '0')):
            return False

        # deal with Teneral type cases
        if label.startswith(('T', '1')):
            return True

        # Unknown is encoded as None, not as the original "Unknown" text,
        # so the column holds only True / False / missing.
        if label.startswith('UN'):
            return None

        msg = "The value {x} was not expected and this function must be corrected to continue.".format(x=x)
        raise ValueError(msg)

    df.Teneral = df.Teneral.apply(recode_func)

##########################################

def recode_positives(df):
    """
    Recode the `prob`, `midgut` and `sal_gland` columns of `df` in place.

    Each cell is coerced to text and classified by its prefix (any case):
      - "UN" or "DEAD"               -> None  (unknown)
      - "1", "TRUE", "P" or "Y"      -> True  (positive)
      - "NO", "FALSE" or "0"         -> False (negative)
    Null cells (per `pd.isnull`) are passed through unchanged.

    :param df: DataFrame holding the three columns; modified in place.
    :raises ValueError: if a cell matches none of the cases above.
    """
    # `unicode` exists only on Python 2; on Python 3 the text type is `str`.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def recode_func(x):
        # this is treated as an unknown case
        if pd.isnull(x):
            return x

        label = text_type(x).upper()

        # deal with Unknown type cases
        if label.startswith(('UN', 'DEAD')):
            return None

        # deal with Positive type cases
        if label.startswith(('1', 'TRUE', 'P', 'Y')):
            return True

        # deal with Negative type cases
        if label.startswith(('NO', 'FALSE', '0')):
            return False

        msg = "The value {x} was not expected and this function must be corrected to continue.".format(x=x)
        raise ValueError(msg)

    # The same recoding applies to each of the three columns.
    for column in ('prob', 'midgut', 'sal_gland'):
        df[column] = df[column].apply(recode_func)

##########################################

def recode_species(df):
    """
    Normalise the `Species` column of `df` in place.

    Every entry has its '.' characters removed and is then capitalised
    (first letter upper-case, the rest lower-case), e.g. "G.f.f" -> "Gff".

    :param df: DataFrame with a `Species` column of strings; modified in place.
    """
    def normalise(label):
        return label.replace('.', '').capitalize()

    df.Species = df.Species.apply(normalise)

##########################################

def recode_sex(df):
    """
    Upper-case every entry of the `Sex` column of `df`, in place.

    :param df: DataFrame with a `Sex` column of strings; modified in place.
    """
    def to_upper(value):
        return value.upper()

    df.Sex = df.Sex.apply(to_upper)
    
##########################################

# Date fields are separated by '.', '/' or '-'. Raw string so the backslash
# reaches the regex engine without triggering an invalid-escape warning.
date_delim = re.compile(r'[\./-]')

# `unicode` exists only on Python 2; on Python 3 the text type is `str`.
try:
    _date_text_type = unicode
except NameError:
    _date_text_type = str


def cast_unicode_as_date(x):
    """
    Convert a "YYYY<sep>MM<sep>DD" text value to a `datetime`.

    The value is returned unchanged when it is not text, does not split into
    exactly three parts on '.', '/' or '-', does not start with a four
    character part, or does not describe a valid calendar date.

    :param x: any cell value.
    :return: `dt.datetime` for recognised dates, otherwise `x` itself.
    """
    if not isinstance(x, _date_text_type):
        return x

    parts = date_delim.split(x)

    if len(parts) != 3 or len(parts[0]) != 4:
        return x

    try:
        return dt.datetime(int(parts[0]), int(parts[1]), int(parts[2]))
    except (ValueError, OverflowError):
        # Non-numeric parts or an impossible date (e.g. month 13): leave the
        # cell as-is, like every other unrecognised value, instead of
        # aborting the whole column conversion.
        return x


def recode_date(df):
    """
    Apply `cast_unicode_as_date` to every entry of `df.Date`, in place.

    :param df: DataFrame with a `Date` column; modified in place.
    """
    new_date = df.Date.apply(cast_unicode_as_date)
    df.Date = new_date

##########################################

# Split on any non-word character (whitespace included). Raw string so the
# backslashes reach the regex engine without an invalid-escape warning.
fly_no_delim = re.compile(r'[\W\s]', re.UNICODE)

# `unicode` exists only on Python 2; on Python 3 the text type is `str`.
try:
    _fly_text_type = unicode
except NameError:
    _fly_text_type = str


def split_number(x):
    """
    Extract the trailing integer from a fly identifier.

    - Floats are truncated to `int` directly, so that text coercion does not
      leave a '.' after the number of interest; NaN and +/-inf cannot be
      converted and are returned unchanged.
    - Anything else is coerced to text and split on non-word characters; the
      last piece is returned as an `int` (e.g. u"UWA-14 001" -> 1).
    - If the last piece is not an integer, the text form of `x` is returned.

    :param x: any cell value.
    :return: `int` when a number could be extracted, otherwise the value
        (or its text form) unchanged.
    """
    if isinstance(x, float):
        try:
            return int(x)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError and int(+/-inf) raises OverflowError.
            return x

    text = _fly_text_type(x)
    parts = fly_no_delim.split(text)

    try:
        return int(parts[-1])
    except ValueError:
        return text


def recode_fly_number(df):
    """
    Apply `split_number` to every entry of `df.Fly_Number`, in place.

    :param df: DataFrame with a `Fly_Number` column; modified in place.
    """
    new_fly_number = df.Fly_Number.apply(split_number)
    df.Fly_Number = new_fly_number

In [23]:
def aggregate_column_from_df_list(df_list, col_name):
    """
    Collect one column from several frames into a single flat list.

    Values appear in the order of `df_list`, and within each frame in the
    order of the column.

    :param df_list: iterable of objects indexable by `col_name`.
    :param col_name: name of the column to gather.
    :return: `list` with every value of that column across all frames.
    """
    return [value for frame in df_list for value in frame[col_name]]

### Functions that add new columns

In [24]:
def add_infection_state_col(df):
    """
    Add an `infection_state` column to `df`, in place.

    A row's value is the row-wise `any` over the `prob`, `midgut` and
    `sal_gland` columns, computed with `skipna=True`.

    :param df: DataFrame holding the three columns; modified in place.
    """
    tissue_columns = ['prob', 'midgut', 'sal_gland']
    any_positive = df[tissue_columns].any(axis=1, skipna=True)
    df['infection_state'] = any_positive

## Recode Fly_Number

In [25]:
# pdb
# import ipdb

In [26]:
# Load the pickled collection records and convert Fly_Number to its trailing
# integer in place (see split_number).
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# from a trusted source.
df = pd.read_pickle(pickle_out_path)
recode_fly_number(df)

In [27]:
# Second load of the same pickle WITHOUT recode_fly_number applied, so the
# original Fly_Number values remain available (presumably for comparison).
df_old_flno = pd.read_pickle(pickle_out_path)

In [28]:
# Preview the first rows, showing the un-recoded Fly_Number strings.
df_old_flno.head()

Unnamed: 0,Comment,Date,Dead,Fly_Number,Hunger_stage,Kept_in,Sex,Species,Teneral,Trap_No,Village,Wing_fray,infection_state,midgut,prob,sal_gland,workbook,worksheet
0,,2014-07-22,False,UWA-14 001,2,EtOH,F,Gff,False,1,UWA,2,False,False,False,False,2014_spring_summer_from_rob.xlsx,Dissection Data-Nwoya
1,,2014-07-22,False,UWA-14 002,3,EtOH,F,Gff,False,1,UWA,2,False,False,False,False,2014_spring_summer_from_rob.xlsx,Dissection Data-Nwoya
2,,2014-07-22,False,UWA-14 003,3,EtOH,M,Gff,False,1,UWA,2,False,False,False,False,2014_spring_summer_from_rob.xlsx,Dissection Data-Nwoya
3,,2014-07-22,False,UWA-14 004,3,EtOH,M,Gff,False,1,UWA,2,False,False,False,False,2014_spring_summer_from_rob.xlsx,Dissection Data-Nwoya
4,,2014-07-22,False,UWA-14 005,3,EtOH,M,Gff,False,1,UWA,2,False,False,False,False,2014_spring_summer_from_rob.xlsx,Dissection Data-Nwoya


----
# Helpful constants

In [29]:
# Module-level lookup table; recode_villages() reads this global at call time.
village_id_map = get_village_id_map(village_id_map_path)

In [30]:
# pdb

In [31]:
# # Example pivot_table
# tableXXXX = pd.pivot_table(df.query("XXXX == XXXX"),index=['XXXX'],
#                       values=['XXXX'],
#                       columns=['XXXX'],
#                       fill_value=0,
#                       aggfunc=[len])
# tableXXXX.head()

---
# Query the data

### How many positives did we find after Gisella left in July?

In [32]:
# Boolean mask: True for rows whose Date lies within 2014-07-11 .. 2014-08-11
# (both ends inclusive). date_is_between() returns False for entries that
# datetime cannot compare against, so those rows are left out.
date_mask = df.Date.apply(lambda x: date_is_between(x,
                                                          dt.datetime(2014, 7, 11),
                                                          dt.datetime(2014, 8, 11)))
post_gisella_july = df[date_mask]

In [33]:
# Rows in the date window whose infection_state is True.
post_gisella_july_positive = post_gisella_july.query('infection_state == True')

In [34]:
# Rows in the date window whose infection_state is False.
post_gisella_july_negative = post_gisella_july.query('infection_state == False')

In [35]:
# Number of positive flies in the window.
len(post_gisella_july_positive)

30

In [36]:
# Number of negative flies in the window.
len(post_gisella_july_negative)

2376

In [37]:
# post_gisella_july_positive.to_csv('/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/seraps_stuff/positives_from_gus_gisella_time.tsv', sep='\t')

In [38]:
# post_gisella_july_negative.to_csv('/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/seraps_stuff/negatives_from_gus_gisella_time.tsv', sep='\t')

### Get next set of fly IDs for Robert's MicroSat work.

- NGO 15 each M/F
- CHU 15 each M/F
- TUM 15 each M/F

In [88]:
def get_random_rows(df, sample_size, exclude_by=None, exclude_values=None, random_state=None):
    """
    Draw `sample_size` distinct rows of `df` at random.

    Rows whose `exclude_by` column holds one of `exclude_values` are removed
    from the pool first (only when both arguments are given). When the pool
    is smaller than `sample_size`, a notice is printed and the whole pool is
    returned instead of raising.

    :param df: DataFrame to sample from.
    :param sample_size: number of rows wanted.
    :param exclude_by: optional column name (`str`) used for exclusion.
    :param exclude_values: optional `list` of values to exclude.
    :param random_state: optional seed or `np.random.RandomState` for a
        reproducible draw; None (default) uses numpy's global random state.
    :return: DataFrame with the sampled rows (or the whole pool, see above).
    :raises ValueError: propagated from numpy for an invalid `sample_size`
        (e.g. negative).
    """
    assert (exclude_by is None) or isinstance(exclude_by, str)
    assert (exclude_values is None) or isinstance(exclude_values, list)

    if (exclude_by is not None) and (exclude_values is not None):
        post_exclude = df[~df[exclude_by].isin(exclude_values)]
    else:
        post_exclude = df

    def first_value(column):
        # Used only to label the notice below; "?" when the pool is empty or
        # has no such column.
        if len(post_exclude) > 0 and column in post_exclude.columns:
            return post_exclude[column].iloc[0]
        return "?"

    # Decide up front instead of matching the text of numpy's error message;
    # any other ValueError from numpy now propagates rather than silently
    # turning into a None return value.
    if len(post_exclude) == 0 or sample_size > len(post_exclude):
        print("Returning all {vil}:{sex} bc sample_size is larger than population.".format(
            vil=first_value('Village'),
            sex=first_value('Sex')))
        return post_exclude

    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    indexes = rng.choice(post_exclude.index, sample_size, replace=False)
    # Look the labels up in the filtered pool, so an excluded row can never
    # come back even if `df` has repeated index labels.
    return post_exclude.loc[indexes]

In [43]:
# get_random_rows(df, 10)

In [44]:
# Draw 15 random flies per sex for each of the villages NGO, CHU and TUM.
# NOTE(review): no random seed is set in the visible code, so re-running this
# cell selects a different set of flies each time.
ngo_m = get_random_rows(df.query('Sex == "M"').query('Village == "NGO"'), 15)
ngo_f = get_random_rows(df.query('Sex == "F"').query('Village == "NGO"'), 15)

chu_m = get_random_rows(df.query('Sex == "M"').query('Village == "CHU"'), 15)
chu_f = get_random_rows(df.query('Sex == "F"').query('Village == "CHU"'), 15)

tum_m = get_random_rows(df.query('Sex == "M"').query('Village == "TUM"'), 15)
tum_f = get_random_rows(df.query('Sex == "F"').query('Village == "TUM"'), 15)

In [45]:
def write_samples(dir_path, df, sep_sex=False):
    """
    Write a sampled set of flies to an Excel file and print the file's path.

    The file name is built from today's date, the number of rows, and the
    Village (and, with `sep_sex=True`, the Sex) of the FIRST row of `df`:
    "<date>_<n>_<sex>_random_<village>.xls" or "<date>_<n>_MF_random_<village>.xls".

    :param dir_path: directory to write into (with or without trailing slash).
    :param df: DataFrame of sampled rows with `Village` and `Sex` columns.
    :param sep_sex: when True, name the file after the first row's sex
        instead of "MF".
    :raises ValueError: if `df` is empty, since there is no first row to
        take the village/sex from.
    """
    if len(df) == 0:
        raise ValueError("Cannot write an empty sample: no first row to read Village/Sex from.")

    today = dt.date.today().isoformat()

    if sep_sex:
        file_name = "{date}_{num}_{sex}_random_{vil}.xls".format(
            date=today,
            num=len(df),
            sex=df.Sex.iloc[0],
            vil=df.Village.iloc[0])
    else:
        file_name = "{date}_{num}_MF_random_{vil}.xls".format(
            date=today,
            num=len(df),
            vil=df.Village.iloc[0])

    # os.path.dirname() only returned `dir_path` itself when it ended in a
    # slash; without one it dropped the last component and the file landed in
    # the parent directory. Joining handles both spellings.
    out_path = os.path.join(dir_path, file_name)

    df.to_excel(out_path)
    print(out_path)
        

In [46]:
# dir_path = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/"


# write_samples(dir_path, ngo_m)
# write_samples(dir_path, ngo_f)

# write_samples(dir_path, chu_m)
# write_samples(dir_path, chu_f)

# write_samples(dir_path, tum_m)
# write_samples(dir_path, tum_f)

# Static record of the sample files; presumably the paths printed when the
# (now commented-out) write_samples calls above were last run.
print """
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_M_random_sampled_tsetse_from_NGO.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_F_random_sampled_tsetse_from_NGO.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_M_random_sampled_tsetse_from_CHU.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_F_random_sampled_tsetse_from_CHU.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_M_random_sampled_tsetse_from_TUM.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_F_random_sampled_tsetse_from_TUM.xls
"""


/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_M_random_sampled_tsetse_from_NGO.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_F_random_sampled_tsetse_from_NGO.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_M_random_sampled_tsetse_from_CHU.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_F_random_sampled_tsetse_from_CHU.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_M_random_sampled_tsetse_from_TUM.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-21_15_F_random_sampled_tsetse_from_TUM.xls



In [47]:
# chu_m.head()

### 2015-03-23: Get next set of fly IDs for Robert's MicroSat work to Kirstin/Alexis 

- GAN 15 each M/F
- LEA 15 each M/F
- OSG 15 each M/F
- GOR 15 each M/F  (NOPE)
- OGU 15 each M/F
- APU 15 each M/F

In [48]:
# GAN_m = get_random_rows(df.query('Sex == "M"').query('Village == "GAN"'), 15)
# GAN_f = get_random_rows(df.query('Sex == "F"').query('Village == "GAN"'), 15)

# # LEA_m = get_random_rows(df.query('Sex == "M"').query('Village == "LEA"'), 15)
# # LEA_f = get_random_rows(df.query('Sex == "F"').query('Village == "LEA"'), 15)

# OSG_m = get_random_rows(df.query('Sex == "M"').query('Village == "OSG"'), 15)
# OSG_f = get_random_rows(df.query('Sex == "F"').query('Village == "OSG"'), 15)

# GOR_m = get_random_rows(df.query('Sex == "M"').query('Village == "GOR"'), 15)
# GOR_f = get_random_rows(df.query('Sex == "F"').query('Village == "GOR"'), 15)

# # OGU_m = get_random_rows(df.query('Sex == "M"').query('Village == "OGU"'), 15)
# # OGU_f = get_random_rows(df.query('Sex == "F"').query('Village == "OGU"'), 15)

# APU_m = get_random_rows(df.query('Sex == "M"').query('Village == "APU"'), 15)
# APU_f = get_random_rows(df.query('Sex == "F"').query('Village == "APU"'), 15)



# GAN_mf = pd.concat([GAN_m, GAN_f])
# # LEA_mf = pd.concat([LEA_m, LEA_f])
# OSG_mf = pd.concat([OSG_m, OSG_f])
# # GOR_mf = pd.concat([GOR_m, GOR_f])
# # OGU_mf = pd.concat([OGU_m, OGU_f])
# APU_mf = pd.concat([APU_m, APU_f])

In [49]:
# Output directory passed to write_samples(); note the trailing slash.
dir_path = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/"


# write_samples(dir_path, GAN_mf)
# # write_samples(dir_path, LEA_mf)
# write_samples(dir_path, OSG_mf)
# write_samples(dir_path, GOR_mf)
# # write_samples(dir_path, OGU_mf)
# write_samples(dir_path, APU_mf)

# Static record of the sample files; presumably the paths printed when the
# (now commented-out) write_samples calls above were last run.
print """
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-23_30_MF_random_GAN.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-23_30_MF_random_OSG.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-23_30_MF_random_APU.xls
"""


/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-23_30_MF_random_GAN.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-23_30_MF_random_OSG.xls
/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-23_30_MF_random_APU.xls



### 2015-03-24: Get next set of fly IDs for Robert's MicroSat work to Kirstin/Alexis 

- OKS 15 each M/F

In [None]:
# Count OKS females that are not dead, not teneral and not infected.
len(df.query('Village == "OKS"').query('Dead == False').query('Teneral == False').query('infection_state == False').query('Sex == "F"'))

In [None]:
# OKS flies (both sexes) that are not dead, not teneral and not infected.
OKS_select = df.query('Village == "OKS"').query('Dead == False').query('Teneral == False').query('infection_state == False')

In [None]:
# Show the row(s) of the OKS selection with Fly_Number 175, if any.
OKS_select.query("Fly_Number == 175")

In [None]:
# OKS_m = get_random_rows(OKS_select.query('Sex == "M"'),15)
# OKS_f = get_random_rows(OKS_select.query('Sex == "F"'),15)

# OKS_mf = pd.concat([OKS_m, OKS_f])

In [None]:
# write_samples(dir_path, OKS_mf)

### 2015-03-23: Get next set of fly IDs for Robert's MicroSat work to Kirstin/Alexis 

- GAN 15 each M/F
- LEA 15 each M/F
- OSG 15 each M/F
- GOR 15 each M/F  (NOPE)
- OGU 15 each M/F
- APU 15 each M/F

In [51]:
# Fly numbers passed as `exclude_values` (with exclude_by='Fly_Number') when
# re-sampling OSG, so these flies are left out of the draw.
osg_missing = [
    122, 138, 91, 170, 110,
    107, 169, 168, 146, 118,
    93, 149, 151, 152, 114,
    139, 134, 113, 140,
]

# Same purpose for the GAN re-sampling.
gan_missing = [
    280, 151, 436, 105, 307,
    154, 432, 314, 312, 147,
    308, 435, 433, 259, 296,
    255, 126, 261, 298,
]

In [89]:
# GAN_m = get_random_rows(df.query('Dead == False').query('Sex == "M"').query('Village == "GAN"'), 13, 
#                         exclude_by='Fly_Number', exclude_values=gan_missing)
# GAN_f = get_random_rows(df.query('Dead == False').query('Sex == "F"').query('Village == "GAN"'), 13, 
#                         exclude_by='Fly_Number', exclude_values=gan_missing)

# LEA_m = get_random_rows(df.query('Dead == False').query('Sex == "M"').query('Village == "LEA"'), 13, 
#                         exclude_by=None, exclude_values=None)
# LEA_f = get_random_rows(df.query('Dead == False').query('Sex == "F"').query('Village == "LEA"'), 13, 
#                         exclude_by=None, exclude_values=None)

# OSG_m = get_random_rows(df.query('Dead == False').query('Sex == "M"').query('Village == "OSG"'), 13, 
#                         exclude_by='Fly_Number', exclude_values=osg_missing)
# OSG_f = get_random_rows(df.query('Dead == False').query('Sex == "F"').query('Village == "OSG"'), 13, 
#                         exclude_by='Fly_Number', exclude_values=osg_missing)

# OGU_m = get_random_rows(df.query('Dead == False').query('Sex == "M"').query('Village == "OGU"'), 13, 
#                         exclude_by=None, exclude_values=None)
# OGU_f = get_random_rows(df.query('Dead == False').query('Sex == "F"').query('Village == "OGU"'), 13, 
#                         exclude_by=None, exclude_values=None)

# APU_m = get_random_rows(df.query('Dead == False').query('Sex == "M"').query('Village == "APU"'), 13, 
#                         exclude_by=None, exclude_values=None)
# APU_f = get_random_rows(df.query('Dead == False').query('Sex == "F"').query('Village == "APU"'), 13,
#                         exclude_by=None, exclude_values=None)



# GAN_mf = pd.concat([GAN_m, GAN_f])
# LEA_mf = pd.concat([LEA_m, LEA_f])
# OSG_mf = pd.concat([OSG_m, OSG_f])
# OGU_mf = pd.concat([OGU_m, OGU_f])
# APU_mf = pd.concat([APU_m, APU_f])


# print "{vil}: {leng}".format(vil=GAN_mf.Village.iloc[0], leng=len(GAN_mf))
# print "{vil}: {leng}".format(vil=LEA_mf.Village.iloc[0], leng=len(LEA_mf))
# print "{vil}: {leng}".format(vil=OSG_mf.Village.iloc[0], leng=len(OSG_mf))
# print "{vil}: {leng}".format(vil=OGU_mf.Village.iloc[0], leng=len(OGU_mf))
# print "{vil}: {leng}".format(vil=APU_mf.Village.iloc[0], leng=len(APU_mf))

Returning all GAN:M bc sample_size is larger than population.
Returning all LEA:M bc sample_size is larger than population.
Returning all LEA:F bc sample_size is larger than population.
Returning all OSG:M bc sample_size is larger than population.
Returning all OGU:M bc sample_size is larger than population.
GAN: 25
LEA: 16
OSG: 25
OGU: 20
APU: 26


Output of above:

    Returning all GAN:M bc sample_size is larger than population.
    Returning all LEA:M bc sample_size is larger than population.
    Returning all LEA:F bc sample_size is larger than population.
    Returning all OSG:M bc sample_size is larger than population.
    Returning all OGU:M bc sample_size is larger than population.
    GAN: 25
    LEA: 16
    OSG: 25
    OGU: 20
    APU: 26

In [94]:
# Output directory passed to write_samples(); note the trailing slash.
dir_path = "/home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/"


# write_samples(dir_path, GAN_mf)
# write_samples(dir_path, LEA_mf)
# write_samples(dir_path, OSG_mf)
# write_samples(dir_path, OGU_mf)
# write_samples(dir_path, APU_mf)


Output of above:

    /home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-31_25_MF_random_GAN.xls
    /home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-31_16_MF_random_LEA.xls
    /home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-31_25_MF_random_OSG.xls
    /home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-31_20_MF_random_OGU.xls
    /home/gus/Documents/YalePostDoc/project_stuff/g_f_fucipes_uganda/collection_data/samples_used/2015-03-31_26_MF_random_APU.xls