**POC for identifying groups of individuals who can live together at a new dwelling**


In [70]:
# imports
import os
import pandas as pd
import numpy as np

In [71]:
# load in CSV of Persons
# expect single CSV to have all necessary info about a person
people_filepath = 'data/PeopleSheet.csv'
peeps = pd.read_csv(people_filepath)

In [72]:
# Create a Dwelling to test People against
# Note: no geo math, just flag whether, e.g., it is no_offenders (True)
dwelling_filepath = 'data/DwellingSheet.csv'
dwelling = pd.read_csv(dwelling_filepath)

**Check individuals against Dwelling**

returns dataframe of eligible people in eligibles_loc

In [73]:
# Test Persons against Dwelling, return list of eligible and eliminated
if not dwelling.offender_ok[0] == 'y':
    eligibles_loc = peeps[peeps['is_offender'] == 'n']  ## fragile: uppercase breaks
    eliminated_loc = peeps[peeps['is_offender'] == 'y']

print(f'# eligible: {len(eligibles_loc)}')
print(f'# eliminated: {len(eliminated_loc)}')

if len(eligibles_loc) + len(eliminated_loc) == len(peeps):
    print(f'All {len(peeps)} peeps accounted for!')    
else:
    print('Looks like we might have lost one! Started with X peeps.')
# test that all records went one place or the other (using .len?)

# eligible: 6
# eliminated: 1
All 7 peeps accounted for!


**HARD GROUPING**

Hard-group eligible persons based on a small number of lifestyle flags

**Hard grouping within DF**

This approach utilizes the existing dataframe for the grouping analysis. It "adds" individuals to groups one at a time by assigning them a value for the 'group' column.

In [74]:
# add 'group' column to peeps, initialize to -1
# This is its own block because it will error out if you add the same col twice.
eligibles_loc.insert(1, 'hard_group', -1)

In [75]:
# criteria for hard grouping. Later can exterinalize this to another CSV.
#criteria = ['smoking', 'dog']
criteria = pd.read_csv('data/HardFlagsSheet.csv')
criteria

Unnamed: 0,dog,smoking


In [79]:
for index, peep in eligibles_loc.iterrows():
    max_group = eligibles_loc.hard_group.max()
    found_group = False
    if max_group == -1:  # first time through, start Group 0
        eligibles_loc.hard_group[eligibles_loc.UID == peep.UID] = 0
    else:
        # loop thru groups, look for conflicts
        for check_group in range(max_group + 1):
            group_conflict = False
            # loop through members of group
            for index, groupie in eligibles_loc[eligibles_loc.hard_group == check_group].iterrows():
                # loop through types of conflict
                for flag_criterion in criteria:
                    if groupie[flag_criterion + '_has'] == 'y' and peep[flag_criterion + '_conflict'] == 'y':
                        group_conflict = True
                        print('group_conflict1:', group_conflict, '; criterion: ', flag_criterion)
                        break   # don't bother checking more criteria
                if group_conflict: # don't bother checking other group members, go to next group
                    print('group_conflict2', group_conflict)
                    break   
            if not group_conflict:  # peep found their group, don't check more groups
                eligibles_loc.hard_group[eligibles_loc.UID == peep.UID] = check_group
                found_group = True
                break   
        # if all groups failed...
        print(peep.UID, 'found_group', found_group)
        if not found_group:
            print(f'new group please for {peep.UID}:', max_group + 1)
            eligibles_loc.hard_group[eligibles_loc.UID == peep.UID] = max_group + 1
       
        
'''
$$$  BUGS  $$$$
-- If dog conflict is present in first row, first person gets group 1 and then everyone else is 0 (unexpected, but not necessarily wrong.
-- Second criterion overwrites groups, so final hard-grouping is only based on last criterion checked.
'''
#

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
X0002 found_group True
group_conflict1: True ; criterion:  dog
group_conflict2 True
X0003 found_group True
X0004 found_group True
group_conflict1: True ; criterion:  smoking
group_conflict2 True
group_conflict1: True ; criterion:  smoking
group_conflict2 True
X0005 found_group True
X0006 found_group True
group_conflict1: True ; criterion:  smoking
group_conflict2 True
group_conflict1: True ; criterion:  smoking
group_conflict2 True
X0007 found_group True


'\n$$$  BUGS  $$$$\n-- If dog conflict is present in first row, first person gets group 1 and then everyone else is 0 (unexpected, but not necessarily wrong.\n-- Second criterion overwrites groups, so final hard-grouping is only based on last criterion checked.\n'

In [77]:
eligibles_loc

Unnamed: 0,UID,hard_group,is_offender,smoking_has,smoking_conflict,dog_has,dog_conflict
1,X0002,0,n,y,n,y,n
2,X0003,1,n,y,n,n,y
3,X0004,0,n,n,n,y,n
4,X0005,2,n,n,y,y,n
5,X0006,0,n,y,n,y,n
6,X0007,2,n,n,y,y,n


In [78]:
# Report groups
print('group#, count')
eligibles_loc['hard_group'].value_counts()


group#, count


0    3
2    2
1    1
Name: hard_group, dtype: int64