# Day 4: Passport Processing

#### Zach Bogart

- https://adventofcode.com/2020/day/4

## Setup

- had to pull in the data, split it up, and make it into a list of dictionaries

In [88]:
import re
import pandas as pd
# Let pandas render up to 250 rows before it truncates a displayed DataFrame.
pd.set_option("display.max_rows", 250)

### Read in File

In [2]:
# Load the puzzle input; `raw` holds one list item per line, newline included.
with open("inputs/04-input.txt", "r") as infile:
    raw = list(infile)

In [3]:
# Concatenate the lines read from the file back into one big string
# (each line still carries its own newline, so nothing is inserted between them).
one_string = "".join(raw)

In [4]:
# Rewrite the separators, applying the substitutions strictly in this order:
#   1. a blank line (two newlines)   -> "---"
#   2. a newline at the very end     -> removed
#   3. every remaining newline       -> ","
#   4. every space                   -> ","
for pattern, replacement in (("\n\n", "---"), ("\n$", ""), ("\n", ","), (" ", ",")):
    one_string = re.sub(pattern, replacement, one_string)

In [92]:
# Preview the first 500 characters to eyeball the new delimiters.
one_string[:500]

'byr:1983,iyr:2017,pid:796082981,cid:129,eyr:2030,ecl:oth,hgt:182cm---iyr:2019,cid:314,eyr:2039,hcl:#cfa07d,hgt:171cm,ecl:#0180ce,byr:2006,pid:8204115568---byr:1991,eyr:2022,hcl:#341e13,iyr:2016,pid:729933757,hgt:167cm,ecl:gry---hcl:231d64,cid:124,ecl:gmt,eyr:2039,hgt:189in,pid:#9c3ea1---ecl:#1f58f9,pid:#758e59,iyr:2022,hcl:z,byr:2016,hgt:68,eyr:1933---hcl:#fffffd,ecl:gry,eyr:2022,hgt:172cm,pid:781914826,byr:1930,iyr:2018---hcl:#08df7e,ecl:grn,byr:1942,eyr:2028,iyr:2011,cid:141,pid:319110455,hgt:'

In [6]:
# Cut the string at every "---" marker: each piece is the text that will
# become one dict in the next step.
dict_strings = one_string.split("---")

In [7]:
# Build one dict per record: split the record on "," into "key:val" items,
# split each item on ":" and unpack it into exactly one key and one value.
passports = [
    {key: val for key, val in (pair.split(':') for pair in record.split(","))}
    for record in dict_strings
]

In [8]:
# Peek at the first three parsed entries.
passports[:3]

[{'byr': '1983',
  'iyr': '2017',
  'pid': '796082981',
  'cid': '129',
  'eyr': '2030',
  'ecl': 'oth',
  'hgt': '182cm'},
 {'iyr': '2019',
  'cid': '314',
  'eyr': '2039',
  'hcl': '#cfa07d',
  'hgt': '171cm',
  'ecl': '#0180ce',
  'byr': '2006',
  'pid': '8204115568'},
 {'byr': '1991',
  'eyr': '2022',
  'hcl': '#341e13',
  'iyr': '2016',
  'pid': '729933757',
  'hgt': '167cm',
  'ecl': 'gry'}]

# Part 1

- check for 8 keys OR 7 keys but missing `cid`

In [59]:
# Fields that must be present; `cid` is optional, so it is deliberately left out.
REQUIRED_FIELDS = {'byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid'}

# Keep every passport that carries all required fields.
# Checking *which* keys are present (rather than how many) means an entry with
# seven keys where one is unrecognised and a required one is missing is no
# longer accepted by accident.
valid_passports_1 = [
    entry for entry in passports
    if REQUIRED_FIELDS.issubset(entry)  # iterating a dict yields its keys
]

In [61]:
# Number of passports kept by the Part 1 check.
len(valid_passports_1)

239

# Part 2

- a lot more to check!
- **got tripped up with regular expressions**: My first count (189) was one more than the correct answer. I happened to submit 188 just to see a new error message, but it turned out to be the right answer! It took a while to track down the cause by looking at the results as a pandas dataframe. I had to be more explicit with the regex for `pid` in particular (the same thing could have happened with any of them initially). I hadn't specified the bounds of the string with `^` and `$`, so a `pid` with ten digits went unnoticed. Dang! Super sneaky bug!
    - Be explicit with regex!

In [83]:
# Accepted values for `ecl` (eye color).
EYE_COLORS = {'amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth'}


def _year_in_range(value, low, high):
    """Return True if `value` is exactly four ASCII digits and low <= year <= high."""
    # Check the shape first so int() is never called on non-numeric text.
    return re.fullmatch(r"[0-9]{4}", value) is not None and low <= int(value) <= high


def _height_ok(value):
    """Return True for a height of 150-193 with unit `cm` or 59-76 with unit `in`."""
    parsed = re.fullmatch(r"([0-9]+)(cm|in)", value)
    if parsed is None:
        # No number, no unit, or a unit other than cm/in.
        return False
    amount, unit = int(parsed.group(1)), parsed.group(2)
    if unit == "cm":
        return 150 <= amount <= 193
    return 59 <= amount <= 76


def _passes_part_2(entry):
    """Return True if every required field of `entry` holds an acceptable value.

    `entry` is a dict mapping field names to string values. `cid` is ignored.
    A missing required field makes the passport invalid instead of raising
    KeyError.
    """
    required = ('byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid')
    if any(name not in entry for name in required):
        return False
    return (
        _year_in_range(entry['byr'], 1920, 2002)                      # birth year
        and _year_in_range(entry['iyr'], 2010, 2020)                  # issue year
        and _year_in_range(entry['eyr'], 2020, 2030)                  # expiration year
        and _height_ok(entry['hgt'])                                  # height
        and re.fullmatch(r"#[0-9a-f]{6}", entry['hcl']) is not None   # hair color
        and entry['ecl'] in EYE_COLORS                                # eye color
        and re.fullmatch(r"[0-9]{9}", entry['pid']) is not None       # passport id
    )


# Patterns are raw strings matched with re.fullmatch, so the whole value must
# match: no stray extra digit and no trailing newline can slip through.
valid_passports_2 = [entry for entry in valid_passports_1 if _passes_part_2(entry)]
            

In [85]:
# Number of passports that also passed the Part 2 value checks.
len(valid_passports_2)

188

# Results

### Part 1

In [89]:
# Tabulate the Part 1 passports: one row per dict, one column per key
# (keys absent from a dict show up as missing values).
pd.DataFrame(valid_passports_1)

Unnamed: 0,iyr,cid,eyr,hcl,hgt,ecl,byr,pid
0,2019,314.0,2039,#cfa07d,171cm,#0180ce,2006,8204115568
1,2016,,2022,#341e13,167cm,gry,1991,729933757
2,2022,,1933,z,68,#1f58f9,2016,#758e59
3,2018,,2022,#fffffd,172cm,gry,1930,781914826
4,2011,141.0,2028,#08df7e,186cm,grn,1942,319110455
5,2005,163.0,1994,2964fb,170cm,#a08502,2005,188cm
6,2013,,2030,#fffffd,168cm,grn,1928,705547886
7,2019,219.0,2029,#602927,163cm,oth,1943,016251942
8,2010,,2026,#efcc98,184cm,gry,1942,117647952
9,2011,243.0,2027,#888785,154cm,blu,1962,362697676


### Part 2

In [90]:
# Tabulate the Part 2 passports: one row per dict, one column per key
# (keys absent from a dict show up as missing values).
pd.DataFrame(valid_passports_2)

Unnamed: 0,byr,eyr,hcl,iyr,pid,hgt,ecl,cid
0,1991,2022,#341e13,2016,729933757,167cm,gry,
1,1930,2022,#fffffd,2018,781914826,172cm,gry,
2,1942,2028,#08df7e,2011,319110455,186cm,grn,141.0
3,1928,2030,#fffffd,2013,705547886,168cm,grn,
4,1943,2029,#602927,2019,16251942,163cm,oth,219.0
5,1942,2026,#efcc98,2010,117647952,184cm,gry,
6,1962,2027,#888785,2011,362697676,154cm,blu,243.0
7,1965,2030,#435634,2016,779104554,154cm,blu,
8,1988,2030,#cfa07d,2016,951967790,167cm,grn,
9,1941,2028,#341e13,2016,806979833,179cm,brn,83.0
