#### <font color="brown">Dictionary, Counter, Reading/Writing Files</font>

---

#### <font color="brown">Dictionary</font>

In [15]:
# two equivalent ways to create an empty dictionary
empty_dict1 = {}  # conventional
empty_dict2 = dict()  # legit, but not often used
# dictionary literal: each entry is written as key: value
grades = {'Jenna': 80, 'Dylan': 75, 'Anis': 65}
# square brackets look up the value stored under a key
grade = grades['Jenna']
print(grade)


80


In [16]:
# indexing with a key that is not in the dictionary raises KeyError;
# catch it so the error is displayed without halting a top-to-bottom run
try:
    grade = grades['Keisha']  # key 'Keisha' is not in dictionary
except KeyError as err:
    print(f'KeyError: {err}')

KeyError: 'Keisha'

In [17]:
# using ternary operator (conditional expression) to choose the message;
# print is then called once, on whichever string was selected
print("Key 'Keisha' is in grades" if 'Keisha' in grades else "Key 'Keisha' is not in grades")

Key 'Keisha' is not in grades


In [18]:
# get method on grades returns the value stored under the key if present;
# otherwise it returns the given default value (0 here) instead of
# raising KeyError
grade = grades.get('Keisha', 0)
print(grade)

0


In [19]:
# add a key-value pair to dictionary:
# assigning to a key that is not yet present inserts it
grades['Keisha'] = 82

In [20]:
# iterating over a dictionary yields its keys, in the order they were inserted
for key in grades:
    print(f'{key}: {grades[key]}')

Jenna: 80
Dylan: 75
Anis: 65
Keisha: 82


In [22]:
# keys() returns a view of the keys; looping over it visits the same keys
# as looping over the dictionary itself
for key in grades.keys():  # key in grades is short for key in grades.keys()
    print(f'{key}: {grades[key]}')

Jenna: 80
Dylan: 75
Anis: 65
Keisha: 82


In [23]:
# get keys, values, items: each call returns a view object
# (dict_keys, dict_values, dict_items); items() holds (key, value) tuples
print(grades.keys())
print(grades.values())
print(grades.items())

dict_keys(['Jenna', 'Dylan', 'Anis', 'Keisha'])
dict_values([80, 75, 65, 82])
dict_items([('Jenna', 80), ('Dylan', 75), ('Anis', 65), ('Keisha', 82)])


In [10]:
# keys are unique, so assigning to existing key means changing its value
# (the old value is replaced; no second 'Dylan' entry is created)
grades['Dylan'] = 78
print(grades['Dylan'])

78


In [9]:
# items() yields (key, value) tuples; unpack each one into two loop variables
for key, value in grades.items():
    print(f'{key} : {value}')

Jenna : 80
Dylan : 75
Anis : 65
Keisha : 82


In [4]:
# the values of one dictionary need not share a type:
# here one value is a string and the other is a list of strings
prereqs = {
    'cs112': 'cs111',
    'cs336': ['cs112', 'cs205'],
}
print(prereqs['cs112'], prereqs['cs336'], sep='\n')

cs111
['cs112', 'cs205']


In [7]:
# a value may itself be a dictionary: state -> {county -> number}
counties = {
    'nj': {'middlesex': 825_000, 'bergen': 900_000, 'essex': 795_000},
    'ca': {'los angeles': 10_000_000, 'san diego': 3_300_000},
}
counties['nj']['essex']  # chained lookup: outer key first, then inner key

795000

In [36]:
# dict.fromkeys builds a dictionary from an iterable of keys (a list here);
# every key is mapped to the same given value (10)
mydict = dict.fromkeys(['x','y'],10)
mydict

{'x': 10, 'y': 10}

In [37]:
# if no value is provided, defaults to None
# a string is an iterable of characters, so each character becomes a key
mydict = dict.fromkeys('abcde')
mydict

{'a': None, 'b': None, 'c': None, 'd': None, 'e': None}

**You can initialize dicts out of a list of tuples**

In [25]:
# dict() accepts an iterable of (key, value) pairs
mydict = dict([('cs111',800),('cs112',500)])
mydict

{'cs111': 800, 'cs112': 500}

In [27]:
# 'table' and 'chair' each appear twice; the later pair replaces the earlier value
mydict2 = dict([('table',2),('chair',1),('table',1),('chair',3),('desk',1)])
mydict2

{'table': 1, 'chair': 3, 'desk': 1}

**When a key appears more than once, the later value overwrites the earlier one.**

---

#### <font color="brown">Default Dictionary</font>

In [29]:
# tally how often each quiz score occurs, using a plain dictionary
quiz_scores = [4,5,9,7,6,4,2,5,7,9,1,10,7,6,9,8,5]
scores = {}
for qs in quiz_scores:
    # get() supplies 0 the first time a score is seen
    scores[qs] = scores.get(qs, 0) + 1
print(scores)

{4: 2, 5: 3, 9: 3, 7: 3, 6: 2, 2: 1, 1: 1, 10: 1, 8: 1}


In [30]:
# using default dictionary
from collections import defaultdict

# defaultdict(int) calls int() (which returns 0) to create the value for a
# missing key, so no membership test is needed before incrementing
scores2 = defaultdict(int)  # scores are auto initialized to 0
for qs in quiz_scores:
      scores2[qs] += 1
print(scores2)

defaultdict(<class 'int'>, {4: 2, 5: 3, 9: 3, 7: 3, 6: 2, 2: 1, 1: 1, 10: 1, 8: 1})


**A `list` default is useful in practice for grouping values by key**

In [34]:
# group values by key: each key ends up mapped to a list of every value
# that was paired with it, in the order encountered
lst =  [('table',2),('chair',1),('table',1),('chair',3),('desk',1)]
mydict3 = defaultdict(list)
for k,v in lst:
    mydict3[k].append(v)   # default list implies starting list is empty
mydict3

defaultdict(list, {'table': [2, 1], 'chair': [1, 3], 'desk': [1]})

---

#### <font color="brown">Ordered Dictionary</font>

In [41]:
# preserves the order in which keys were added
# (plain dicts also keep insertion order since Python 3.7; OrderedDict
# additionally provides order-aware operations such as move_to_end)
from collections import OrderedDict

od = OrderedDict()
od['Hill center'] = 'Busch'
od['AB'] = 'College Ave'
od['Hickman'] = 'Douglas'
od['SEC'] = ['Busch']  # note: this value is a list, unlike the string values above
od

OrderedDict([('Hill center', 'Busch'),
             ('AB', 'College Ave'),
             ('Hickman', 'Douglas'),
             ('SEC', ['Busch'])])

In [42]:
# popitem() removes and returns the most recently added (key, value) pair
print(od.popitem())   # behaves like a stack
print(od)

('SEC', ['Busch'])
OrderedDict([('Hill center', 'Busch'), ('AB', 'College Ave'), ('Hickman', 'Douglas')])


---

#### <font color="brown">Counter</font>
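Counter is a `dict` subclass for counting hashable items. Like `defaultdict(int)`, a missing key reads as a count of 0, but unlike `defaultdict`, looking up a missing key does not insert it.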

In [43]:
from collections import Counter

# Counter tallies how many times each element occurs in the given iterable
ctr = Counter([1,2,8,2,1,9,1])
ctr

Counter({1: 3, 2: 2, 8: 1, 9: 1})

In [44]:
# a Counter offers the usual dictionary views; it maps element -> count
print(ctr.items())
print(ctr.keys())
print(ctr.values())

dict_items([(1, 3), (2, 2), (8, 1), (9, 1)])
dict_keys([1, 2, 8, 9])
dict_values([3, 2, 1, 1])


In [45]:
# update() adds to the existing counts instead of replacing them,
# so the count for 1 goes from 3 to 4
ctr.update([1])
ctr.items()

dict_items([(1, 4), (2, 2), (8, 1), (9, 1)])

In [46]:
# update() with several elements: each occurrence adds one to that element's count
ctr.update([2,9])
print(ctr)

Counter({1: 4, 2: 3, 9: 2, 8: 1})


In [47]:
# start from an empty Counter and feed it elements afterwards
ctr1 = Counter()
ctr1.update(['this','that'])  # an element not seen before starts from a count of 0
ctr1

Counter({'this': 1, 'that': 1})

In [50]:
# every character is counted, including the space
ctr2 = Counter('what goes around comes around')  # since string is an iterable
ctr2

Counter({'w': 1,
         'h': 1,
         'a': 3,
         't': 1,
         ' ': 4,
         'g': 1,
         'o': 4,
         'e': 2,
         's': 2,
         'r': 2,
         'u': 2,
         'n': 2,
         'd': 2,
         'c': 1,
         'm': 1})

In [51]:
# elements of different types (str, int, float) can be counted together
ctr3 = Counter(["str1",2,2.5,2,"str2","str1"])
ctr3

Counter({'str1': 2, 2: 2, 2.5: 1, 'str2': 1})

---

#### <font color="brown">Reading and Writing Files</font>

In [55]:
# build a movie -> year dictionary from a file whose lines look like <movie>:<year>
oscars = {}
# the with-block closes the file even if a line fails to parse
with open("oscars.txt") as oscars_file:
    for line in oscars_file:   # read one line at a time
        # split on the last ':' so a title that itself contains ':' stays intact
        movie, year = line.rsplit(':', 1)
        # strip surrounding whitespace, including the trailing newline on year
        oscars[movie.strip()] = year.strip()
print(oscars)

{'Hurt Locker': '2011', 'The Artist': '2012', 'The Shape of Water': '2018', 'The Hurt Locker': '2010', 'Moonlight': '2017', 'Argo': '2013', 'Green Book': '2019', '12 Years a Slave': '2014', 'Spotlight': '2016', 'Birdman': '2015'}


In [56]:
# order the (movie, year) tuples by their second field, the year;
# years are strings here, so the comparison is lexicographic
oscar_years = sorted(oscars.items(), key=lambda entry: entry[1])
print(oscar_years)

[('The Hurt Locker', '2010'), ('Hurt Locker', '2011'), ('The Artist', '2012'), ('Argo', '2013'), ('12 Years a Slave', '2014'), ('Birdman', '2015'), ('Spotlight', '2016'), ('Moonlight', '2017'), ('The Shape of Water', '2018'), ('Green Book', '2019')]


In [60]:
# show the current contents of the grades dictionary
print(grades)

{'Jenna': 80, 'Dylan': 75, 'Anis': 65, 'Keisha': 82}


In [62]:
# write data from grades dictionary into a file, one <key>:<value> line per entry
# open in "write" mode; the with-block closes the file even if a write fails
with open("scores_file.txt", "w") as scores_file:
    for key, value in grades.items():
        scores_file.write(f'{key}:{value}\n')  # call write method on file

In [63]:
# read populations from file into a dictionary
# country name is key, population is value
# each line of file is <country>|<population>
# population may have commas, need to remove

def getPopulations(file):
    """Read a country -> population dictionary from a text file.

    Each non-blank line of the file has the form <country>|<population>,
    where the population may contain commas as thousands separators.

    Args:
        file: path of the file to read.

    Returns:
        dict mapping country name (str, surrounding whitespace removed)
        to population (int).

    Raises:
        ValueError: if a non-blank line does not contain exactly one '|'
            or its population field is not an integer.
    """
    pops = {}
    # the with-block closes the file even if a line fails to parse
    with open(file) as pop_file:
        for line in pop_file:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            country, pop = line.split('|')
            # drop the commas; int() ignores surrounding whitespace/newline
            pops[country.strip()] = int(pop.replace(',', ''))
    return pops

In [64]:
# load the country -> population dictionary from population.txt
populations = getPopulations('population.txt')

In [65]:
# look up a single country's population by name
populations['China']

1347350000

In [66]:
# the stored value is an int: commas in the file were removed before conversion
populations['Nepal']

26620809

In [67]:
# list of countries with population over 100 million
large_pops = [c for c,p in populations.items() if p > 100000000 ]
print(large_pops)

['China', 'India', 'United States', 'Indonesia', 'Brazil', 'Pakistan', 'Nigeria', 'Bangladesh', 'Russia', 'Japan', 'Mexico']


**Counting words in a document**

In [70]:
# count how often each word occurs in the document
word_counts = Counter()
# the with-block closes the file once every line has been read
with open('metamorphosis.txt') as text_file:
    for line in text_file:
        tokens = line.split()  # separate into non-whitespace sequences
        # lower-case each token and strip ',' and '.' from its ends;
        # update() consumes the whole generator, adding one per word
        word_counts.update(token.lower().strip(',.') for token in tokens)

In [71]:
# the printed form of a Counter lists entries from most to least common
print(word_counts)

Counter({'he': 10, 'to': 6, 'the': 4, 'that': 4, 'was': 4, 'his': 4, 'a': 3, 'and': 3, 'look': 2, 'at': 2, 'dull': 2, 'feel': 2, 'right': 2, 'have': 2, 'gregor': 1, 'then': 1, 'turned': 1, 'out': 1, 'window': 1, 'weather': 1, 'drops': 1, 'of': 1, 'rain': 1, 'could': 1, 'be': 1, 'heard': 1, 'hitting': 1, 'pane': 1, 'which': 1, 'made': 1, 'him': 1, 'quite': 1, 'sad': 1, 'how': 1, 'about': 1, 'if': 1, 'i': 1, 'sleep': 1, 'little': 1, 'bit': 1, 'longer': 1, 'forget': 1, 'all': 1, 'this': 1, 'nonsense': 1, 'thought': 1, 'but': 1, 'something': 1, 'unable': 1, 'do': 1, 'because': 1, 'used': 1, 'sleeping': 1, 'on': 1, 'in': 1, 'present': 1, 'state': 1, "couldn't": 1, 'get': 1, 'into': 1, 'position': 1, 'however': 1, 'hard': 1, 'threw': 1, 'himself': 1, 'onto': 1, 'always': 1, 'rolled': 1, 'back': 1, 'where': 1, 'must': 1, 'tried': 1, 'it': 1, 'hundred': 1, 'times': 1, 'shut': 1, 'eyes': 1, 'so': 1, "wouldn't": 1, 'floundering': 1, 'legs': 1, 'only': 1, 'stopped': 1, 'when': 1, 'began': 1, 'mil

In [79]:
# find top 5 most common words
for word, count in word_counts.most_common(5):
    print(word, count)

he 10
to 6
the 4
that 4
was 4
