In [1]:
from collections import OrderedDict

# OrderedDict

the order of insertion is maintained when key and values are inserted into the dictionary. 

If we try to insert a key again, this will overwrite the previous value for that key. (NO DUPLICATE)

In [2]:
roll_no = OrderedDict([
    (11, 'Shubham'),
    (9,  'Pankaj'),
    (17, 'JournalDev')
])

In [3]:
for key, value in roll_no.items():
    print(key, value)

11 Shubham
9 Pankaj
17 JournalDev


In [4]:
roll_no[11] = 'Bhupen'

# Default Dict

The default dictionary can contain duplicate keys. 

The advantage of using the default dictionary is that we can collect items which belong to the same key. 

In [5]:
from collections import defaultdict

In [6]:
marks = [
    ('Shubham', 89),
    ('Pankaj', 92),
    ('JournalDev', 99),
    ('JournalDev', 98),
    ('Shubham', 89),
    ('Pankaj', 92),
]

In [7]:
dict_marks = defaultdict(list)

In [8]:
print(list(dict_marks.items()))

[]


In [9]:
for key, value in dict_marks:
    print(key, value)

In [10]:
for key, value in marks:
    dict_marks[key].append(value)

In [11]:
print(list(dict_marks.items()))

[('Shubham', [89, 89]), ('Pankaj', [92, 92]), ('JournalDev', [99, 98])]


# Counter
The Counter collections allow us to keep a count of all the items which are inserted into the collection with the keys. 

In [4]:
from collections import Counter

In [5]:
marks_list = [
    ('Shubham', 89),
    ('Pankaj', 92),
    ('JournalDev', 99),
    ('JournalDev', 98)
]

In [6]:
count = Counter(name for name, marks in marks_list)
count

Counter({'Shubham': 1, 'Pankaj': 1, 'JournalDev': 2})

#### more example

In [7]:
Counter(['a', 'b', 'c', 'a', 'b', 'b'])

Counter({'a': 2, 'b': 3, 'c': 1})

In [16]:
Counter({'a':2, 'b':3, 'c':1})

Counter({'a': 2, 'b': 3, 'c': 1})

In [17]:
Counter(a=2, b=3, c=1)

Counter({'a': 2, 'b': 3, 'c': 1})

In [8]:
Counter('fjkfkfkkfiorflflfllff')

Counter({'f': 9, 'j': 1, 'k': 4, 'i': 1, 'o': 1, 'r': 1, 'l': 4})

#### another example

In [19]:
c = Counter()
print ('Initial :', c)

Initial : Counter()


In [20]:
c.update('abcdaab')
print ('Seq :', c)

Seq : Counter({'a': 3, 'b': 2, 'c': 1, 'd': 1})


In [21]:
c.update({'o':1, 'p':5})
print ('Dict    :', c)

Dict    : Counter({'p': 5, 'a': 3, 'b': 2, 'c': 1, 'd': 1, 'o': 1})


In [22]:
c.update({'o':1, 'p':15})
print ('Dict    :', c)

Dict    : Counter({'p': 20, 'a': 3, 'b': 2, 'o': 2, 'c': 1, 'd': 1})


In [23]:
for k, v in c.items():
    print(k, v)

a 3
b 2
c 1
d 1
o 2
p 20


In [24]:
c['p']

20

In [25]:
line = 'kffkfk fkfkfk fkfkfkfk '

In [26]:
Counter(line.split(' '))

Counter({'kffkfk': 1, 'fkfkfk': 1, 'fkfkfkfk': 1, '': 1})

## Accessing Counters

In [27]:
c = Counter('abcdaab')

for letter in 'abcde':
    print ('%s : %d' % (letter, c[letter]))

a : 3
b : 2
c : 1
d : 1
e : 0


# Elements

In [28]:
c = Counter('extremely')

print(c)

Counter({'e': 3, 'x': 1, 't': 1, 'r': 1, 'm': 1, 'l': 1, 'y': 1})


In [53]:
c['z'] = 0

In [54]:
print(c)

Counter({'e': 3, 'x': 1, 't': 1, 'r': 1, 'm': 1, 'l': 1, 'y': 1, 'z': 0})


# Most_Common

In [29]:
c = Counter()

In [30]:
with open(r'E:\MYLEARN\2-ANALYTICS-DataScience\datasets\spacy-ex1.txt', 'rt') as f:
    for line in f:
        c.update(line.lower().split(' '))

In [31]:
c

Counter({'the': 12,
         'history': 1,
         'of': 4,
         'natural': 3,
         'language': 3,
         'processing': 2,
         '(nlp)': 1,
         'generally': 1,
         'started': 1,
         'in': 7,
         '1950s,': 1,
         'although': 1,
         'work': 1,
         'can': 1,
         'be': 2,
         'found': 2,
         'from': 1,
         'earlier': 1,
         'periods.': 1,
         '1950,': 1,
         'alan': 1,
         'turing': 2,
         'published': 1,
         'an': 1,
         'article': 1,
         'titled': 1,
         '"computing': 1,
         'machinery': 1,
         'and': 4,
         'intelligence"': 1,
         'which': 2,
         'proposed': 1,
         'what': 1,
         'is': 1,
         'now': 1,
         'called': 1,
         'test': 1,
         'as': 1,
         'a': 7,
         'criterion': 1,
         'intelligence[clarification': 1,
         'needed].\n': 1,
         '\n': 2,
         'georgetown': 1,
         'experiment':

In [27]:
c.most_common(3)

[('the', 12), ('in', 7), ('a', 7)]

# arithmetic

In [96]:
c1 = Counter(['a', 'b', 'c', 'a', 'b', 'b'])
c2 = Counter('aaaaaabbbbbbb')

In [97]:
c1

Counter({'a': 2, 'b': 3, 'c': 1})

In [98]:
c2

Counter({'a': 6, 'b': 7})

In [102]:
c1 + c2

Counter({'a': 8, 'b': 10, 'c': 1})

In [103]:
c1 - c2

Counter({'c': 1})

In [104]:
c1 & c2

Counter({'a': 2, 'b': 3})

In [105]:
c2 & c1

Counter({'a': 2, 'b': 3})

In [107]:
# maximum
c1 | c2

Counter({'a': 6, 'b': 7, 'c': 1})

# Counting words

In [32]:
cnt = Counter()

for word in ['red', 'blue', 'red', 'green', 'blue', 'blue']:
    cnt[word] += 1

print (cnt)


Counter({'blue': 3, 'red': 2, 'green': 1})


OR

In [33]:
mywords = ['red', 'blue', 'red', 'green', 'blue', 'blue']

cnt = Counter(mywords)

print (cnt)

Counter({'blue': 3, 'red': 2, 'green': 1})
