# Counter
使用介绍
A Counter is a dict subclass for counting hashable objects. It is an unordered collection where elements are stored as dictionary keys and their counts are stored as dictionary values. Counts are allowed to be any integer value including zero or negative counts. 


In [11]:
from collections import Counter
# Tally how often each colour name occurs. Counter.update adds one to the
# count of every element of the iterable it is given.
cnt = Counter()
for word in ['red', 'blue', 'red', 'green', 'blue', 'blue']:
    cnt.update((word,))
print(cnt)
# Keys only, ordered from the most frequent to the least frequent.
print([name for name, _ in cnt.most_common()])


Counter({'blue': 3, 'red': 2, 'green': 1})
['blue', 'red', 'green']


In [2]:
# most_common([n]): return a list of the n most common elements and their
# counts, ordered from the most common to the least.
# If n is omitted or None, most_common() returns all elements in the counter.
Counter('abracadabra').most_common(3)

[('a', 5), ('b', 2), ('r', 2)]

In [4]:
# subtract: counts from d are subtracted from c in place. Unlike the `-`
# operator, zero and negative results are kept in the counter.
c = Counter({'a': 4, 'b': 2, 'c': 0, 'd': -2})
d = Counter({'a': 1, 'b': 2, 'c': 3, 'd': 4})
c.subtract(d)
print(c)

Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})


其他函数：
# Common Counter idioms. Illustrative only: c is a Counter, n is an int and
# list_of_pairs is a list of (elem, cnt) tuples, none of which are defined here.
sum(c.values())                 # total of all counts
c.clear()                       # reset all counts
list(c)                         # list unique elements
set(c)                          # convert to a set
dict(c)                         # convert to a regular dictionary
c.items()                       # (elem, cnt) pairs; a view in Python 3, wrap in list() for a list
Counter(dict(list_of_pairs))    # convert from a list of (elem, cnt) pairs
c.most_common()[:-n-1:-1]       # n least common elements
c += Counter()                  # remove zero and negative counts

In [24]:
import numpy as np
import tensorflow as tf

def get_context(words,idx,window_size):
    """
    Collect the distinct words surrounding ``words[idx]``.

    words: sequence of tokens; the slices before and after idx are joined
        with ``+``
    idx: position of the input word within ``words``
    window_size: upper bound on the context radius; the radius actually
        used is drawn at random from 1..window_size (inclusive) per call

    Returns a list of the unique words found within the drawn radius on
    either side of idx. Position idx itself is skipped. The words pass
    through a set, so the order of the result is not defined.

    The drawn radius is printed on every call.
    """
    radius = np.random.randint(1, window_size + 1)
    print(radius)
    # Clamp the left edge at 0: a negative slice start would count from the
    # end of the sequence instead of stopping at the beginning.
    left_edge = max(idx - radius, 0)
    right_edge = idx + radius
    # Slices exclude their upper bound, hence the +1 to include right_edge.
    before = words[left_edge:idx]
    after = words[idx + 1:right_edge + 1]
    return list(set(before + after))

# Build training batches
def get_batches(words,batch_size,window_size):
    """
    Yield (x, y) training batches of ``batch_size`` consecutive words.

    words: sequence of tokens; it is sliced, so it must support slicing
    batch_size: number of words per batch
    window_size: forwarded to get_context as the maximum context radius

    Only full batches are produced: trailing words that do not fill a
    whole batch are dropped. Context windows are taken inside each batch,
    so they never reach across a batch boundary.

    For every word in a batch, x receives a list repeating that word once
    per context word, and y receives a one-element list wrapping the
    context list returned by get_context.
    """
    # The number of full batches depends on the total number of words, not
    # on the number of distinct words: counting len(set(words)) would drop
    # valid batches whenever a word is repeated.
    batch_num = len(words)//batch_size
    words = words[:batch_num*batch_size]

    for ii in range(0,len(words),batch_size):
        x,y=[],[]
        batch = words[ii:ii+batch_size]
        for idx, batch_x in enumerate(batch):
            batch_y = get_context(batch,idx,window_size)
            x.append([batch_x]*len(batch_y))
            y.append([batch_y])
        yield x,y

# Demo: a five-word sentence with batch_size 5 yields exactly one batch.
words = ['quick', 'brown', 'fox', 'jumps', 'over']
batch_size = 5
window_size = 5
batches = get_batches(words, batch_size, window_size)
for x, y in batches:
    # x: each input word repeated once per context word; y: the contexts.
    print(x, '\n')
    print(y, '\n')

1
4
3
1
3
[['quick'], ['brown', 'brown', 'brown', 'brown'], ['fox', 'fox', 'fox', 'fox'], ['jumps', 'jumps'], ['over', 'over', 'over']] 

[[['brown']], [['quick', 'jumps', 'fox', 'over']], [['quick', 'jumps', 'brown', 'over']], [['fox', 'over']], [['jumps', 'brown', 'fox']]] 

