# collections: 컨테이너 데이터형

- namedtuple() : 이름 붙은 필드를 갖는 튜플 서브 클래스를 만들기 위한 팩토리 함수

- deque : 양쪽 끝에서 빠르게 추가와 삭제를 할 수 있는 리스트류 컨테이너

- ChainMap : 여러 매핑의 단일 뷰를 만드는 딕셔너리류 클래스

- Counter : 해시 가능한 객체를 세는 데 사용하는 딕셔너리 서브 클래스

- OrderedDict : 항목이 추가된 순서를 기억하는 딕셔너리 서브 클래스

- defaultdict : 누락된 값을 제공하기 위해 팩토리 함수를 호출하는 딕셔너리 서브 클래스

- UserDict : 더 쉬운 딕셔너리 서브 클래싱을 위해 딕셔너리 객체를 감싸는 래퍼

- UserList : 더 쉬운 리스트 서브 클래싱을 위해 리스트 객체를 감싸는 래퍼

- UserString : 더 쉬운 문자열 서브 클래싱을 위해 문자열 객체를 감싸는 래퍼

# 01 namedtuple

- 튜플처럼 immutable
- 이름을 통해 데이터로 접근 가능
- 메모리 활용 최적화(성능상 이점이 있음) -> 다만 대체하려는 자료형에 비해 실제로 어느 정도 이점이 있는지는 시간 측정으로 확인 필요


In [3]:
# A tuple is immutable, but it only stores references to its items.
l = [10, 20, 30]
t = (l, 10, 20)
# Mutating the list in place is therefore visible through the tuple.
l[2] = 100
t

([10, 20, 100], 10, 20)

In [4]:
# Basic example
from collections import namedtuple

Point = namedtuple('Point', ['x', 'y'])
p = Point(11, y=22)
p[0] + p[1]

33

In [5]:
p

Point(x=11, y=22)

In [7]:
p.x, p.y

(11, 22)

In [8]:
p[x]  # NameError: x is an unquoted, undefined name (use p.x or p[0] instead)

NameError: name 'x' is not defined

In [11]:
i, j = p

i, j
i
j

22

In [12]:
Point

__main__.Point

In [13]:
dir(Point)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_field_defaults',
 '_fields',
 '_fields_defaults',
 '_make',
 '_replace',
 'count',
 'index',
 'x',
 'y']

In [14]:
d = {
    'x': 100,
    'y': 200,
}

In [15]:
p = Point(**d)
p.x

100

In [16]:
p._asdict()

{'x': 100, 'y': 200}

In [17]:
p._fields

('x', 'y')

In [19]:
re_p = p._replace(x=1000)
re_p

Point(x=1000, y=200)

In [20]:
p

Point(x=100, y=200)

In [23]:
p.index(100)
p.index(200)

1

In [22]:
p.count(100)

1

In [26]:
p.count(300)
p.index('x')

ValueError: tuple.index(x): x not in tuple

In [27]:
p.count('x')

0

In [28]:
from dataclasses import dataclass  # available since Python 3.7; struct-like classes


# NOTE: this rebinds the name Point, replacing the namedtuple defined earlier.
@dataclass
class Point:
    """Mutable 2-D point record; both coordinates default to None."""

    # NOTE(review): the annotation says int but the default is None —
    # Optional[int] would describe these fields more accurately.
    x: int = None
    y: int = None


print(Point())

Point(x=None, y=None)


In [30]:
p = Point(10, 20)
p

Point(x=10, y=20)

In [31]:
i, j = p  # TypeError: a dataclass instance is not iterable, so it cannot be unpacked
i

TypeError: cannot unpack non-iterable Point object

In [32]:
p.x, p.y

(10, 20)

In [34]:
dir(p)

['__annotations__',
 '__class__',
 '__dataclass_fields__',
 '__dataclass_params__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'x',
 'y']

In [35]:
from collections import namedtuple

# Field names can be given as one comma-separated string, which is handy
# when the names come from something like a CSV header row.
# The repr uses the typename '기술', not the variable name 기술명세.
기술명세 = namedtuple('기술', '기술이름, 자격증, 연차')
장재영 = 기술명세('파이썬', '정보처리기사', '3')
장재영

기술(기술이름='파이썬', 자격증='정보처리기사', 연차='3')

In [36]:
dir(기술명세)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_field_defaults',
 '_fields',
 '_fields_defaults',
 '_make',
 '_replace',
 'count',
 'index',
 '기술이름',
 '연차',
 '자격증']

In [37]:
dir(장재영)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_field_defaults',
 '_fields',
 '_fields_defaults',
 '_make',
 '_replace',
 'count',
 'index',
 '기술이름',
 '연차',
 '자격증']

# 02 deque

- 양쪽 끝에서 빠르게 추가와 삭제를 할 수 있는 리스트류 컨테이너
- 양방향 큐
- 데이터의 회전도 가능
- maxlen을 설정하여 최대 항목 수를 제한

In [39]:
from collections import deque

a = [10, 20, 30, 40, 50]
d = deque(a)
d

deque([10, 20, 30, 40, 50])

In [40]:
dir(d)

['__add__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'appendleft',
 'clear',
 'copy',
 'count',
 'extend',
 'extendleft',
 'index',
 'insert',
 'maxlen',
 'pop',
 'popleft',
 'remove',
 'reverse',
 'rotate']

In [41]:
d.append(100)
d

deque([10, 20, 30, 40, 50, 100])

In [43]:
d.appendleft(1000)
d

deque([1000, 1000, 10, 20, 30, 40, 50, 100])

In [44]:
temp = d.pop()
temp

100

In [45]:
temp = d.popleft()
temp

1000

In [46]:
d

deque([1000, 10, 20, 30, 40, 50])

In [47]:
temp = d.popleft()
temp

1000

In [48]:
d

deque([10, 20, 30, 40, 50])

In [49]:
d.rotate(2)
d

deque([40, 50, 10, 20, 30])

In [50]:
d.rotate(-1)

In [51]:
d

deque([50, 10, 20, 30, 40])

# 03 ChainMap

- 여러 개의 매핑(딕셔너리류) 자료형을 하나의 뷰로 연결할 수 있음

In [52]:
from collections import ChainMap

oneDict = {'one': 1, 'two': 2, 'three': 3}
twoDict = {'four': 4}

chain = ChainMap(oneDict, twoDict)
chain

ChainMap({'one': 1, 'two': 2, 'three': 3}, {'four': 4})

In [53]:
dir(chain)

['_MutableMapping__marker',
 '__abstractmethods__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__missing__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 'clear',
 'copy',
 'fromkeys',
 'get',
 'items',
 'keys',
 'maps',
 'new_child',
 'parents',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

In [54]:
'one' in chain

True

In [55]:
'four' in chain

True

In [56]:
'five' in chain

False

In [57]:
len(chain)

4

In [60]:
chain.values()
chain.keys()
chain.items()

ItemsView(ChainMap({'one': 1, 'two': 2, 'three': 3}, {'four': 4}))

In [61]:
chain[0]  # error

KeyError: 0

In [64]:
chain['oneDict']  # error

KeyError: 'oneDict'

In [65]:
chain.maps

[{'one': 1, 'two': 2, 'three': 3}, {'four': 4}]

In [66]:
chain.maps[0]

{'one': 1, 'two': 2, 'three': 3}

In [67]:
chain.maps[1]

{'four': 4}

In [68]:
# NOTE(review): ChainMap is documented for mappings. Lists are accepted here
# without any check, and a membership test is evaluated against each list in
# turn, but key-based lookups will not behave like a dict. For chaining plain
# sequences, itertools.chain(one, two) is the usual tool.
one = [1, 2, 3, 4]
two = [5, 6, 7, 8]

three = ChainMap(one, two)
three

ChainMap([1, 2, 3, 4], [5, 6, 7, 8])

In [69]:
6 in three

True

In [70]:
three.maps[0]

[1, 2, 3, 4]

In [71]:
three.maps[1]

[5, 6, 7, 8]

# 04 Counter

- 객체의 요소별 개수를 key(요소)와 value(개수) 쌍으로 저장

In [73]:
from collections import Counter

a = [1, 1, 1, 1, 2, 3, 2, 3, 4, 4, 2, 3, 5, 6, 4, 6, 7, 8, 7, 7, 7, 6, 8, 8, 6, 6, 5]

c = Counter(a)
c

Counter({1: 4, 2: 3, 3: 3, 4: 3, 5: 2, 6: 5, 7: 4, 8: 3})

In [74]:
dir(c)

['__add__',
 '__and__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__isub__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__missing__',
 '__module__',
 '__ne__',
 '__neg__',
 '__new__',
 '__or__',
 '__pos__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__weakref__',
 '_keep_positive',
 'clear',
 'copy',
 'elements',
 'fromkeys',
 'get',
 'items',
 'keys',
 'most_common',
 'pop',
 'popitem',
 'setdefault',
 'subtract',
 'update',
 'values']

In [76]:
for i in c.elements():
    print(i)
    print('---')

1
---
1
---
1
---
1
---
2
---
2
---
2
---
3
---
3
---
3
---
4
---
4
---
4
---
5
---
5
---
6
---
6
---
6
---
6
---
6
---
7
---
7
---
7
---
7
---
8
---
8
---
8
---


In [80]:
c.keys()
c.values()
c.items()

dict_items([(1, 4), (2, 3), (3, 3), (4, 3), (5, 2), (6, 5), (7, 4), (8, 3)])

In [81]:
for i, j in c.items():
    print(i, j)

1 4
2 3
3 3
4 3
5 2
6 5
7 4
8 3


In [82]:
c.most_common()

[(6, 5), (1, 4), (7, 4), (2, 3), (3, 3), (4, 3), (8, 3), (5, 2)]

In [85]:
s = 'hello, world'
sc = Counter(s)
sc

Counter({'h': 1,
         'e': 1,
         'l': 3,
         'o': 2,
         ',': 1,
         ' ': 1,
         'w': 1,
         'r': 1,
         'd': 1})

In [86]:
sc.update('hello')

In [87]:
sc

Counter({'h': 2,
         'e': 2,
         'l': 5,
         'o': 3,
         ',': 1,
         ' ': 1,
         'w': 1,
         'r': 1,
         'd': 1})

In [88]:
sc.subtract('hello')

In [89]:
sc

Counter({'h': 1,
         'e': 1,
         'l': 3,
         'o': 2,
         ',': 1,
         ' ': 1,
         'w': 1,
         'r': 1,
         'd': 1})

In [90]:
sc.subtract('hello')
sc

Counter({'h': 0,
         'e': 0,
         'l': 1,
         'o': 1,
         ',': 1,
         ' ': 1,
         'w': 1,
         'r': 1,
         'd': 1})

In [91]:
sc.subtract(Counter('hello'))
sc

Counter({'h': -1,
         'e': -1,
         'l': -1,
         'o': 0,
         ',': 1,
         ' ': 1,
         'w': 1,
         'r': 1,
         'd': 1})

In [92]:
d = {'one': 100, 'two': 200, 'three': 200}
s = Counter(d)
s

Counter({'one': 100, 'two': 200, 'three': 200})

In [93]:
d = {'one': '100', 'two': '200', 'three': '200'}
s = Counter(d)
s

Counter({'one': '100', 'two': '200', 'three': '200'})

# 05 OrderedDict

- 순서가 있는 dict 자료형
- LRU 알고리즘을 구현하는 용도로 자주 사용
- 3.5버전까지 딕셔너리 순서 없음
- 3.6버전(CPython)은 구현 세부사항으로 삽입 순서 유지, 3.7버전부터 언어 명세로 삽입 순서 보장

In [102]:
from collections import OrderedDict

oneDict = {'one': 1, 'two': 2, 'three': 3}
d = OrderedDict(oneDict)
d

OrderedDict([('one', 1), ('two', 2), ('three', 3)])

In [103]:
dir(d)

['__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'clear',
 'copy',
 'fromkeys',
 'get',
 'items',
 'keys',
 'move_to_end',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

In [104]:
d.move_to_end('one')
d

OrderedDict([('two', 2), ('three', 3), ('one', 1)])

In [105]:
d.move_to_end('two')  # 맨 뒤로 보냄
d

OrderedDict([('three', 3), ('one', 1), ('two', 2)])

In [106]:
d.move_to_end('two', False)  # 맨 앞으로 보냄
d

OrderedDict([('two', 2), ('three', 3), ('one', 1)])

In [107]:
d.move_to_end('one', False)  # 맨 앞으로 보냄
d

OrderedDict([('one', 1), ('two', 2), ('three', 3)])

In [108]:
d.popitem(True)  # 맨 뒤에서  꺼냄

('three', 3)

In [109]:
d.popitem(False)  #맨 앞에서 꺼냄

('one', 1)

In [111]:
oneDict = {'one': 1, 'two': 2, 'three': 3}
d = OrderedDict(oneDict)

# d.popitem()  # pops from the end by default (last=True)
d.move_to_end('one')  # moves the key to the end by default (last=True)
d

OrderedDict([('two', 2), ('three', 3), ('one', 1)])

# 06 defaultdict

- 키로 어떤 값이 들어올지 모를 경우 사용 (없는 키에 접근하면 팩토리 함수가 만든 기본값이 자동으로 저장됨)


In [112]:
from collections import defaultdict

oneDict = {'one': 1, 'two': 2, 'three': 3}
# Intentional error: the first argument must be a callable factory (or None),
# and a dict instance is not callable. To pre-fill the mapping, pass the dict
# as the second argument, e.g. defaultdict(int, oneDict).
d = defaultdict(oneDict)
d

TypeError: first argument must be callable or None

In [113]:
d = defaultdict(str)
d['one'] = '1'
d['two'] = '2'
d['three']
d

defaultdict(str, {'one': '1', 'two': '2', 'three': ''})

In [114]:
d = defaultdict(list)
d['one'] = '1'
d['two'] = '2'
d['three']
d

defaultdict(list, {'one': '1', 'two': '2', 'three': []})

In [115]:
d = defaultdict(int)
d['one'] = '1'
d['two'] = '2'
d['three']
d

defaultdict(int, {'one': '1', 'two': '2', 'three': 0})

In [116]:
dir(d)

['__class__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__missing__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'clear',
 'copy',
 'default_factory',
 'fromkeys',
 'get',
 'items',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

In [117]:
# Count with defaultdict(int): reading a missing key first stores int() == 0,
# so every key can be incremented without an explicit existence check.
d = defaultdict(int)
for i in range(10):
    d[i] = d[i] + 1

d

defaultdict(int, {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1})

In [118]:
# defaultdict(list) is most often used to collect several values under one
# key; the (course title, enrollment count) pairs below serve as sample data.
강좌 = [
    ('인스타그램클론', 1123),
    ('정규표현식', 23),
    ('MBTI페이지만들기', 1313),
    ('python부트캠프', 312),
    ('눈떠보니코딩테스트전날', 1623)
]

강좌

[('인스타그램클론', 1123),
 ('정규표현식', 23),
 ('MBTI페이지만들기', 1313),
 ('python부트캠프', 312),
 ('눈떠보니코딩테스트전날', 1623)]

In [120]:
# Group course titles by the magnitude of their enrollment count.
# defaultdict(list) creates each bucket the first time it is appended to.
d = defaultdict(list)
for 강의, 수강생 in 강좌:
    if 수강생 < 100:
        d['십'].append(강의)
    elif 수강생 < 1000:
        d['백'].append(강의)
    elif 수강생 < 10000:
        d['천'].append(강의)
    # Counts of 10000 or more match no bucket and are left out.

d

defaultdict(list,
            {'천': ['인스타그램클론', 'MBTI페이지만들기', '눈떠보니코딩테스트전날'],
             '십': ['정규표현식'],
             '백': ['python부트캠프']})

# 07 UserDict, UserList, UserString

- 내장 dict, list, str은 오버라이딩한 메서드가 다른 내장 메서드에서 호출되지 않는 경우가 있으므로, 문제 없이 메서드를 오버라이딩하려면 dict, list, str을 상속하는 것보다 UserDict, UserList, UserString을 상속하는 것이 좋음
- 그러나 간단하게 사용할 때에는 dict, list, str을 직접 상속받아 사용하길 권장

In [121]:
from collections import UserDict, UserList, UserString

class CustomDict(UserDict):
    """UserDict subclass that can also be queried by stored value."""

    def contain_value(self, values):
        """Return True if ``values`` is one of the values held in ``self.data``."""
        # Same identity-then-equality check that ``in`` applies to a values view.
        return any(
            stored is values or stored == values
            for stored in self.data.values()
        )

d = CustomDict()
dir(d)

['_MutableMapping__marker',
 '__abstractmethods__',
 '__class__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 'clear',
 'contain_value',
 'copy',
 'data',
 'fromkeys',
 'get',
 'items',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

In [122]:
d['one'] = 1
d['two'] = 2

'one' in d

True

In [123]:
d

{'one': 1, 'two': 2}

In [124]:
type(d.data)

dict

In [125]:
d.data

{'one': 1, 'two': 2}

In [127]:
d.contain_value(1)

True