## 集合基本操作

In [1]:
# A set literal holding five language names.
programming_languages = {'Python', 'Java', 'C#', 'Ruby', 'Go'}
# A set comprehension: the squares of the integers 0 through 9.
squares = {n ** 2 for n in range(10)}

In [2]:
print(programming_languages)
print(squares)

{'Ruby', 'Go', 'Python', 'C#', 'Java'}
{0, 1, 64, 4, 36, 9, 16, 49, 81, 25}


In [3]:
# Gotcha: empty braces create an empty dict, not an empty set.
# An empty set has to be built with set().
s = {}
true_s = set()
print(type(s))
print(type(true_s))

<class 'dict'>
<class 'set'>


In [4]:
dynamic_languages = {'Ruby', 'Python', 'JavaScript', 'Lua'}

In [5]:
programming_languages | dynamic_languages   # Union: elements present in either set

{'C#', 'Go', 'Java', 'JavaScript', 'Lua', 'Python', 'Ruby'}

In [6]:
programming_languages & dynamic_languages   # Intersection: elements present in both sets

{'Python', 'Ruby'}

In [7]:
programming_languages - dynamic_languages # Difference: elements only in programming_languages

{'C#', 'Go', 'Java'}

In [8]:
dynamic_languages - programming_languages   # Difference is not commutative: elements only in dynamic_languages

{'JavaScript', 'Lua'}

In [9]:
programming_languages ^ dynamic_languages   # Symmetric difference: elements present in exactly one of the two sets

{'C#', 'Go', 'Java', 'JavaScript', 'Lua'}

In [10]:
# Conventional approach vs. the Pythonic one

backend_developers = ['John', 'Rose', 'Jane', 'Steven']
frontend_developers = ['May', 'Rose', 'Jane', 'Jonny']
# Conventional approach: walk the backend list and keep every developer who
# is also in the frontend list. The result keeps backend_developers' order.
full_stack_developers = []
for developer in backend_developers:
    if developer in frontend_developers:
        full_stack_developers.append(developer)
full_stack_developers

['Rose', 'Jane']

In [12]:
# Pythonic approach: intersect the two lists as sets. Sets are unordered, so
# the order of the resulting list may differ from the loop-based result.
full_stack_developers = list(set(backend_developers) & set(frontend_developers))
full_stack_developers

['Jane', 'Rose']

: 

## 集合推导

In [3]:
from dataclasses import dataclass

@dataclass
class Person:
    """Minimal record type holding a single person's name."""
    name: str  # the person's name

In [4]:
# 1000 Person objects named 'name - 0' through 'name - 999'.
people = [Person(f'name - {i}') for i in range(1000)]

In [5]:
def build_names():
    """Collect the names of everyone in the global ``people`` with an explicit loop.

    Returns:
        set: the ``name`` attribute of every element of ``people``.
    """
    names = set()
    for person in people:
        # One attribute lookup plus one method call per element.
        names.add(person.name)
    return names

In [10]:
def build_names_comprehension():
    """Collect the names of everyone in the global ``people`` with a set comprehension.

    Returns:
        set: the ``name`` attribute of every element of ``people``.
    """
    names = {person.name for person in people}
    return names

In [11]:
%timeit build_names()

56.4 μs ± 1.27 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [12]:
%timeit build_names_comprehension()

40 μs ± 193 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [13]:
import dis

In [14]:
dis.dis(build_names)

  1           0 RESUME                   0

  2           2 LOAD_GLOBAL              1 (NULL + set)
             12 CALL                     0
             20 STORE_FAST               0 (names)

  3          22 LOAD_GLOBAL              2 (people)
             32 GET_ITER
        >>   34 FOR_ITER                29 (to 96)
             38 STORE_FAST               1 (person)

  4          40 LOAD_FAST                0 (names)
             42 LOAD_ATTR                5 (NULL|self + add)
             62 LOAD_FAST                1 (person)
             64 LOAD_ATTR                6 (name)
             84 CALL                     1
             92 POP_TOP
             94 JUMP_BACKWARD           31 (to 34)

  3     >>   96 END_FOR

  5          98 LOAD_FAST                0 (names)
            100 RETURN_VALUE


In [15]:
dis.dis(build_names_comprehension)

  1           0 RESUME                   0

  2           2 LOAD_GLOBAL              0 (people)
             12 GET_ITER
             14 LOAD_FAST_AND_CLEAR      0 (person)
             16 SWAP                     2
             18 BUILD_SET                0
             20 SWAP                     2
        >>   22 FOR_ITER                14 (to 54)
             26 STORE_FAST               0 (person)
             28 LOAD_FAST                0 (person)
             30 LOAD_ATTR                2 (name)
             50 SET_ADD                  2
             52 JUMP_BACKWARD           16 (to 22)
        >>   54 END_FOR
             56 STORE_FAST               1 (names)
             58 STORE_FAST               0 (person)

  3          60 LOAD_FAST                1 (names)
             62 RETURN_VALUE
        >>   64 SWAP                     2
             66 POP_TOP

  2          68 SWAP                     2
             70 STORE_FAST               0 (person)
             72 RERAISE     

## 集合检测列表值的存在

用集合来检测两个列表是否有共同元素，比逐个遍历列表更高效，也更易读。

In [21]:
# Two lists of 100 names each; they overlap on 'name - 50' through 'name - 99'.
favorite_names = [f'name - {i}' for i in range(100)]
other_names = [f'name - {i}' for i in range(50, 150)]

In [22]:
def are_common_names(one_names, other_names):
    """Return whether at least one name appears in both collections.

    This is the plain-loop baseline: each ``in`` test searches
    ``other_names`` again, which is a linear scan when it is a list.

    Args:
        one_names: iterable of names to look up.
        other_names: container that is searched for each name.

    Returns:
        bool: True as soon as a shared name is found, otherwise False
        (including when either input is empty).
    """
    for name in one_names:
        if name in other_names:
            # Stop at the first shared name; no flag variable is needed.
            return True
    return False

In [23]:
def are_common_names_set_operation(one_names, other_names):
    """Return whether the two collections share at least one name.

    Both inputs are converted to sets and intersected; the answer is
    whether that intersection is non-empty.

    Args:
        one_names: iterable of hashable names.
        other_names: iterable of hashable names.

    Returns:
        bool: True if the intersection contains at least one element.
    """
    shared_names = set(one_names) & set(other_names)
    return len(shared_names) > 0

In [24]:
%timeit are_common_names(favorite_names, other_names)

55.3 µs ± 658 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [25]:
%timeit are_common_names_set_operation(favorite_names, other_names)

5.95 µs ± 271 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## 集合高效去重

不重复的元素越多，用列表逐个判断去重与用集合去重之间的耗时差距就越大。注意：`set` 不保留元素原有的顺序。

In [26]:
# Sample input containing duplicates: 8 entries, 5 distinct titles.
list_of_games = [
    'PES2021',
    'Fifa22',
    '1943',
    '1943 Kai',
    'Super Street Fighter II',
    '1943',
    'PES2021',
    'Fifa22',
]

In [31]:
def get_unique_of_games(games):
    """Return the distinct entries of ``games`` as a list, in first-seen order.

    Args:
        games: iterable of game titles, possibly with duplicates.

    Returns:
        list: each distinct title once, in the order it first appeared.
    """
    unique_of_games = []
    for game in games:
        # The membership test searches the result list built so far.
        if game not in unique_of_games:
            unique_of_games.append(game)

    return unique_of_games

def get_unique_of_games_with_set_operation(games):
    """Return the distinct entries of ``games`` as a set.

    Unlike ``get_unique_of_games`` this returns a ``set`` rather than a
    list, so the original ordering of the titles is not preserved.

    Args:
        games: iterable of hashable game titles, possibly with duplicates.

    Returns:
        set: the distinct titles.
    """
    return set(games)

In [34]:
%timeit get_unique_of_games(list_of_games)

788 ns ± 26.1 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [35]:
%timeit get_unique_of_games_with_set_operation(list_of_games)

299 ns ± 3.96 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [36]:
import dis

In [37]:
dis.dis(get_unique_of_games)

  2           0 BUILD_LIST               0
              2 STORE_FAST               1 (unique_of_games)

  3           4 LOAD_FAST                0 (games)
              6 GET_ITER
        >>    8 FOR_ITER                22 (to 32)
             10 STORE_FAST               2 (game)

  4          12 LOAD_FAST                2 (game)
             14 LOAD_FAST                1 (unique_of_games)
             16 COMPARE_OP               7 (not in)
             18 POP_JUMP_IF_FALSE        8

  5          20 LOAD_FAST                1 (unique_of_games)
             22 LOAD_METHOD              0 (append)
             24 LOAD_FAST                2 (game)
             26 CALL_METHOD              1
             28 POP_TOP
             30 JUMP_ABSOLUTE            8

  7     >>   32 LOAD_FAST                1 (unique_of_games)
             34 RETURN_VALUE


In [39]:
dis.dis(get_unique_of_games_with_set_operation)

 10           0 LOAD_GLOBAL              0 (set)
              2 LOAD_FAST                0 (games)
              4 CALL_FUNCTION            1
              6 RETURN_VALUE


## 不可变集合

In [40]:
frozenset([1,2,3])  # built from a list

frozenset({1, 2, 3})

In [41]:
frozenset((1, 2, 3,))  # built from a tuple

frozenset({1, 2, 3})

In [42]:
frozenset(({'a':1, 'b':2}))  # built from a dict: only the keys are kept

frozenset({'a', 'b'})

In [43]:
languages = ['Python', 'C#', 'Java']

## Counter