# Collection Types

In [185]:
from typing import Union, TypedDict, TypeVar, Generic, Any
from collections import defaultdict, UserDict
from  collections.abc import Set, Iterable




## Annotating Collections


In [41]:
# A function that gets how many books from each author I have in my collection

def create_author_count_mapping(cookbooks: list) -> dict:
    """
    Tally how many books in the collection belong to each author.

    :param cookbooks: objects exposing an ``author`` attribute
    :return: mapping from author to number of books; it is a ``defaultdict``,
             so looking up an author that was never seen yields 0
    """
    tally = defaultdict(lambda: 0)
    for author in (entry.author for entry in cookbooks):
        tally[author] = tally[author] + 1
    return tally


In [42]:
# We can encode more information in the types by using bracket syntax 
# to indicate information about the types inside the collection:

class Cookbook:
    """Placeholder cookbook type; only its ``author`` attribute is read below."""

    # Declared (not assigned) so a type checker knows ``book.author`` exists;
    # instances must still set it before they are counted.
    author: str


# Maps an author's name to the number of their books.
AuthorToCountMapping = dict[str, int]


def create_author_count_mapping(
    cookbooks: list[Cookbook]
) -> AuthorToCountMapping:
    """
    Count how many cookbooks in the collection were written by each author.

    :param cookbooks: the cookbooks to tally; each must have ``author`` set
    :return: mapping from author to book count. The mapping is a
             ``defaultdict``, so looking up an unseen author yields 0.
    """
    counter: AuthorToCountMapping = defaultdict(int)
    for book in cookbooks:
        counter[book.author] += 1
    return counter


## Homogeneous Versus Heterogeneous Collections


In [43]:
# An example of a function that processes heterogeneous collections
# (can lead to potentially buggy code)

def adjust_recipe(recipe, servings):
    """
    Take a meal recipe and change the number of servings

    The recipe passed in is left untouched; the result is built from scratch.

    :param recipe: A list (or tuple), where the first element is the number of
                   servings, and the remainder of elements follow the
                   (name, amount, unit) format, such as ("flour", 1.5, "cup")
    :param servings: the number of servings
    :return list: a new list of ingredients, where the first element is the
                  number of servings
    :raises IndexError: if the recipe is empty
    :raises ZeroDivisionError: if the recipe's own serving count is 0
    """
    old_servings = recipe[0]
    factor = servings / old_servings
    # Slice instead of popping so the caller's recipe is not consumed.
    # please only use numbers that will be easily measurable
    scaled = [
        (ingredient, amount * factor, unit)
        for ingredient, amount, unit in recipe[1:]
    ]
    return [servings, *scaled]


In [44]:
# The last function argument types can be specified as follows

# (name, quantity, units); quantity is a float so fractional amounts such as
# 1.5 cups type-check (ints are still accepted where a float is expected)
Ingredient = tuple[str, float, str]
# Tuple of servings or ingredients. The trailing ``...`` makes it a tuple of
# any length; without it the alias would describe a tuple of exactly one item.
Recipe = tuple[Union[int, Ingredient], ...]

def adjust_recipe(recipe: Recipe, servings):
    """Signature-only stub showing how the ``recipe`` argument is annotated."""
    pass


In [45]:
# Consider a tuple of cookbooks

# A cookbook is represented here as a plain (name, page count) tuple, so the
# annotation needs a tuple alias rather than the empty Cookbook class.
Cookbook = tuple[str, int]

food_lab: Cookbook = ("The Food Lab", 958)
odd_bits: Cookbook = ("Odd Bits", 248)


In [46]:
# When accessing a tuple, you will typically index to the specific field you want

print(food_lab[0])  # index 0 is the cookbook's name
print(odd_bits[1])  # index 1 is the page count


The Food Lab
248


In [47]:
# But writing food_lab[0] or odd_bits[1] constantly can be burdensome.
# A dict can be used, so you can refer to it as food_lab['name'] or food_lab['page_count']

# Same cookbook as before, but with named fields instead of tuple positions.
food_lab = {}
food_lab["name"] = "The Food Lab"
food_lab["page_count"] = 958


In [48]:
# But heterogeneous dicts can be burdensome to type:

def print_cookbook(cookbook: dict[str, Union[str,int]]):
    """
    Signature-only stub: does nothing and returns ``None``.

    :param cookbook: dict with string keys whose values are ``str`` or ``int``
    """
    return None

In [49]:
# In these cases, it can be useful to use a TypedDict, 
# which can typecheck dicts of arbitrary complexity

class Range(TypedDict):
    """Dict shape with two float keys, ``min`` and ``max``."""
    min: float
    max: float

class NutritionInformation(TypedDict):
    """Dict shape describing a single nutrition measurement."""
    value: int
    unit: str
    # presumably the 95% confidence interval around ``value`` — confirm with the data source
    confidenceRange95Percent: Range
    standardDeviation: float

class RecipeNutritionInformation(TypedDict):
    """Dict shape grouping one ``NutritionInformation`` per nutrient."""
    # presumably how many recipes the figures were derived from — confirm with the data source
    recipes_used: int
    calories: NutritionInformation
    fat: NutritionInformation
    protein: NutritionInformation
    carbs: NutritionInformation

def get_nutrition_from_spoonacular(x):
    """Stub: ignores ``x`` and returns ``None``."""
    return None

recipe_name = 'SOME_RECIPE'
# The annotation tells the type checker which keys and value types to expect
# on the returned dict.
nutrition_information: RecipeNutritionInformation = get_nutrition_from_spoonacular(recipe_name)


## Creating New Collections

### Generics

In [50]:
# Consider a function that reverses a list

def reverse(coll: list) -> list:
    """Return a reversed copy of ``coll``; the argument itself is not modified."""
    back_to_front = slice(None, None, -1)
    return coll[back_to_front]


In [51]:
# If I want to indicate that the returned list should contain the same type
# as the passed-in list, I can use a generic type:

# One type variable shared by the parameter and the return annotation.
T = TypeVar('T')


def reverse(coll: list[T]) -> list[T]:
    """
    Return a reversed copy of ``coll``.

    Using the same ``T`` on both sides tells the type checker that the result
    holds the same element type as the argument.
    """
    return coll[-1::-1]


In [52]:
# I can represent a generic graph data structure with the following class

Node = TypeVar("Node")
Edge = TypeVar("Edge")


class Graph(Generic[Node, Edge]):
    """Directed graph that keeps, for every node, the list of items it points to."""

    def __init__(self):
        # defaultdict hands every first-seen node an empty relation list.
        self.edges: dict[Node, list[Edge]] = defaultdict(list)

    def add_relation(self, node: Node, to: Edge):
        """Append ``to`` to the relations of ``node``, registering the node if new."""
        outgoing = self.edges[node]
        outgoing.append(to)

    def get_relations(self, node: Node) -> list[Edge]:
        """
        Return the stored relation list of ``node`` (not a copy).

        Asking about an unknown node stores and returns a new empty list,
        because ``edges`` is a ``defaultdict``.
        """
        relations = self.edges[node]
        return relations


In [68]:
# With the last class, I can define different graphs 
# and still have them typecheck successfully

# Three graphs sharing one implementation but parameterised differently.
cookbooks: Graph[Cookbook, Cookbook] = Graph()
recipes: Graph[Recipe, Recipe] = Graph()
cookbook_recipes: Graph[Cookbook, Recipe] = Graph()

# Recipe -> Recipe relation.
recipes.add_relation(
    node=(('Pasta Bolognese', 1, 'dish'),),
    to=(('Pasta with Sausage and Basil', 2, 'dish'),),
)
# Cookbook -> Recipe relation.
cookbook_recipes.add_relation(
    node=('The Food Lab', 1),
    to=(('Pasta Bolognese', 3, 'dish'),),
)


In [66]:
# And adding incompatible types will be caught by mypy

# Both arguments are built with Recipe although `cookbooks` is declared as
# Graph[Cookbook, Cookbook]; this is the mismatch the type checker reports.
cookbooks.add_relation(
    node=Recipe(('Cheeseburger', 3, 'unit3')),
    to=Recipe(('Hamburger', 4, 'unit4')),
)


In [None]:
# To see it in action, create the file graph.py with the following contents

Ingredient = tuple[str, int, str]  # (name, quantity, units)
Recipe = tuple[Union[int, Ingredient]]  # NOTE(review): as written this is a 1-element tuple type — confirm intent
Cookbook = tuple[str, int]  # presumably (name, page count) — confirm

Node = TypeVar("Node")  # type of the keys of Graph.edges
Edge = TypeVar("Edge")  # type of the items in each relation list


class Graph(Generic[Node, Edge]):
    def __init__(self):
        self.edges: dict[Node, list[Edge]] = defaultdict(list)  # unseen nodes start with an empty list

    def add_relation(self, node: Node, to: Edge):
        self.edges[node].append(to)  # creates the node's list on first use

    def get_relations(self, node: Node) -> list[Edge]:
        return self.edges[node]  # also stores an empty list for an unseen node


recipes: Graph[Recipe, Recipe] = Graph()
cookbook_recipes: Graph[Cookbook, Recipe] = Graph()
cookbooks: Graph[Cookbook, Cookbook] = Graph()

recipes.add_relation(  # Recipe -> Recipe: matches the declared parameters
    (('Pasta Bolognese', 1, 'dish'), ), 
    (('Pasta with Sausage and Basil', 2, 'dish'), )
)
cookbook_recipes.add_relation(  # Cookbook -> Recipe: matches the declared parameters
    ('The Food Lab', 1), 
    (('Pasta Bolognese', 3, 'dish'), )
)
cookbooks.add_relation(  # deliberately wrong: Recipe-shaped tuples given to a Cookbook graph
    (('Cheeseburger', 3, 'dish'), ),  # flagged by mypy as argument 1
    (('Hamburger', 4, 'dish'), )  # flagged by mypy as argument 2
)


In [77]:
# Then run mypy against the file. The last type error will be caught

!mypy graph.py


graph.py:37: error: Argument 1 to "add_relation" of "Graph" has incompatible type "Tuple[Tuple[str, int, str]]"; expected "Tuple[str, int]"
graph.py:38: error: Argument 2 to "add_relation" of "Graph" has incompatible type "Tuple[Tuple[str, int, str]]"; expected "Tuple[str, int]"
Found 2 errors in 1 file (checked 1 source file)


### Other Uses for Generics


In [92]:
# Consider the following function signatures, where you have to specify
# a Union[X, APIError] each time, where only X changes

# Placeholder aliases: each is simply ``Any`` so the signatures below can be written.
NutritionInfo = Any
APIError = Any
Restaurant = Any


def get_nutrition_info(recipe: str) -> Union[NutritionInfo, APIError]:
    """Stub: returns ``None``; only the return annotation matters here."""
    return None


def get_ingredients(recipe: str) -> Union[list[Ingredient], APIError]:
    """Stub: returns ``None``; only the return annotation matters here."""
    return None


def get_restaurants_serving(recipe: str) -> Union[list[Restaurant], APIError]:
    """Stub: returns ``None``; only the return annotation matters here."""
    return None



In [93]:
# The last signatures can be simplified with generic types, as follows

T = TypeVar("T")
# Generic alias: APIResponse[X] stands for Union[X, APIError].
APIResponse = Union[T, APIError]


def get_nutrition_info(recipe: str) -> APIResponse[NutritionInfo]:
    """Stub: returns ``None``; only the return annotation matters here."""
    return None


def get_ingredients(recipe: str) -> APIResponse[list[Ingredient]]:
    """Stub: returns ``None``; only the return annotation matters here."""
    return None


def get_restaurants_serving(recipe: str) -> APIResponse[list[Restaurant]]:
    """Stub: returns ``None``; only the return annotation matters here."""
    return None


### Modifying Existing Types


In [121]:
# Suppose I want a dict-like data structure that automatically
# handles aliases: different salad names that refer to the same type of salad

class NutritionalInformation:
    """Skeleton container: supports ``obj[key]`` but always yields ``None``."""

    def __getitem__(self, key):
        return None

def get_nutrition_information(x):
    """Stub: ignores ``x`` and returns ``None``."""
    return None


# Intended usage, kept inside a string rather than executed
# (NutritionalInformation defines no __setitem__, so the assignment would fail).
"""
nutrition = NutritionalInformation()
nutrition["arugula"] = get_nutrition_information("arugula") 
print(nutrition["rocket"]) # Should print the same as nutrition["arugula"]
"""


'\nnutrition = NutritionalInformation()\nnutrition["arugula"] = get_nutrition_information("arugula") \nprint(nutrition["rocket"]) # Should print the same as nutrition["arugula"]\n'

In [166]:
# The obvious way is to inherit from 'dict' and override '__getitem__', as follows

def get_aliases(key):
    """
    Return the alternative names known for an ingredient.

    :param key: ingredient name to look up
    :return: a list holding the alias of ``key``, or an empty list when the
             name has no known alias
    """
    aliases = {'arugula': 'rocket', 'rocket': 'arugula'}
    # A name without an alias yields an empty list instead of a KeyError,
    # so callers can iterate over the result unconditionally.
    return [aliases[key]] if key in aliases else []


class NutritionalInformationDict(dict):
    """
    ``dict`` subclass whose ``[]`` lookup falls back to ingredient aliases.

    Only ``__getitem__`` is alias-aware; inherited methods such as ``get``
    do not go through it.
    """

    def __getitem__(self, key):
        """
        Return the value stored under ``key`` or under one of its aliases.

        :param key: ingredient name
        :raises KeyError: if neither ``key`` nor any alias is stored
        """
        try:
            return super().__getitem__(key)
        except KeyError:
            pass

        for alias in get_aliases(key):
            try:
                return super().__getitem__(alias)
            except KeyError:
                pass
        # Must sit after the loop: every alias gets a chance, and a name with
        # no aliases still raises instead of silently returning None.
        raise KeyError(f'Could not find {key} or any of its aliases')


# But with the last implementation, the 'get' built-in dict method won't work as expected
# since it's implemented with the original version of '__getitem__'

nutrition = NutritionalInformationDict({'arugula': 'ARUGULA_DESCRIPTION'})
# 'rocket' is reachable only through the overridden __getitem__; dict.get
# does not call it, so the default value is printed.
print(nutrition.get('rocket', 'No Ingredient Found'))


No Ingredient Found


In [165]:
# To avoid the last issue, inherit your class from the 'UserDict' type, as follows

class NutritionalInformationDict(UserDict):
    """``UserDict`` whose item lookup also resolves ingredient aliases."""

    def __getitem__(self, key):
        """
        Return the value stored under ``key`` or under one of its aliases.

        :raises KeyError: if neither ``key`` nor any alias is stored
        """
        store = self.data
        # Exact name first; aliases are consulted only on a miss.
        if key in store:
            return store[key]

        for candidate in get_aliases(key):
            if candidate in store:
                return store[candidate]

        raise KeyError(f'Could not find {key} or any of its aliases')

# """
nutrition = NutritionalInformationDict({'arugula': 'ARUGULA_DESCRIPTION'})
# 'rocket' itself is not stored; the lookup succeeds through its alias 'arugula'.
nutrition['rocket']


'ARUGULA_DESCRIPTION'

### As Easy as ABC


In [177]:
# There's no 'UserSet' like the last type 'UserDict'. 
# But a custom set can be created with abstract base classes, if it implements 
# the '__contains__', '__iter__' and '__len__' methods

class AliasedIngredientsSet(Set):
    """
    Read-only set of ingredient names whose membership test honours aliases.

    Only ``__contains__``, ``__iter__`` and ``__len__`` are written here; the
    remaining set operations come from ``collections.abc.Set``.
    """

    def __init__(self, ingredients: Iterable[str]):
        """
        :param ingredients: the ingredient names; any iterable is accepted
        """
        # Copy into a real set. The Set mixin operators (|, &, -, ^) build
        # their result by calling this constructor with a one-shot iterator;
        # storing that iterator directly would break len() and make the
        # result iterable only once.
        self.ingredients = set(ingredients)

    def __contains__(self, value: str):
        """Return True if ``value`` or one of its aliases is stored."""
        return (
            value in self.ingredients or
            any(alias in self.ingredients for alias in get_aliases(value))
        )

    def __iter__(self):
        """Iterate over the stored names (aliases are not expanded)."""
        return iter(self.ingredients)

    def __len__(self):
        """Return the number of stored names."""
        return len(self.ingredients)


In [178]:
# Now the AliasedIngredientsSet class behaves like a standard set, but with the new behavior added

ingredients = AliasedIngredientsSet({'arugula', 'eggplant', 'pepper'})

# Iteration goes through __iter__, which yields the stored names only.
for ingredient in ingredients:
    print(ingredient)


eggplant
pepper
arugula


In [179]:
print(len(ingredients))  # len() is served by __len__

3


In [180]:
print('arugula' in ingredients)  # stored name: found directly by __contains__


True


In [181]:
print('rocket' in ingredients) # Aliasing behavior working as expected: 'rocket' is not stored, its alias 'arugula' is


True


In [183]:
list(ingredients | AliasedIngredientsSet({'garlic'})) # The set union operation; `|` is not defined on the class, it comes from collections.abc.Set


['eggplant', 'pepper', 'arugula', 'garlic']

In [191]:
# Types derived from collections.abc can be typechecked as follows

def print_items(items):
    """Write every element of ``items`` to stdout on one line, each followed by a space."""
    remaining = iter(items)
    for element in remaining:
        print(element, end=' ')


In [192]:
# The same function accepts a list, a set and a dict, since all are iterable.
print_items([1,2,3])
print('\n')  # separates the runs: print adds its own newline after the '\n'
print_items({4, 5, 6})
print('\n')
print_items({"A": 1, "B": 2, "C": 3})  # iterating a dict yields its keys


1 2 3 

4 5 6 

A B C 

In [194]:
def print_items(items: Iterable):
    """
    Print each element of ``items`` on its own line.

    :param items: any iterable; it is consumed once, in iteration order
    """
    for entry in items:
        print(str(entry))

# Every argument below is iterable, so each call matches the Iterable annotation.
print_items([1,2,3])
print('\n')  # separates the runs: print adds its own newline after the '\n'
print_items({4, 5, 6})
print('\n')
print_items({"A": 1, "B": 2, "C": 3})  # iterating a dict yields its keys
print('\n')
# The custom set qualifies too: it defines __iter__.
print_items(AliasedIngredientsSet({'arugula', 'eggplant', 'pepper'}))


1
2
3


4
5
6


A
B
C


eggplant
pepper
arugula
