# Introduction to Dataclasses

In [2]:
from dataclasses import dataclass

@dataclass
class Position:
    """A named geographic position given by longitude and latitude."""
    name: str
    lon: float  # longitude (printed as degrees east in this notebook)
    lat: float  # latitude (printed as degrees north in this notebook)

In [3]:
pos = Position('Oslo', 10.8, 59.9)
print(pos)

Position(name='Oslo', lon=10.8, lat=59.9)


In [4]:
pos.lat

59.9

In [5]:
print(f'{pos.name} is at {pos.lat}°N, {pos.lon}°E')

Oslo is at 59.9°N, 10.8°E


In [6]:
@dataclass
class Position:
    """A named geographic position whose coordinates default to 0.0."""
    name: str  # required: has no default
    lon: float = 0.0  # longitude
    lat: float = 0.0  # latitude

In [7]:
Position('Default Island')

Position(name='Default Island', lon=0.0, lat=0.0)

In [8]:
Position('Greenwich', lat=51.8)

Position(name='Greenwich', lon=0.0, lat=51.8)

In [9]:
Position('Vancouver', -123.1, 49.3)

Position(name='Vancouver', lon=-123.1, lat=49.3)

In [10]:
from dataclasses import dataclass
from typing import Any

@dataclass
class WithoutExplicitTypes:
    """Dataclass whose fields are annotated with ``Any`` instead of a concrete type."""
    # Each field still needs an annotation; Any avoids committing to a type.
    name: Any
    value: Any = 42

In [11]:
WithoutExplicitTypes('')

WithoutExplicitTypes(name='', value=42)

In [12]:
Position(3.14, 'pi day', 2018)

Position(name=3.14, lon='pi day', lat=2018)

In [13]:
from math import asin, cos, radians, sin, sqrt

@dataclass
class Position:
    """A named geographic position that can measure the distance to another one.

    Attributes:
        name: Label for the position.
        lon: Longitude in degrees (default 0.0).
        lat: Latitude in degrees (default 0.0).
    """
    name: str
    lon: float = 0.0
    lat: float = 0.0

    def distance_to(self, other, radius=6371):
        """Return the great-circle distance to ``other`` (haversine formula).

        Args:
            other: Any object exposing ``lon`` and ``lat`` in degrees.
            radius: Radius of the sphere; the result is expressed in the same
                unit. Defaults to 6371, the Earth radius in kilometers.

        Returns:
            The distance along the surface of the sphere, as a float.
        """
        lam_1, lam_2 = radians(self.lon), radians(other.lon)
        phi_1, phi_2 = radians(self.lat), radians(other.lat)
        h = (sin((phi_2 - phi_1) / 2)**2
             + cos(phi_1) * cos(phi_2) * sin((lam_2 - lam_1) / 2)**2)
        # Floating-point rounding can push h marginally above 1 for
        # (near-)antipodal points, which would make asin() raise ValueError.
        h = min(1.0, h)
        return 2 * radius * asin(sqrt(h))

In [14]:
oslo = Position('Oslo', 10.8, 59.9)
vancouver = Position('Vancouver', -123.1, 49.3)
oslo.distance_to(vancouver)

7181.7841229421165

## A deck of cards

In [15]:
from typing import List

@dataclass
class PlayingCard:
    """A playing card described by its rank and its suit."""
    rank: str
    suit: str

@dataclass
class Deck:
    """A collection of playing cards."""
    cards: List[PlayingCard]  # has no default, so the caller must supply it

In [16]:
queen_of_hearts = PlayingCard('Q', 'Hearts')
ace_of_spades = PlayingCard('A', 'Spades')
two_cards = Deck([queen_of_hearts, ace_of_spades])

two_cards

Deck(cards=[PlayingCard(rank='Q', suit='Hearts'), PlayingCard(rank='A', suit='Spades')])

In [17]:
RANKS = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
SUITS = '♣ ♢ ♡ ♠'.split()

def make_french_deck():
    """Build a list with one PlayingCard per (suit, rank) pair.

    Cards are grouped by suit in SUITS order and, within a suit, follow
    RANKS order.
    """
    deck = []
    for suit in SUITS:
        for rank in RANKS:
            deck.append(PlayingCard(rank, suit))
    return deck

In [18]:
make_french_deck()

[PlayingCard(rank='2', suit='♣'),
 PlayingCard(rank='3', suit='♣'),
 PlayingCard(rank='4', suit='♣'),
 PlayingCard(rank='5', suit='♣'),
 PlayingCard(rank='6', suit='♣'),
 PlayingCard(rank='7', suit='♣'),
 PlayingCard(rank='8', suit='♣'),
 PlayingCard(rank='9', suit='♣'),
 PlayingCard(rank='10', suit='♣'),
 PlayingCard(rank='J', suit='♣'),
 PlayingCard(rank='Q', suit='♣'),
 PlayingCard(rank='K', suit='♣'),
 PlayingCard(rank='A', suit='♣'),
 PlayingCard(rank='2', suit='♢'),
 PlayingCard(rank='3', suit='♢'),
 PlayingCard(rank='4', suit='♢'),
 PlayingCard(rank='5', suit='♢'),
 PlayingCard(rank='6', suit='♢'),
 PlayingCard(rank='7', suit='♢'),
 PlayingCard(rank='8', suit='♢'),
 PlayingCard(rank='9', suit='♢'),
 PlayingCard(rank='10', suit='♢'),
 PlayingCard(rank='J', suit='♢'),
 PlayingCard(rank='Q', suit='♢'),
 PlayingCard(rank='K', suit='♢'),
 PlayingCard(rank='A', suit='♢'),
 PlayingCard(rank='2', suit='♡'),
 PlayingCard(rank='3', suit='♡'),
 PlayingCard(rank='4', suit='♡'),
 PlayingCard

How do we assign a default value to the deck?

In [20]:
#Incorrect method
from dataclasses import dataclass
from typing import List

@dataclass
class Deck:  # Will NOT work
    # make_french_deck() returns a list, and dataclass rejects a list as a
    # field default: defining this class raises ValueError.
    cards: List[PlayingCard] = make_french_deck()

ValueError: mutable default <class 'list'> for field cards is not allowed: use default_factory

In [21]:
#Correct method
from dataclasses import dataclass, field
from typing import List

@dataclass
class Deck:
    """A deck of playing cards that defaults to a full French deck."""
    # default_factory is called each time a Deck is created without cards,
    # so instances do not share one list object.
    cards: List[PlayingCard] = field(default_factory=make_french_deck)

In [22]:
Deck()

Deck(cards=[PlayingCard(rank='2', suit='♣'), PlayingCard(rank='3', suit='♣'), PlayingCard(rank='4', suit='♣'), PlayingCard(rank='5', suit='♣'), PlayingCard(rank='6', suit='♣'), PlayingCard(rank='7', suit='♣'), PlayingCard(rank='8', suit='♣'), PlayingCard(rank='9', suit='♣'), PlayingCard(rank='10', suit='♣'), PlayingCard(rank='J', suit='♣'), PlayingCard(rank='Q', suit='♣'), PlayingCard(rank='K', suit='♣'), PlayingCard(rank='A', suit='♣'), PlayingCard(rank='2', suit='♢'), PlayingCard(rank='3', suit='♢'), PlayingCard(rank='4', suit='♢'), PlayingCard(rank='5', suit='♢'), PlayingCard(rank='6', suit='♢'), PlayingCard(rank='7', suit='♢'), PlayingCard(rank='8', suit='♢'), PlayingCard(rank='9', suit='♢'), PlayingCard(rank='10', suit='♢'), PlayingCard(rank='J', suit='♢'), PlayingCard(rank='Q', suit='♢'), PlayingCard(rank='K', suit='♢'), PlayingCard(rank='A', suit='♢'), PlayingCard(rank='2', suit='♡'), PlayingCard(rank='3', suit='♡'), PlayingCard(rank='4', suit='♡'), PlayingCard(rank='5', suit='♡

In [23]:
@dataclass
class Position:
    """A named geographic position with the coordinate unit stored as field metadata."""
    name: str
    # metadata is not interpreted by dataclasses itself; it can be read back
    # through fields(Position)[i].metadata.
    lon: float = field(default=0.0, metadata={'unit': 'degrees'})
    lat: float = field(default=0.0, metadata={'unit': 'degrees'})

In [24]:
from dataclasses import fields

fields(Position)

(Field(name='name',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object at 0x7f880d8f7eb0>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f880d8f7eb0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
 Field(name='lon',type=<class 'float'>,default=0.0,default_factory=<dataclasses._MISSING_TYPE object at 0x7f880d8f7eb0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'unit': 'degrees'}),_field_type=_FIELD),
 Field(name='lat',type=<class 'float'>,default=0.0,default_factory=<dataclasses._MISSING_TYPE object at 0x7f880d8f7eb0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'unit': 'degrees'}),_field_type=_FIELD))

In [25]:
lat_unit = fields(Position)[2].metadata['unit']
lat_unit

'degrees'

How do we make Deck easier to read?

First let's make PlayingCard easier to read

In [26]:
@dataclass
class PlayingCard:
    """A playing card with a compact, human-friendly string form.

    The generated repr is left untouched; only ``str()`` is customized.
    """
    rank: str
    suit: str

    def __str__(self):
        """Return the suit immediately followed by the rank."""
        return '{}{}'.format(self.suit, self.rank)

In [27]:
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades

PlayingCard(rank='A', suit='♠')

In [28]:
print(ace_of_spades)

♠A


In [29]:
from dataclasses import dataclass, field
from typing import List

@dataclass
class Deck:
    """A deck of cards with a compact repr built from each card's ``str()``."""
    cards: List[PlayingCard] = field(default_factory=make_french_deck)

    def __repr__(self):
        """Return ``ClassName(card, card, ...)`` using the string form of each card."""
        listing = ', '.join(str(card) for card in self.cards)
        return '{}({})'.format(self.__class__.__name__, listing)

In [30]:
Deck()

Deck(♣2, ♣3, ♣4, ♣5, ♣6, ♣7, ♣8, ♣9, ♣10, ♣J, ♣Q, ♣K, ♣A, ♢2, ♢3, ♢4, ♢5, ♢6, ♢7, ♢8, ♢9, ♢10, ♢J, ♢Q, ♢K, ♢A, ♡2, ♡3, ♡4, ♡5, ♡6, ♡7, ♡8, ♡9, ♡10, ♡J, ♡Q, ♡K, ♡A, ♠2, ♠3, ♠4, ♠5, ♠6, ♠7, ♠8, ♠9, ♠10, ♠J, ♠Q, ♠K, ♠A)

In [31]:
queen_of_hearts = PlayingCard('Q', '♡')
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades > queen_of_hearts

TypeError: '>' not supported between instances of 'PlayingCard' and 'PlayingCard'

In [32]:
@dataclass(order=True)
class PlayingCard:
    """A playing card that supports ordering comparisons.

    With ``order=True`` the comparison methods are generated and compare the
    fields in declaration order: ``rank`` first, then ``suit``.
    """
    rank: str
    suit: str

    def __str__(self):
        """Return the suit immediately followed by the rank."""
        return format(self.suit) + format(self.rank)

In [33]:
queen_of_hearts = PlayingCard('Q', '♡')
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades > queen_of_hearts

False

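With `order=True`, cards are compared field by field as if they were tuples, so the ranks are compared as strings and `'A'` sorts before `'Q'`. In a deck of cards the Ace outranks the Queen, so let's fix this.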

In [38]:
RANKS = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
SUITS = '♣ ♢ ♡ ♠'.split()

card = PlayingCard('Q', '♡')
card2 = PlayingCard('J', '♠')

Since our ranks are listed in ascending order, we can use each rank's index as its value. Multiplying that index by the number of suits and adding the suit's index then ranks cards of the same rank by suit.

In [43]:
RANKS.index(card.rank) * len(SUITS) + SUITS.index(card.suit)

42

In [41]:
RANKS.index(card2.rank) * len(SUITS) + SUITS.index(card2.suit)

39

In [44]:
from dataclasses import dataclass, field

RANKS = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
SUITS = '♣ ♢ ♡ ♠'.split()

@dataclass(order=True)
class PlayingCard:
    """A playing card ordered by rank first and by suit among equal ranks."""
    # Derived in __post_init__, hence excluded from __init__ and from the repr.
    # Declared first so that ordering comparisons look at it before rank/suit.
    sort_index: int = field(init=False, repr=False)
    rank: str
    suit: str

    def __post_init__(self):
        """Derive ``sort_index`` from the positions of rank and suit in RANKS and SUITS."""
        rank_pos = RANKS.index(self.rank)
        suit_pos = SUITS.index(self.suit)
        self.sort_index = rank_pos * len(SUITS) + suit_pos

    def __str__(self):
        """Return the suit immediately followed by the rank."""
        return '{}{}'.format(self.suit, self.rank)


Note that .sort_index is added as the **first** field of the class. That way, the comparison is first done using .sort_index and the other fields are used only if there are ties. Because it is declared with field(init=False), .sort_index is not a parameter of the .__init__() method; it is calculated from the .rank and .suit fields in .__post_init__(). With repr=False it is also left out of the repr of the class to prevent further confusion.

In [45]:
queen_of_hearts = PlayingCard('Q', '♡')
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades > queen_of_hearts

True

In [46]:
Deck(sorted(make_french_deck()))

Deck(♣2, ♢2, ♡2, ♠2, ♣3, ♢3, ♡3, ♠3, ♣4, ♢4, ♡4, ♠4, ♣5, ♢5, ♡5, ♠5, ♣6, ♢6, ♡6, ♠6, ♣7, ♢7, ♡7, ♠7, ♣8, ♢8, ♡8, ♠8, ♣9, ♢9, ♡9, ♠9, ♣10, ♢10, ♡10, ♠10, ♣J, ♢J, ♡J, ♠J, ♣Q, ♢Q, ♡Q, ♠Q, ♣K, ♢K, ♡K, ♠K, ♣A, ♢A, ♡A, ♠A)

Suppose we need to generate 5 random cards for poker.

In [48]:
from random import sample

# No random seed is set in the cells shown, so the five cards differ between runs.
Deck(sample(make_french_deck(), k=5))

Deck(♣Q, ♡8, ♢7, ♣3, ♡4)

Note that sample works without replacement.

## Immutable Dataclasses

In [49]:
from dataclasses import dataclass

@dataclass(frozen=True)
class Position:
    """An immutable named geographic position."""
    # frozen=True makes assigning to a field raise FrozenInstanceError.
    name: str
    lon: float = 0.0
    lat: float = 0.0

In [50]:
pos = Position('Oslo', 10.8, 59.9)
pos.name

'Oslo'

In [51]:
pos.name = 'Stockholm'

FrozenInstanceError: cannot assign to field 'name'

In [52]:
from dataclasses import dataclass
from typing import List

@dataclass(frozen=True)
class ImmutableCard:
    """A frozen playing card: its fields cannot be reassigned after creation."""
    rank: str
    suit: str

@dataclass(frozen=True)
class ImmutableDeck:
    """A frozen deck; only rebinding the ``cards`` attribute is prevented."""
    # The list object itself stays mutable, so its items can still be replaced.
    cards: List[ImmutableCard]

In [53]:
queen_of_hearts = ImmutableCard('Q', '♡')
ace_of_spades = ImmutableCard('A', '♠')
deck = ImmutableDeck([queen_of_hearts, ace_of_spades])
deck

ImmutableDeck(cards=[ImmutableCard(rank='Q', suit='♡'), ImmutableCard(rank='A', suit='♠')])

In [54]:
deck.cards[0] = ImmutableCard('7', '♢')
deck

ImmutableDeck(cards=[ImmutableCard(rank='7', suit='♢'), ImmutableCard(rank='A', suit='♠')])

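Although both dataclasses are frozen, the list stored in .cards is not immutable: frozen only prevents assigning to the fields themselves. To make the deck truly immutable, we should store the cards in a tuple instead of a list.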

## Inheritance and Field Ordering

In [62]:
@dataclass
class Position:
    """A named geographic position; all three fields are required."""
    name: str
    lon: float  # longitude
    lat: float  # latitude

@dataclass
class Capital(Position):
    """A Position that additionally records a country."""
    country: str  # comes after the fields inherited from Position

In [63]:
Capital('Oslo', 10.8, 59.9, 'Norway')

Capital(name='Oslo', lon=10.8, lat=59.9, country='Norway')

In [64]:
@dataclass
class Position:
    """A named geographic position whose coordinates default to 0.0."""
    name: str
    lon: float = 0.0  # longitude
    lat: float = 0.0  # latitude

@dataclass
class Capital(Position):
    # The inherited lon and lat have defaults, so a field without a default
    # cannot follow them; defining this class raises TypeError.
    country: str  # Does NOT work

TypeError: non-default argument 'country' follows default argument

`country` has no default value, but it comes after the inherited `lon` and `lat` fields, which do. Once a field has a default value, every field that follows it must have a default value as well.

In [65]:
@dataclass
class Position:
    """A named geographic position whose coordinates default to 0.0."""
    name: str
    lon: float = 0.0  # longitude
    lat: float = 0.0  # latitude

@dataclass
class Capital(Position):
    """A Position with a country and a different default latitude."""
    country: str = 'Unknown'
    # Redefines the inherited field: the default changes, but lat keeps the
    # position it has in Position (before country).
    lat: float = 40.0

In [66]:
Capital('Madrid', country='Spain')

Capital(name='Madrid', lon=0.0, lat=40.0, country='Spain')

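Note that the field order is determined by the base class first: `lat` keeps the position it has in Position even though Capital redefines it, so the fields of Capital are ordered name, lon, lat, country.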

## Slots

In [68]:
from dataclasses import dataclass

@dataclass
class SimplePosition:
    """A plain dataclass position, used as the baseline for the slots comparison."""
    name: str
    lon: float
    lat: float

@dataclass
class SlotPosition:
    """A dataclass position that declares its attributes in ``__slots__``."""
    # Only the names listed here can be set as instance attributes.
    __slots__ = ['name', 'lon', 'lat']
    name: str
    lon: float
    lat: float

In [69]:
# pympler is not part of the standard library; it has to be installed in the
# kernel's environment for this cell to run.
from pympler import asizeof
simple = SimplePosition('London', -0.1, 51.5)
slot = SlotPosition('Madrid', -3.7, 40.4)
# Compare the sizes reported for the regular instance and the slots instance.
asizeof.asizesof(simple, slot)

ModuleNotFoundError: No module named 'pympler'

In [70]:
from timeit import timeit
timeit('slot.name', setup="slot=SlotPosition('Oslo', 10.8, 59.9)", globals=globals())

0.028239900000244234

In [71]:
timeit('simple.name', setup="simple=SimplePosition('Oslo', 10.8, 59.9)", globals=globals())

0.028091073999803484

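Slots typically reduce memory usage and can make attribute access faster. In the two timings above, however, the results are practically identical, so a performance gain is not visible in this run.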