###### References: 
- https://docs.python.org/3/reference/datamodel.html
- Fluent Python, 2nd Edition, by Luciano Ramalho. Chapter 5: Data Class Builders

# Data Class Builders
Data classes are simple classes that are just a collection of fields, with little or no extra functionality. Python offers three builders for them:
* `collections.namedtuple`
* `typing.NamedTuple`
* `@dataclasses.dataclass`

# Overview of Data Class Builders


In [1]:
class Coordinate:
    """Plain class holding a latitude/longitude pair.

    Only ``__init__`` is defined, so instances fall back on the ``object``
    defaults for ``__repr__`` and ``__eq__``.
    """

    def __init__(self, lat, lon):
        # Store both coordinates in a single tuple assignment.
        self.lat, self.lon = lat, lon

In [2]:
moscow = Coordinate(55.756, 37.617)

In [3]:
moscow

<__main__.Coordinate at 0x108026260>

In [4]:
location  = Coordinate(55.756, 37.617)

In [5]:
location == moscow

False

In [6]:
location

<__main__.Coordinate at 0x1080262c0>

In [7]:
(location.lat, location.lon)  == (moscow.lat, moscow.lon)

True

## `namedtuple`

In [8]:
from collections import namedtuple

In [9]:
Coordinate = namedtuple('Coordinate', 'lat lon')
issubclass(Coordinate, tuple)

True

In [10]:
moscow = Coordinate(55.756, 37.617)
moscow

Coordinate(lat=55.756, lon=37.617)

In [11]:
moscow == Coordinate(lat=55.756, lon=37.617)

True

## `typing.NamedTuple`

In [12]:
import typing

In [13]:
Coordinate = typing.NamedTuple('Coordinate', [('lat', float), ('lon', float)])
issubclass(Coordinate, tuple)

True

In [14]:
typing.get_type_hints(Coordinate)

{'lat': float, 'lon': float}

In [15]:
from typing import NamedTuple

In [16]:
class Coordinate(NamedTuple):
    """Latitude/longitude pair whose ``str()`` shows hemisphere-tagged degrees."""
    lat: float
    lon: float

    def __str__(self):
        # The instance is a tuple, so it unpacks straight into its two fields.
        lat, lon = self
        if lat >= 0:
            ns = 'N'
        else:
            ns = 'S'
        if lon >= 0:
            we = 'E'
        else:
            we = 'W'
        # Magnitudes are shown to one decimal; the sign is carried by N/S and E/W.
        return f'{abs(lat):.1f}°{ns}, {abs(lon):.1f}°{we}'

In [17]:
from dataclasses import dataclass

In [18]:
@dataclass(frozen=True)
class Coordinate:
    """Immutable latitude/longitude pair whose ``str()`` shows hemisphere-tagged degrees."""
    lat: float
    lon: float

    def __str__(self):
        # Read each field once, then format from the local copies.
        lat, lon = self.lat, self.lon
        ns = 'N' if lat >= 0 else 'S'
        we = 'E' if lon >= 0 else 'W'
        return f'{abs(lat):.1f}°{ns}, {abs(lon):.1f}°{we}'

## Classic Named Tuple
The `collections.namedtuple` function is a factory that produces subclasses of `tuple` enhanced with field names and a class name.

In [19]:
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 14.043, (35.652832, 139.839478))
tokyo

City(name='Tokyo', country='JP', population=14.043, coordinates=(35.652832, 139.839478))

In [20]:
tokyo.population

14.043

In [21]:
tokyo.coordinates

(35.652832, 139.839478)

In [22]:
tokyo[1]

'JP'

In [23]:
City._fields

('name', 'country', 'population', 'coordinates')

In [24]:
LatLong = namedtuple('LatLong', 'lat long')
delhi_data = ('Delhi NCR', 'IN', 21.935, LatLong(28.613889, 77.208889))

delhi = City._make(delhi_data)
delhi._asdict()

{'name': 'Delhi NCR',
 'country': 'IN',
 'population': 21.935,
 'coordinates': LatLong(lat=28.613889, long=77.208889)}

In [25]:
import json

In [26]:
json.dumps(delhi._asdict())

'{"name": "Delhi NCR", "country": "IN", "population": 21.935, "coordinates": [28.613889, 77.208889]}'

In [27]:
Coordinate = namedtuple('Coordinate', 'lat lon reference', defaults=['WGS84'])
Coordinate(0,0)

Coordinate(lat=0, lon=0, reference='WGS84')

In [28]:
Coordinate._field_defaults

{'reference': 'WGS84'}

In [29]:
Coordinate(1)

TypeError: Coordinate.__new__() missing 1 required positional argument: 'lon'

In [30]:
Coordinate = namedtuple('Coordinate', 'lat lon reference', defaults=[0,0,'WGS84'])


In [31]:
Coordinate()

Coordinate(lat=0, lon=0, reference='WGS84')

In [32]:
Coordinate(1)

Coordinate(lat=1, lon=0, reference='WGS84')

In [33]:
Coordinate(reference='test')

Coordinate(lat=0, lon=0, reference='test')

## Typed Named Tuple
`typing.NamedTuple`

In [34]:
from typing import NamedTuple

In [35]:
class Coordinate(NamedTuple):
    """Typed named tuple with `lat`, `lon` and a defaulted `reference` field."""
    lat: float
    lon: float
    reference: str = 'WGS84'   # has a default, so it may be omitted when instantiating

# Type Hints 101

## No Runtime effect

In [36]:
# no type checking at runtime
trash = Coordinate('Ni!', None)
print(trash) 

Coordinate(lat='Ni!', lon=None, reference='WGS84')


## Variable Annotation Syntax

`var_name: some_type = a_value`

In [37]:
class DemoPlainClass:
    a: int           # becomes an entry in __annotations__ only; no class attribute is created
    b: float = 1.1   # saved as an annotation and also becomes a class attribute
    c = 'spam'       # class attribute with no annotation

In [38]:
DemoPlainClass.__annotations__

{'a': int, 'b': float}

In [39]:
DemoPlainClass.a

AttributeError: type object 'DemoPlainClass' has no attribute 'a'

In [40]:
DemoPlainClass.b

1.1

In [41]:
DemoPlainClass.c

'spam'

## `typing.NamedTuple`

In [42]:
class DemoNTClass(typing.NamedTuple):
    # No class docstring on purpose: __doc__ is then generated from the fields.
    a: int           # becomes an annotation and an instance attribute
    b: float = 1.1   # another annotated field, with default value 1.1
    c = 'spam'       # plain class attribute: no annotation, so not a tuple field

In [43]:
DemoNTClass.__annotations__

{'a': int, 'b': float}

In [44]:
DemoNTClass.a

_tuplegetter(0, 'Alias for field number 0')

In [45]:
DemoNTClass.b

_tuplegetter(1, 'Alias for field number 1')

In [46]:
DemoNTClass.c

'spam'

In [47]:
DemoNTClass.__doc__

'DemoNTClass(a, b)'

In [48]:
nt = DemoNTClass(8)
nt.a

8

In [49]:
nt.b

1.1

In [50]:
nt.c

'spam'

## `@dataclass`

In [51]:
from dataclasses import dataclass

In [52]:
@dataclass
class DemoDataClass:
    # No class docstring on purpose: @dataclass then generates __doc__ from the fields.
    a: int           # annotated field without default: exists only on instances
    b: float = 1.1   # annotated field with default value 1.1
    c = 'spam'       # plain class attribute: no annotation, so not a dataclass field

In [53]:
DemoDataClass.__annotations__

{'a': int, 'b': float}

In [54]:
DemoDataClass.__doc__

'DemoDataClass(a: int, b: float = 1.1)'

In [55]:
# a will only exist in instances
DemoDataClass.a

AttributeError: type object 'DemoDataClass' has no attribute 'a'

In [56]:
dc = DemoDataClass(9)
dc.a

9

In [57]:
dc.a = 3.14
dc.b = 'oops'

In [58]:
dc.c = 'whatever'
dc.z = 'works'

`@dataclasses.dataclass(*, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, 
                        match_args=True, kw_only=False, slots=False, weakref_slot=False)`

## Field Options

In [59]:
@dataclass
class ClubMember:
    name: str
    # Deliberate error: @dataclass rejects a mutable default such as [] with ValueError.
    guests: list = []

ValueError: mutable default <class 'list'> for field guests is not allowed: use default_factory

The `default_factory` parameter lets you provide a function, class, or any other callable, which will be invoked with zero arguments to build a default value for each instance.

In [60]:
from dataclasses import field

In [61]:
@dataclass
class ClubMember:
    name: str
    # default_factory=list: list() is called to build the default for each new instance.
    guests: list = field(default_factory=list)

In [62]:
@dataclass
class ClubMember:
    name: str
    guests: list[str] = field(default_factory=list)  # `list[str]` means a list of str.

To create an `athlete` field with a default value of `False`, and also omit the field from the `__repr__`:

In [63]:
@dataclass
class ClubMember:
    name: str
    # A fresh list is built for each instance.
    guests: list = field(default_factory=list)
    # Defaults to False and is left out of the generated __repr__.
    athlete: bool = field(default=False, repr=False)

In [64]:
clubmember = ClubMember('Sam')

In [65]:
repr(clubmember.name)

"'Sam'"

In [66]:
repr(clubmember.athlete)

'False'

## Post-init Processing

In [67]:
@dataclass
class HackerClubMember(ClubMember):
    # No class docstring on purpose: @dataclass then generates __doc__ from the fields.
    all_handles = set()   # no annotation, so a class attribute shared by all instances
    handle: str = ''      # instance field; '' means "derive the handle from the name"

    def __post_init__(self):
        # Handles already taken, kept on the class of this instance.
        taken = type(self).all_handles
        if self.handle == '':
            # No handle given: fall back to the first word of the member's name.
            self.handle = self.name.split()[0]
        if self.handle not in taken:
            # Unused handle: register it and finish.
            taken.add(self.handle)
            return
        # Handles must be unique across members.
        raise ValueError(f'handle {self.handle!r} already exists.')

``HackerClubMember`` objects accept an optional ``handle`` argument::


In [68]:
anna = HackerClubMember('Anna Ravenscroft', handle='AnnaRaven')
anna

HackerClubMember(name='Anna Ravenscroft', guests=[], handle='AnnaRaven')

If ``handle`` is omitted, it's set to the first part of the member's name::

In [69]:
leo = HackerClubMember('Leo Rochael')
leo

HackerClubMember(name='Leo Rochael', guests=[], handle='Leo')

Members must have a unique handle. The following ``leo2`` will not be created,
because its ``handle`` would be 'Leo', which was taken by ``leo``::

In [70]:
leo2 = HackerClubMember('Leo DaVinci')

ValueError: handle 'Leo' already exists.

In [71]:
leo2 = HackerClubMember('Leo DaVinci', handle='Neo')
leo2

HackerClubMember(name='Leo DaVinci', guests=[], handle='Neo')

In [72]:
HackerClubMember.__doc__

"HackerClubMember(name: str, guests: list = <factory>, athlete: bool = False, handle: str = '')"

## Typed Class Attributes
    all_handles: ClassVar[Set[str]] = set()
    
## Initialization Variables That Are Not Fields
### Init-Only 
https://docs.python.org/3/library/dataclasses.html

    from dataclasses import InitVar

    @dataclass
    class C:
        i: int
        j: int | None = None
        database: InitVar[DatabaseType | None] = None

        def __post_init__(self, database):
            if self.j is None and database is not None:
                self.j = database.lookup('j')

    c = C(10, database=my_database)
    
## Eg: Dublin Core Resource Record

The Dublin Core Metadata Element Set (DCMES) is a set of fifteen "core" elements (properties) for describing resources.
 
The resources described using the Dublin Core may be digital resources (video, images, web pages, etc.) as well as physical resources such as books or works of art. 

In [73]:
import datetime
from dataclasses import dataclass, field
from datetime import date
from enum import Enum, auto
from typing import Optional

In [74]:
class ResourceType(Enum):  # provide type safe values
    """Kinds of media that a `Resource` can describe."""
    BOOK = auto()
    EBOOK = auto()
    VIDEO = auto()


@dataclass
class Resource:
    """Media resource description."""
    identifier: str
    title: str = '<untitled>'                          # first field with default
    creators: list[str] = field(default_factory=list)
    # Annotated via the `datetime` module rather than the bare name `date`:
    # the default is bound to the class-level name `date` before the
    # annotation is evaluated, so `Optional[date]` would see that default
    # (None) instead of the imported class.
    date: Optional[datetime.date] = None
    type: ResourceType = ResourceType.BOOK
    description: str = ''
    language: str = ''
    subjects: list[str] = field(default_factory=list)

In [75]:
description = 'Improving the design of existing code'

book = Resource('978-0-13-475759-9', 'Refactoring, 2nd Edition',
             ['Martin Fowler', 'Kent Beck'], date(2018, 11, 19),
             ResourceType.BOOK, description, 'EN',
             ['computer programming', 'OOP'])

In [76]:
book  # doctest: +NORMALIZE_WHITESPACE

Resource(identifier='978-0-13-475759-9', title='Refactoring, 2nd Edition', creators=['Martin Fowler', 'Kent Beck'], date=datetime.date(2018, 11, 19), type=<ResourceType.BOOK: 1>, description='Improving the design of existing code', language='EN', subjects=['computer programming', 'OOP'])

In [77]:
from dataclasses import fields

In [78]:
@dataclass
class Resource:
    """Media resource description.

    The custom `__repr__` prints one `name = value` line per dataclass field.
    """
    identifier: str
    title: str = '<untitled>'
    creators: list[str] = field(default_factory=list)
    # Annotated via the `datetime` module rather than the bare name `date`:
    # the default is bound to the class-level name `date` before the
    # annotation is evaluated, so `Optional[date]` would see that default
    # (None) instead of the imported class.
    date: Optional[datetime.date] = None
    type: ResourceType = ResourceType.BOOK
    description: str = ''
    language: str = ''
    subjects: list[str] = field(default_factory=list)

    def __repr__(self):
        """Return a multi-line repr: class name, one indented line per field, then ')'."""
        cls = self.__class__
        indent = ' ' * 4
        # fields(cls) yields the dataclass fields in declaration order.
        field_lines = [
            f'{indent}{f.name} = {getattr(self, f.name)!r},'
            for f in fields(cls)
        ]
        return '\n'.join([f'{cls.__name__}(', *field_lines, ')'])

In [79]:
book = Resource('978-0-13-475759-9', 'Refactoring, 2nd Edition',
             ['Martin Fowler', 'Kent Beck'], date(2018, 11, 19),
             ResourceType.BOOK, description, 'EN',
             ['computer programming', 'OOP'])

book  # doctest: +NORMALIZE_WHITESPACE

Resource(
    identifier = '978-0-13-475759-9',
    title = 'Refactoring, 2nd Edition',
    creators = ['Martin Fowler', 'Kent Beck'],
    date = datetime.date(2018, 11, 19),
    type = <ResourceType.BOOK: 1>,
    description = 'Improving the design of existing code',
    language = 'EN',
    subjects = ['computer programming', 'OOP'],
)

## Data Class as a Code Smell

### Data Class as Scaffolding

### Data Class as Intermediate Representation

# Pattern Matching Class Instances
## Simple Class Patterns
### Keyword Class Patterns

In [80]:
class City(typing.NamedTuple):
    """City record with fields declared in the order continent, name, country.

    Positional class patterns match sub-patterns against the fields in this order.
    """
    continent: str
    name: str
    country: str

In [81]:
cities = [
    City('Asia', 'Tokyo', 'JP'),
    City('Asia', 'Delhi', 'IN'),
    City('North America', 'Mexico City', 'MX'),
    City('North America', 'New York', 'US'),
    City('South America', 'São Paulo', 'BR')
]

In [82]:
# return a list of cities
def match_asian_cities():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia'):
                results.append(city)
    return results

In [83]:
# collect city values
def match_asian_cities():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia', country=cc):
                results.append(cc)
    return results

### Positional Class Patterns

In [84]:
def match_asian_cities_pos():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia'):
                results.append(city)
    return results

In [85]:
def match_asian_cities_pos():
    results = []
    for city in cities:
        match city:
            case City('Asia', _, country):
                results.append(country)
    return results

In [86]:
City.__match_args__

('continent', 'name', 'country')