A data class is a class whose main purpose is to store data.<br>
- These classes can be customised to compare and hash their data.

In [1]:
def typename(obj):
    """Return the name of the class that *obj* is an instance of."""
    cls = type(obj)
    return cls.__name__

In [2]:
class Position:
    """A latitude/longitude pair that is range-checked on construction.

    The coordinates are exposed through read-only properties. Two
    positions compare equal when both coordinates match, and the hash is
    derived from the same pair, so instances can be used in sets and as
    dictionary keys.
    """

    def __init__(self, latitude, longitude):
        """Validate and store the coordinates.

        Raises:
            ValueError: If latitude is outside [-90, +90] or longitude is
                outside [-180, +180].
        """
        if not (-90 <= latitude <= +90):
            raise ValueError(f"Latitude {latitude} out of range")

        if not (-180 <= longitude <= +180):
            raise ValueError(f"Longitude {longitude} out of range")

        self._latitude = latitude
        self._longitude = longitude

    @property
    def latitude(self):
        """The latitude passed to the constructor."""
        return self._latitude

    @property
    def longitude(self):
        """The longitude passed to the constructor."""
        return self._longitude

    @property
    def latitude_hemisphere(self):
        """'N' for a latitude of zero or more, otherwise 'S'."""
        return "N" if self.latitude >= 0 else "S"

    @property
    def longitude_hemisphere(self):
        """'E' for a longitude of zero or more, otherwise 'W'."""
        return "E" if self.longitude >= 0 else "W"

    def __repr__(self):
        # typename() reports the runtime class, so subclasses show their own name.
        return f"{typename(self)}(latitude={self.latitude}, longitude={self.longitude})"

    def __str__(self):
        # Same text as format() with an empty spec (two decimal places).
        return format(self)

    def __format__(self, format_spec):
        """Format as '<lat>° <N|S>, <lon>° <E|W>' using absolute values.

        The spec may carry a precision after a dot, written either as
        ``.N`` or in the usual float style ``.Nf``; it sets the number of
        decimal places of both components (default 2). Anything before
        the dot is ignored.

        Raises:
            ValueError: If the precision part is not an integer.
        """
        component_format_spec = ".2f"
        _, dot, suffix = format_spec.partition(".")
        if dot:
            # Tolerate a trailing "f" so the familiar float spec ".3f" works
            # as well as the bare ".3" form.
            digits = suffix[:-1] if suffix.endswith("f") else suffix
            try:
                num_decimal_places = int(digits)
            except ValueError:
                raise ValueError(
                    f"Invalid precision in format specifier {format_spec!r}"
                ) from None
            component_format_spec = f".{num_decimal_places}f"
        latitude = format(abs(self.latitude), component_format_spec)
        longitude = format(abs(self.longitude), component_format_spec)
        return (
            f"{latitude}° {self.latitude_hemisphere}, "
            f"{longitude}° {self.longitude_hemisphere}"
        )

    def __eq__(self, other):
        # Returning NotImplemented lets Python try the reflected comparison
        # before falling back to "not equal".
        if not isinstance(other, type(self)):
            return NotImplemented
        return (self.latitude == other.latitude) and (self.longitude == other.longitude)

    def __hash__(self):
        # Hash the same pair that __eq__ compares, so equal positions hash alike.
        return hash((self.latitude, self.longitude))

class EarthPosition(Position):
    """Position subclass that differs from its base class only by name."""

class MarsPosition(Position):
    """Position subclass that differs from its base class only by name."""


In [3]:
from dataclasses import dataclass

# Data classes are created with the standard-library dataclass decorator.
# eq=True (spelled out here for emphasis) asks for a generated __eq__.
@dataclass(eq=True)
class Location:
    """A named place: a name paired with a Position."""

    # The type annotation is what makes an attribute a dataclass field;
    # the annotated type itself is not checked at runtime.
    name: str
    position: Position


# Sample locations, built with explicit keyword arguments.
hong_kong = Location(name="Hong Kong", position=EarthPosition(latitude=22.29, longitude=114.16))
stockholm = Location(name="Stockholm", position=EarthPosition(latitude=59.33, longitude=18.06))
cape_town = Location(name="Cape Town", position=EarthPosition(latitude=-33.93, longitude=18.42))
rotterdam = Location(name="Rotterdam", position=EarthPosition(latitude=51.96, longitude=4.47))
maracaibo = Location(name="Maracaibo", position=EarthPosition(latitude=10.65, longitude=-71.65))

In [4]:
# With eq=True the dataclass gets a generated __eq__, so two separately created
# Locations holding equal field values compare equal.
# Note: this relies on Position defining __eq__, which tells Python when two positions are equal.
rsa_city = Location("Cape Town", EarthPosition(-33.93, 18.42))

rsa_city == cape_town

True

In [5]:
# By default the dataclass decorator generates a __repr__ that lists every field
cape_town

Location(name='Cape Town', position=EarthPosition(latitude=-33.93, longitude=18.42))

### Hashing
Hashing comes into play with sets and dictionaries: set elements and dictionary keys are stored and looked up by their hash value, so they must be hashable.

In [6]:
# Set elements must be hashable. This Location is not, so building the set
# raises TypeError; catch only that so unrelated errors are not hidden.
try:
    set_of_locations = {cape_town, hong_kong, rotterdam}
except TypeError as error:
    print(error)

unhashable type: 'Location'


### Hashing
Although `dataclass` does have an `unsafe_hash=True` parameter, using it is not recommended.<br>
For `dataclass` to make our class hashable automatically, we have to do three things.<br>
- Use immutable attribute types (str, int, tuple, ...)
- Declare the dataclass as frozen (immutable) with `frozen=True`
- Define `__hash__` for the Position class


In [7]:
# eq=True together with frozen=True lets dataclass generate __hash__ from the fields.
@dataclass(eq=True, frozen=True)
class Location:
    """An immutable named place: a name paired with a Position."""

    name: str
    position: Position


# Location was redefined above, so the sample instances are created again
# from the new (frozen) class.
hong_kong = Location(name="Hong Kong", position=EarthPosition(latitude=22.29, longitude=114.16))
stockholm = Location(name="Stockholm", position=EarthPosition(latitude=59.33, longitude=18.06))
cape_town = Location(name="Cape Town", position=EarthPosition(latitude=-33.93, longitude=18.42))
rotterdam = Location(name="Rotterdam", position=EarthPosition(latitude=51.96, longitude=4.47))
maracaibo = Location(name="Maracaibo", position=EarthPosition(latitude=10.65, longitude=-71.65))

In [8]:
# The frozen Location instances are hashable, so they can now be collected in a set
set_of_locations = {cape_town, hong_kong, rotterdam}
set_of_locations

{Location(name='Cape Town', position=EarthPosition(latitude=-33.93, longitude=18.42)),
 Location(name='Hong Kong', position=EarthPosition(latitude=22.29, longitude=114.16)),
 Location(name='Rotterdam', position=EarthPosition(latitude=51.96, longitude=4.47))}

### Dataclass invariants
The main advantage of classes over other data structures is the ability to validate the values they store.<br>
In a dataclass this can be done without overriding `__init__`, by defining `__post_init__`.

In [10]:
# __post_init__ runs right after the instance has been initialized
@dataclass(frozen=True, eq=True)
class Location:
    """An immutable named place whose name must not be the empty string."""

    name: str
    position: Position

    def __post_init__(self):
        """Enforce the invariant: reject an empty name."""
        name_is_empty = self.name == ""
        if name_is_empty:
            raise ValueError("Location name cannot be empty")

# An empty name violates the invariant checked in __post_init__, which raises
# ValueError; catch only that so unrelated errors are not hidden.
try:
    null_island = Location("", Position(0.0, 0.0))
except ValueError as error:
    print(error)

Location name cannot be empty


<b>Why Class attributes should be immutable</b>


In [11]:
# The same class once more, this time mutable (frozen=False)
@dataclass(frozen=False, eq=True)
class Location:
    """A mutable named place; the empty-name check runs only in __post_init__."""

    name: str
    position: Position

    def __post_init__(self):
        """Reject an empty name at construction time."""
        name_is_empty = self.name == ""
        if name_is_empty:
            raise ValueError("Location name cannot be empty")

null_island = Location(name="null_island", position=Position(latitude=0.0, longitude=0.0))
# Nothing is frozen here, so the name can be overwritten with an empty string
# afterwards without __post_init__ running again.
null_island.name = ""
null_island

Location(name='', position=Position(latitude=0.0, longitude=0.0))