<a href="https://colab.research.google.com/github/ys7yoo/effective_python/blob/master/24_classmethod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Item 24: Use @classmethod Polymorphism to Construct Objects Generically

In [1]:
# preamble to mimic the book environment
import logging
from pprint import pprint
from sys import stdout as STDOUT

from tempfile import TemporaryDirectory
import random

# generate random data files
def write_test_files(tmpdir, count=100, max_lines=100):
    """Fill ``tmpdir`` with files that contain a random number of newlines.

    The files are named ``"0"``, ``"1"``, ... ``str(count - 1)``. Each file
    holds only newline characters; how many is drawn independently with
    ``random.randint(0, max_lines)`` (both ends inclusive).

    Args:
        tmpdir: Path of an existing directory to write the files into.
        count: Number of files to create. Defaults to 100.
        max_lines: Upper bound for the number of newlines written to a
            single file. Defaults to 100.
    """
    # Imported here so the function does not depend on an `import os`
    # that only happens further down in the file.
    import os

    for i in range(count):
        with open(os.path.join(tmpdir, str(i)), 'w') as f:
            f.write('\n' * random.randint(0, max_lines))

## First implementation using polymorphism

In [2]:
# a common class to represent the input data
class InputData:
    """Interface for a source of input data.

    Subclasses are expected to override ``read``; the version here only
    raises ``NotImplementedError``.
    """

    def read(self):
        """Return the data held by this input (must be overridden)."""
        raise NotImplementedError()

        
# a concrete subclass of InputData that reads data from a file on disk
class PathInputData(InputData):
    """Input data backed by a file on disk.

    Args:
        path: Location of the file whose contents ``read`` returns.
    """

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Open the file at ``self.path`` in text mode and return its contents."""
        # Use a context manager so the handle is closed as soon as the read
        # finishes instead of whenever the file object gets collected.
        with open(self.path) as f:
            return f.read()

    
# an abstract class for MapReduce worker that consumes the input data 
class Worker:
    """Interface for a MapReduce worker bound to one piece of input data.

    ``input_data`` is stored as given and ``result`` starts out as ``None``.
    Both ``map`` and ``reduce`` raise ``NotImplementedError`` here and are
    meant to be supplied by subclasses.
    """

    def __init__(self, input_data):
        self.input_data, self.result = input_data, None

    def map(self):
        """Process ``self.input_data`` (must be overridden)."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine ``other`` into this worker (must be overridden)."""
        raise NotImplementedError()


# a concrete subclass of Worker: a newline counter
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input data."""

    def map(self):
        """Read the input and store its number of ``'\\n'`` characters in ``result``."""
        text = self.input_data.read()
        newline_total = text.count('\n')
        self.result = newline_total

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result`` in place."""
        self.result += other.result

## How to construct objects?

Manually build and connect objects with some helper functions.

In [3]:
import os

def generate_path_inputs(data_dir):
    """Lazily yield one ``PathInputData`` per entry listed in ``data_dir``.

    This is a generator function: the directory is listed only once
    iteration starts.
    """
    yield from (
        PathInputData(os.path.join(data_dir, entry))
        for entry in os.listdir(data_dir)
    )


# create LineCountWorker instances
def create_line_count_workers(input_list):
    """Return a list holding one ``LineCountWorker`` per item of ``input_list``.

    ``input_list`` may be any iterable; it is consumed once, in order.
    """
    return [LineCountWorker(item) for item in input_list]


# multi-thread implementation 
from threading import Thread

def execute(workers):
    """Run the map step of every worker in a thread, then reduce the results.

    Each worker's ``map`` method runs in its own ``Thread``; all threads are
    joined before reducing. The first worker then absorbs every other worker
    through ``first.reduce(worker)``, in order.

    Args:
        workers: Iterable of objects providing ``map()``, ``reduce(other)``
            and a ``result`` attribute. Any iterable is accepted, not only
            sequences.

    Returns:
        ``result`` of the first worker after all reductions, or ``None``
        when ``workers`` is empty.
    """
    # Materialize once so generators work and so the same objects are used
    # for both the map and the reduce phase.
    workers = list(workers)
    if not workers:
        # Nothing to map or reduce; avoid an IndexError on workers[0].
        return None

    threads = [Thread(target=w.map) for w in workers]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    first, *rest = workers
    for worker in rest:
        first.reduce(worker)
    return first.result


# mapreduce for PathInputData using LineCountWorker
def mapreduce(data_dir):
    """Run the line-count MapReduce over the files in ``data_dir``.

    Chains ``generate_path_inputs``, ``create_line_count_workers`` and
    ``execute`` and returns whatever ``execute`` returns.
    """
    return execute(create_line_count_workers(generate_path_inputs(data_dir)))


# the caller is here
# Driver for the first (non-generic) implementation.
# All work on the files happens inside the `with` block, while the
# temporary directory is still available.
with TemporaryDirectory() as tmpdir:
    print(tmpdir)
    
    # populate the directory with the random test files
    write_test_files(tmpdir)
    
    # run map + reduce over the files in tmpdir
    result = mapreduce(tmpdir)

# `result` is a plain value, so it can be reported after the `with` block ends
print('There are', result, 'lines')

/var/folders/bc/tll0j51s10j6g0fh4sd1_b5h0000gn/T/tmpqi2pmsez
There are 5059 lines


## Problem: `mapreduce` function is NOT generic

For another `InputData` or `Worker` subclass, you need new `generate_*_inputs`, `create_*_workers`, and `mapreduce` functions.

## Better way: use @classmethod polymorphism

In [4]:
# a generic base class for InputData
class GenericInputData:
    """Interface for input data that also knows how to build its instances.

    Both methods only raise ``NotImplementedError`` here; subclasses are
    meant to override them.
    """

    def read(self):
        """Return the data held by this input (must be overridden)."""
        raise NotImplementedError()

    @classmethod
    def generate_inputs(cls, config):
        """Produce input objects from ``config`` (must be overridden)."""
        raise NotImplementedError()


# a concrete subclass of the GenericInputData
class PathInputData(GenericInputData):
    """Input data backed by a file on disk.

    Args:
        path: Location of the file whose contents ``read`` returns.
    """

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Open the file at ``self.path`` in text mode and return its contents."""
        # Use a context manager so the handle is closed as soon as the read
        # finishes instead of whenever the file object gets collected.
        with open(self.path) as f:
            return f.read()

    @classmethod
    def generate_inputs(cls, config):
        """Lazily yield one instance per entry listed in ``config['data_dir']``.

        Instances are built with ``cls(...)``, so a subclass calling this
        method gets instances of itself.

        Args:
            config: Mapping with a ``'data_dir'`` key naming the directory
                to list.
        """
        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))


# a generic base class for workers
class GenericWorker:
    """Interface for a MapReduce worker that can also build its own instances.

    ``input_data`` is stored as given and ``result`` starts out as ``None``.
    ``map`` and ``reduce`` raise ``NotImplementedError`` here and are meant
    to be supplied by subclasses.
    """

    def __init__(self, input_data):
        self.input_data, self.result = input_data, None

    def map(self):
        """Process ``self.input_data`` (must be overridden)."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine ``other`` into this worker (must be overridden)."""
        raise NotImplementedError()

    @classmethod
    def create_workers(cls, input_class, config):
        """Return a list with one ``cls`` instance per generated input.

        The inputs come from ``input_class.generate_inputs(config)``; each
        one is passed to ``cls(...)``, so calling this on a subclass yields
        workers of that subclass.
        """
        return [cls(item) for item in input_class.generate_inputs(config)]


# a concrete subclass of the GenericWorker: a line counter
class LineCountWorker(GenericWorker):
    """Worker that counts newline characters in its input data."""

    def map(self):
        """Read the input and store its number of ``'\\n'`` characters in ``result``."""
        text = self.input_data.read()
        newline_total = text.count('\n')
        self.result = newline_total

    def reduce(self, other):
        """Add ``other.result`` onto this worker's ``result`` in place."""
        self.result += other.result


# a generic mapreduce
def mapreduce(worker_class, input_class, config):
    """Build workers through ``worker_class`` and run them with ``execute``.

    The workers come from ``worker_class.create_workers(input_class, config)``;
    the return value is whatever ``execute`` returns for them.
    """
    return execute(worker_class.create_workers(input_class, config))


# use the generic function
# Driver for the generic implementation.
# All work on the files happens inside the `with` block, while the
# temporary directory is still available.
with TemporaryDirectory() as tmpdir:
    print(tmpdir)
    # populate the directory with the random test files
    write_test_files(tmpdir)
    
    # PathInputData.generate_inputs reads the directory from config['data_dir']
    config = {'data_dir': tmpdir}
    # the worker and input classes are passed in as arguments
    result = mapreduce(LineCountWorker, PathInputData, config)

# `result` is a plain value, so it can be reported after the `with` block ends
print('There are', result, 'lines')

/var/folders/bc/tll0j51s10j6g0fh4sd1_b5h0000gn/T/tmpjytnahnp
There are 4622 lines


## References
* [Classmethods are useful for factory](https://realpython.com/instance-class-and-static-methods-demystified/)
