## Task 1. Decorators
1. Write a decorator that checks whether a function's signature complies with a given interface.
2. Write a decorator that decorates a function with a provided decorator.
3. Write a decorator that adds benchmarking information (function execution time) to a function.
4. Write a decorator that logs function calls (prints the function's name and the values of its arguments).

In [4]:
# Decorator, which checks compliance of a function's signature with a given interface
import typing

def check_signature(interface):
    """Build a decorator that verifies a function against ``interface``.

    A function complies when it has the same parameters (names, kinds and
    order) and the same resolved type hints, including the return hint, as
    ``interface``.

    Args:
        interface: Callable whose signature serves as the reference.

    Returns:
        A decorator that returns the decorated function unchanged when it
        complies.

    Raises:
        TypeError: Raised by the returned decorator, at decoration time,
            when the decorated function does not match ``interface``.
    """
    import inspect  # local import keeps this block self-contained

    def describe(target):
        # Type hints alone form an unordered mapping that omits unannotated
        # parameters, so the ordered (name, kind) pairs are compared as well.
        layout = [
            (param.name, param.kind)
            for param in inspect.signature(target).parameters.values()
        ]
        return layout, typing.get_type_hints(target)

    expected = describe(interface)

    def decorator(func):
        if describe(func) != expected:
            raise TypeError(f"Function {func.__name__} does not match the interface {interface.__name__}")
        return func

    return decorator


def interface_func(a: int, b: str) -> bool:
    """Reference signature only; the body intentionally does nothing."""


@check_signature(interface_func)
def compliant_func(a: int, b: str) -> bool:
    """Example whose annotations match interface_func, so decoration succeeds."""
    return True


# Decorating a function whose parameter types are swapped relative to
# interface_func raises TypeError at definition time; the message is printed.
try:
    @check_signature(interface_func)
    def non_compliant_func(a: str, b: int) -> bool:
        """Deliberately mismatched example (types of a and b are swapped)."""
except TypeError as mismatch:
    print(mismatch)

Function non_compliant_func does not match the interface interface_func


In [22]:
# Decorator, which decorates function with a provided decorator
def apply_decorator(provided_decorator):
    """Return a decorator that delegates to ``provided_decorator``.

    Args:
        provided_decorator: Callable taking a function and returning its
            decorated replacement.

    Returns:
        A decorator which, applied to a function, returns whatever
        ``provided_decorator`` returns for that function.
    """
    def _delegate(target):
        decorated = provided_decorator(target)
        return decorated

    return _delegate


def simple_decorator(func):
    """Wrap ``func`` so a message is printed before and after each call.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result. The wrapper keeps ``func``'s
        metadata (``__name__``, ``__doc__``, ...).
    """
    import functools  # local import keeps this block self-contained

    # Without wraps the decorated function would report itself as "wrapper".
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print("Before call:")
        result = func(*args, **kwargs)
        print("After call:")
        return result
    return wrapper


# simple_decorator is applied indirectly, through apply_decorator.
@apply_decorator(simple_decorator)
def say_name(name):
    """Print ``name`` and return the constant string "Bob"."""
    print(f"{name}")
    return "Bob"


# The call prints the wrapper's before/after messages around the name;
# the returned value is printed last.
print(f"{say_name('Alice')}")

Before call:
Alice
After call:
Bob


In [18]:
# Decorator, which adds benchmarking information (function execution time) to function
import time

def benchmark(func):
    """Wrap ``func`` so each call prints how long it took to execute.

    Args:
        func: Callable to time.

    Returns:
        A wrapper that forwards all arguments to ``func``, prints the elapsed
        time in seconds, and returns ``func``'s result. The wrapper keeps
        ``func``'s metadata (``__name__``, ``__doc__``, ...).
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump when the system clock
        # is adjusted.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Function {func.__name__} executed in {execution_time:.6f} seconds")
        return result
    return wrapper


@benchmark
def slow_mult(a, b):
    """Return ``a * b`` after sleeping for two seconds."""
    # The sleep makes the execution time reported by benchmark clearly visible.
    time.sleep(2)
    return a * b


# benchmark prints the timing line first; the product is printed afterwards.
print(f"Result: {slow_mult(3, 4)}")

Function slow_mult executed in 2.005097 seconds
Result: 12


In [23]:
# Decorator, which logs function's calls (prints function names and values of functions arguments)
import inspect

def log_calls(func):
    """Wrap ``func`` so every call prints its name and argument values.

    Arguments are reported by parameter name, with defaults filled in for
    parameters the caller omitted.

    Args:
        func: Callable to log; its signature must be introspectable.

    Returns:
        A wrapper that prints the call description, then calls ``func`` with
        the original arguments and returns its result. The wrapper keeps
        ``func``'s metadata (``__name__``, ``__doc__``, ...).
    """
    import functools  # local import keeps this block self-contained

    # The signature does not change between calls, so resolve it once here
    # instead of on every invocation.
    sig = inspect.signature(func)

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        bound_args = sig.bind(*args, **kwargs)
        bound_args.apply_defaults()  # Include default values
        arg_str = ", ".join(f"{k}={v!r}" for k, v in bound_args.arguments.items())
        print(f"Calling {func.__name__} with arguments: {arg_str}")
        return func(*args, **kwargs)
    return wrapper


@log_calls
def greet(name="World", greeting="Hello"):
    """Return the greeting string ``"<greeting>, <name>!"``."""
    return f"{greeting}, {name}!"


# Exercise the logger with all defaults, one positional argument, and a
# positional plus a keyword argument.
print(greet())
print(greet("Alice"))
print(greet("Bob", greeting="Hi"))

Calling greet with arguments: name='World', greeting='Hello'
Hello, World!
Calling greet with arguments: name='Alice', greeting='Hello'
Hello, Alice!
Calling greet with arguments: name='Bob', greeting='Hi'
Hi, Bob!


### Task 2. Configurable pipeline of integer data transformations
Assume we have a list of integers, which represents some abstract collected data. Create a class for creating configurable pipelines of transformations for the provided list of integers. The following transformations should be supported:
1. Conversion to ratios (every element is divided by the sum of all list elements)
2. Removal of the list mean value (the mean is subtracted from every element)
3. Filtering of negative or zero elements

In [24]:
def to_ratios(data):
    """Divide every element by the sum of all elements.

    Args:
        data: List of numbers.

    Returns:
        A new list of ratios. An empty input gives an empty list. When the
        elements sum to zero the ratios are undefined, so a copy of the input
        values is returned unchanged.
    """
    if not data:
        return []
    total = sum(data)
    if total == 0:
        # Return a copy so that, like every other path, the result never
        # aliases the caller's list.
        return list(data)
    return [x / total for x in data]


def substract_mean(data):
    """Shift the values so that their arithmetic mean becomes zero.

    Args:
        data: List of numbers.

    Returns:
        A new list with the mean of ``data`` subtracted from each element,
        or an empty list when ``data`` is empty.
    """
    if data:
        average = sum(data) / len(data)
        return [value - average for value in data]
    return []


def filter_non_positive(data):
    """Keep only the strictly positive elements of ``data``, in order.

    Args:
        data: Iterable of numbers.

    Returns:
        A new list without the zero and negative elements.
    """
    positives = [item for item in data if item > 0]
    return positives


class Pipeline:
    """Configurable sequence of named transformations over a list of numbers.

    Transformations are registered by name with ``add_transformation`` and
    applied in registration order by ``execute``.
    """

    # Registry of supported transformation names and their implementations.
    TRANSFORMATIONS = {
        'to_ratios': to_ratios,
        'substract_mean': substract_mean,
        'filter_non_positive': filter_non_positive,
    }

    def __init__(self):
        # Names of the transformations to run, in execution order.
        self.transformations = []

    def add_transformation(self, transformation_name):
        """Append a transformation to the end of the pipeline.

        Args:
            transformation_name: Key of ``TRANSFORMATIONS`` to schedule.

        Raises:
            ValueError: If ``transformation_name`` is not a known key.
        """
        if transformation_name not in self.TRANSFORMATIONS:
            raise ValueError(f"Unknown transformation: {transformation_name}")
        self.transformations.append(transformation_name)

    def execute(self, data):
        """Run every scheduled transformation over ``data``, in order.

        Args:
            data: Iterable of numbers; it is not modified.

        Returns:
            A new list holding the transformed values.
        """
        # Copy first: an empty pipeline must not hand back the caller's own
        # list, and one-shot iterables become safe to traverse repeatedly.
        result = list(data)
        for transform_name in self.transformations:
            result = self.TRANSFORMATIONS[transform_name](result)
        return result

In [25]:
# Demo: centre the data on its mean, keep the positive part, then normalise
# the remaining values to ratios.
data = [1, -2, 3, 0]

pipeline = Pipeline()
for step in ('substract_mean', 'filter_non_positive', 'to_ratios'):
    pipeline.add_transformation(step)

result = pipeline.execute(data)

print("Original data:", data)
print("Transformed data:", result)

Original data: [1, -2, 3, 0]
Transformed data: [0.16666666666666666, 0.8333333333333334]


## Task 3. Text preprocessing
Assume we have an arbitrary text and plan to prepare it for further data analysis. The text might contain HTML tags, e-mails, and LaTeX commands. The task is to create a configurable text preprocessor that can remove certain elements from the provided text. The following filters should be supported:
1. Removal of punctuation symbols from a given list 
2. Removal of stop words from a given list
3. Removal of HTML tags
4. Removal of e-mails

In [38]:
import re
import string
from bs4 import BeautifulSoup
from typing import List


class TextPreprocessor:
    """Configurable text cleaner.

    Supports removal of selected punctuation characters, stop words, HTML
    tags and e-mail addresses. Each filter is available as its own method,
    and ``preprocess`` chains the enabled ones.
    """

    # Compiled once for all instances. The top-level-domain class is
    # [A-Za-z]; writing it as [A-Z|a-z] would also accept a literal '|'.
    _EMAIL_PATTERN = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    def __init__(self, remove_punctuations: List[str] = None, stop_words: List[str] = None):
        """Store the filter configuration.

        Args:
            remove_punctuations: Punctuation characters to strip; ``None`` or
                empty disables punctuation removal.
            stop_words: Words to drop, matched case-insensitively; ``None``
                or empty disables stop-word removal.
        """
        # Copy so later changes to the caller's list do not affect this instance.
        self.remove_punctuations = list(remove_punctuations) if remove_punctuations else []
        # Words are lowercased before lookup in remove_stopwords, so the
        # stored stop words must be lowercase too or they would never match.
        self.stop_words = {word.lower() for word in stop_words} if stop_words else set()

    def remove_punctuation(self, text: str) -> str:
        """Return ``text`` without any of the configured punctuation characters."""
        if self.remove_punctuations:
            trans_table = str.maketrans('', '', ''.join(self.remove_punctuations))
            text = text.translate(trans_table)
        return text

    def remove_stopwords(self, text: str) -> str:
        """Return ``text`` without the configured stop words.

        The text is split on whitespace and re-joined with single spaces, so
        the original spacing and line breaks are not preserved.
        """
        words = text.split()
        filtered_words = [word for word in words if word.lower() not in self.stop_words]
        return ' '.join(filtered_words)

    def remove_html_tags(self, text: str) -> str:
        """Return the text content of ``text`` as extracted by BeautifulSoup's html.parser."""
        return BeautifulSoup(text, 'html.parser').get_text()

    def remove_emails(self, text: str) -> str:
        """Return ``text`` with every e-mail address deleted."""
        return self._EMAIL_PATTERN.sub('', text)

    def preprocess(self, text: str, remove_punc: bool = True, remove_stop: bool = True, remove_html: bool = True, remove_email: bool = True) -> str:
        """Apply the enabled filters to ``text`` and return the result.

        Filters run in a fixed order: HTML tags, e-mails, punctuation, stop
        words. E-mails are removed before punctuation because stripping '@'
        or '.' first would leave the addresses unrecognisable.

        Args:
            text: Text to clean.
            remove_punc: Whether to strip the configured punctuation.
            remove_stop: Whether to drop the configured stop words.
            remove_html: Whether to strip HTML tags.
            remove_email: Whether to delete e-mail addresses.

        Returns:
            The cleaned text.
        """
        if remove_html:
            text = self.remove_html_tags(text)
        if remove_email:
            text = self.remove_emails(text)
        if remove_punc:
            text = self.remove_punctuation(text)
        if remove_stop:
            text = self.remove_stopwords(text)
        return text


# Demo input: one line of HTML containing an e-mail address, followed by a
# plain-text line.
text = """
<html><body>This is an example email: test@example.com. Check <b>some bold text</b> for more info.</body></html>
Weather is not so nice today.
"""

# Strip every ASCII punctuation character and a small set of stop words.
punctuations_to_remove = [*string.punctuation]
stop_words_list = ["is", "an", "also", "at", "not"]

preprocessor = TextPreprocessor(
    remove_punctuations=punctuations_to_remove,
    stop_words=stop_words_list,
)

cleaned_text = preprocessor.preprocess(text)
print(cleaned_text)

This example email Check some bold text for more info Weather so nice today
