In [1]:
import pandas as pd
import seaborn as sns

## Working with Dictionaries 

In [1]:
def count_numbers_solution(list_of_data):
    """
    Counts the occurrences of each integer value in a list.

    Args:
        list_of_data: Iterable of hashable values (typically integers) to count.

    Returns:
        Dictionary where each distinct value from the input is mapped to
        the number of times it appears. Keys are in first-occurrence order.
    """

    counts = {}
    # Single pass with one dictionary update per element; rescanning the
    # whole input with list.count() for every element would be quadratic.
    for value in list_of_data:
        counts[value] = counts.get(value, 0) + 1
    return counts

## Encoding Factors Using New Binary Factors

In [2]:
def new_columns_solution(column):
    """
    Builds one 0/1 indicator list per distinct category of a column.

    Args:
        column: Categorical factor column (pandas Series).

    Returns:
        Dictionary keyed by each distinct category of the column. Every value
        is a list as long as the column holding 1 at the positions where the
        column equals that category and 0 everywhere else.
    """

    distinct_categories = list(column.unique())

    return {
        category: [1 if value == category else 0 for value in column]
        for category in distinct_categories
    }

In [3]:
def categorical_to_binary_solution(data, columns):
    """
    Adds binary indicator columns for the given categorical columns.

    Args:
        data: Table with categorical factors (e.g. a pandas DataFrame).
        columns: Names of the columns in the table to encode.

    Returns:
        The same table object that was passed in, modified in place: for
        every category returned by new_columns_solution for a listed column,
        a column named after that category is assigned its 0/1 indicators
        (replacing any existing column with the same name).
    """

    for factor_name in columns:
        indicators = new_columns_solution(data[factor_name])
        for category in indicators:
            data[category] = indicators[category]

    return data

## Target encoding

In [50]:
def round_to_2(x):
    """
    Rounds a number to two decimal places.

    Args:
        x: Numeric value to round.

    Returns:
        The value rounded to two decimal places by the built-in round().
    """
    return round(x, ndigits=2)

In [4]:
def target_coding_solution(data, factor_column, target_column):
    """
    Encodes a factor in a table into target encoding.

    Every row receives the mean of the target over all rows that share its
    factor value, rounded to two decimal places.

    Args:
        data: Original table (pandas DataFrame); it is modified in place.
        factor_column: Name of the factor column to encode.
        target_column: Name of the column containing the target value.

    Returns:
        None. Adds a new column named "encoded" to the original table with the result of encoding.
        Rows whose factor value is missing (NaN/None) receive NaN, because
        groupby leaves missing keys out of the per-category means.
    """

    category_means = data.groupby(factor_column)[target_column].mean()
    encoding = {
        category: round(mean, 2) for category, mean in category_means.items()
    }

    # Look up with a default instead of encoding[value]: a missing factor
    # value has no group mean and would otherwise raise KeyError.
    missing = float('nan')
    data['encoded'] = [encoding.get(value, missing) for value in data[factor_column]]

## Standardization of factors

In [5]:
def standard_deviation_solution(column):
    """
    Calculates the population standard deviation of a column.

    The squared deviations from the mean are averaged over all len(column)
    values (divisor N, not N - 1); the square root of that average is then
    rounded to two decimal places.

    Args:
        column: Column with numerical values (must provide .mean(), e.g. a
            pandas Series).

    Returns:
        Standard deviation of the values in the column, rounded to two
        decimal places. NaN for an empty column.
    """

    count = len(column)
    if count == 0:
        # The deviation of no values is undefined; avoid dividing by zero.
        return float('nan')

    mean = column.mean()
    squared_deviations = 0
    for value in column:
        squared_deviations += (value - mean) ** 2
    return round_to_2((squared_deviations / count) ** (1/2))

In [6]:
def standartize_column_solution(column):
    """
    Standardizes values in a column.

    Args:
        column: Column with numerical values.

    Returns:
        New pandas Series with (value - mean) / deviation for every value,
        rounded to two decimal places, where the deviation is the result of
        standard_deviation_solution for the column. When that deviation is 0
        a float Series of zeros with the same length is returned instead.
    """

    deviation = standard_deviation_solution(column)
    if deviation == 0:
        # All values coincide (within the rounded deviation): nothing to scale.
        return pd.Series([0] * len(column)).astype('float')

    mean = column.mean()
    scaled = [(value - mean) / deviation for value in column]
    return pd.Series(scaled).round(2)

## Correlation

In [7]:
def correlation_solution(x, y):
    """
    Calculates the Pearson correlation between two sequences of numerical values.

    Args:
        x: First sequence of numerical values (list, tuple or pandas Series).
        y: Second sequence of numerical values, same length as x.

    Returns:
        Correlation between the values, rounded to two decimal places.
        NaN when the inputs are empty or when either of them has zero
        variance, since the coefficient is undefined in those cases.

    Raises:
        ValueError: If x and y have different lengths.
    """

    if len(x) != len(y):
        raise ValueError("x and y must have the same length")
    if len(x) == 0:
        return float('nan')

    x_mean = sum(x) / len(x)
    y_mean = sum(y) / len(y)
    covariance, x_spread, y_spread = 0, 0, 0

    # Pair the values by position with zip instead of x[i] / y[i]: on a
    # pandas Series x[i] is a label lookup, which fails or picks the wrong
    # row when the index is not 0..n-1 (for example after filtering rows).
    for x_value, y_value in zip(x, y):
        x_dev = x_value - x_mean
        y_dev = y_value - y_mean
        covariance += x_dev * y_dev
        x_spread += x_dev ** 2
        y_spread += y_dev ** 2

    denominator = (x_spread * y_spread) ** (1/2)
    if denominator == 0:
        # A constant sequence has no variance to correlate with.
        return float('nan')
    return round_to_2(covariance / denominator)

In [9]:
def correlation_one_line_solution(column_x, column_y):
    """
    Calculates the correlation between values in two columns.

    Uses element-wise column arithmetic: the sum of the products of the
    deviations from the means is divided by the square root of the product
    of the two sums of squared deviations.

    Args:
        column_x: First column with numerical values.
        column_y: Second column with numerical values.

    Returns:
        Correlation between values in the two columns, rounded to two
        decimal places.
    """

    x_deviation = column_x - column_x.mean()
    y_deviation = column_y - column_y.mean()

    covariance = (x_deviation * y_deviation).sum()
    spread_product = (x_deviation ** 2).sum() * (y_deviation ** 2).sum()

    return round_to_2(covariance / (spread_product ** (1/2)))

In [10]:
def correlation_table_solution(table):
    """
    Calculates correlations between columns of a data table.

    Args:
        table: Table with numerical values.

    Returns:
        List of rows (one per column of the table, in column order); each row
        is a list with the correlation of that column against every column,
        computed with correlation_solution. A column paired with itself is
        reported as 1.0 without being computed.
    """

    names = table.columns

    return [
        [
            1.0 if other == name else correlation_solution(table[name], table[other])
            for other in names
        ]
        for name in names
    ]