# Functions Library

## Data Science Learning

### Filter a DataFrame by a column value and return the filtered DataFrame

In [1]:
# Filter a DataFrame by a column value and return the filtered DataFrame
def filter_dataframe(df, column_filter, columnname, columns):
    '''
    INPUT:
    df - DataFrame to be filtered
    column_filter - name of the column whose values are compared (e.g. 'Country')
    columnname - the value to keep in column_filter (e.g. 'India')
    columns - list of column names that must not be missing in the kept rows

    OUTPUT:
    df_copy - new DataFrame with the rows of df where column_filter equals
              columnname and none of 'columns' is missing

    Description:
    Keep only the rows whose column_filter value equals columnname, then drop
    every row that has a missing value in any of the listed columns.
    All columns of df are kept; 'columns' only controls the missing-value check.
    The original row index is preserved and df itself is not modified.

    '''
    # The row mask does not depend on 'columns', so apply it exactly once.
    # This also guarantees the value filter runs when 'columns' is empty.
    df_copy = df[df[column_filter] == columnname]

    # One dropna over all required columns is equivalent to dropping them
    # one at a time, without re-filtering the frame on every pass.
    required = list(columns)
    if required:
        df_copy = df_copy.dropna(subset=required)

    return df_copy

### Split column by some character value And Returns a Splitted Series

In [2]:
# Split column by some character value And Returns a Splitted Series
def split_column(df, column, splitChar):
    '''
    INPUT:
    df - DataFrame holding the column to split
    column - name of the column whose values are split
    splitChar - separator passed to each value's split() method

    OUTPUT:
    column_series - Series (same index as df) where every entry is the list
                    produced by splitting the original value on splitChar

    Description:
    Split every value of df[column] on splitChar and return the resulting
    lists as a Series. Each value must provide a split() method.

    '''

    def _split_value(value):
        # Delegates to the value's own split(), exactly one call per cell.
        return value.split(splitChar)

    return df[column].apply(_split_value)

### Flatten a Nested List and Return a Flat List

In [3]:
# Flatten a Nested List and Return a Flat List
def disarray(array_list):
    '''
    INPUT:
    array_list - iterable of iterables (e.g. a list of lists) of strings

    OUTPUT:
    objects - flat list with every inner item, stripped of surrounding whitespace

    Description:
    Flatten one level of nesting, calling strip() on every inner item and
    keeping the items in their original order.

    '''

    # Outer loop walks the rows, inner loop walks the items of each row.
    return [item.strip() for inner in array_list for item in inner]

### Group by count to a list And Returns a Result Dictionary

In [4]:
# Group by count to a list And Returns a Result Dictionary
def group_list_PLang(data_list, year):
    '''
    INPUT:
    data_list - list of hashable values to be counted
    year - value stored under the 'Year' key of every record

    OUTPUT:
    grouped_dict - list of dictionaries, one per distinct value of data_list,
                   in order of first appearance

    Description:
    Count the occurrences of each value in data_list and return one record per
    distinct value with the keys 'Programming Language', 'Count' and 'Year'.

    '''
    occurrences = Counter(data_list)

    # Iterating the Counter yields each distinct value in first-seen order.
    return [
        {'Programming Language': language, 'Count': occurrences[language], 'Year': year}
        for language in occurrences
    ]

In [5]:
# Group by count to a list And Returns a Result Dictionary
def group_list_Plat(data_list, year):
    '''
    INPUT:
    data_list - list of hashable values to be counted
    year - value stored under the 'Year' key of every record

    OUTPUT:
    grouped_dict - list of dictionaries, one per distinct value of data_list,
                   in order of first appearance

    Description:
    Count the occurrences of each value in data_list and return one record per
    distinct value with the keys 'Cloud Platforms', 'Count' and 'Year'.

    '''
    occurrences = Counter(data_list)

    # Iterating the Counter yields each distinct value in first-seen order.
    return [
        {'Cloud Platforms': platform, 'Count': occurrences[platform], 'Year': year}
        for platform in occurrences
    ]

In [6]:
# Group by count to a list And Returns a Result Dictionary
def group_list_MiscTech(data_list, region):
    '''
    INPUT:
    data_list - list of hashable values to be counted
    region - value stored under the 'Region' key of every record

    OUTPUT:
    grouped_dict - list of dictionaries, one per distinct value of data_list,
                   in order of first appearance

    Description:
    Count the occurrences of each value in data_list and return one record per
    distinct value with the keys 'MiscTechWantToWorkWith', 'Count' and 'Region'.

    '''
    occurrences = Counter(data_list)

    # Iterating the Counter yields each distinct value in first-seen order.
    return [
        {'MiscTechWantToWorkWith': tech, 'Count': occurrences[tech], 'Region': region}
        for tech in occurrences
    ]

In [7]:
# Group by count to a list And Returns a Result Dictionary
def group_list_NEWStuck(data_list, year):
    '''
    INPUT:
    data_list - list of hashable values to be counted
    year - value stored under the 'Year' key of every record

    OUTPUT:
    grouped_dict - list of dictionaries, one per distinct value of data_list,
                   in order of first appearance

    Description:
    Count the occurrences of each value in data_list and return one record per
    distinct value with the keys 'Stucked Issues Dealing', 'Count' and 'Year'.

    '''
    occurrences = Counter(data_list)

    # Iterating the Counter yields each distinct value in first-seen order.
    return [
        {'Stucked Issues Dealing': issue, 'Count': occurrences[issue], 'Year': year}
        for issue in occurrences
    ]

### Create two dataframes, append them And Returns an Appended DataFrame

In [8]:
# Create two dataframes, append them And Returns an Appended DataFrame
def create_dataframe(data_dicts):
    '''
    INPUT:
    data_dicts - list whose elements are each turned into a DataFrame
                 (anything accepted by pd.DataFrame, e.g. a list of record dicts)

    OUTPUT:
    df - single DataFrame with the rows of every element, stacked in list order

    Description:
    Build one DataFrame per element of data_dicts and concatenate them.
    Any number of elements is supported: two elements behave exactly as the
    original two-frame version, a single element yields just that frame, and
    an empty list yields an empty DataFrame.
    Row labels of the individual frames are kept as-is, so the result can
    contain repeated index values.

    '''

    frames = [pd.DataFrame(records) for records in data_dicts]

    # pd.concat raises on an empty sequence, so answer that case directly.
    if not frames:
        return pd.DataFrame()

    df = pd.concat(frames)

    return df

### Returns Scaled data

In [9]:
# Returns Scaled data
def percentage(df, column):
    '''
    INPUT:
    df - DataFrame containing 'column' and a numeric 'Count' column
    column - name of the column whose unique values define the groups

    OUTPUT:
    joined_series - float Series of percentages, indexed by the original row labels

    Description:
    For every unique value of df[column], express each row's 'Count' as a
    percentage of the total 'Count' of the rows sharing that value.
    The result is ordered group by group (groups in order of first appearance),
    not in the original row order. Rows whose 'column' value is missing match
    no group and are therefore absent from the result.

    '''

    parts = []

    for val in df[column].unique():
        # Evaluate the group mask once and reuse the selected counts.
        counts = df.loc[df[column] == val, 'Count']
        parts.append((counts / counts.sum()) * 100)

    # pd.concat raises on an empty list; return an empty numeric Series instead
    # of seeding the concatenation with a string-typed placeholder.
    if not parts:
        return pd.Series([], dtype='float64')

    joined_series = pd.concat(parts)

    return joined_series

### Define a Single Color for WordCloud - It will generate the WordCloud in a single color

In [10]:
# Define a Single Color for WordCloud - It will generate the WordCloud in a single color
def one_color_func(word=None, font_size=None,
                   position=None, orientation=None,
                   font_path=None, random_state=None):
    '''
    INPUT:
    word, font_size, position, orientation, font_path - accepted but not used
    random_state - object providing randint(a, b), e.g. a random.Random instance;
                   a fresh random.Random() is used when None

    OUTPUT:
    color - string of the form "hsl(40, 100%, L%)" with L drawn from randint(30, 70)

    Description:
    Define a single color for WordCloud: hue and saturation are fixed and only
    the lightness varies, so every word gets a shade of the same color.

    '''
    if random_state is None:
        # Local import keeps the function usable without a module-level import.
        from random import Random
        random_state = Random()

    hue = 40  # 0 - 360
    saturation = 100  # 0 - 100
    lightness = random_state.randint(30, 70)  # 0 - 100
    return "hsl({}, {}%, {}%)".format(hue, saturation, lightness)

### Define multiple Colors for WordCloud - It will generate Multi-Color WordCloud

In [11]:
# Define multiple Colors for WordCloud - It will generate Multi-Color WordCloud
def multi_color_func(word=None, font_size=None,
                     position=None, orientation=None,
                     font_path=None, random_state=None):
    '''
    INPUT:
    word, font_size, position, orientation, font_path - accepted but not used
    random_state - object providing randint(a, b), e.g. a random.Random instance;
                   a fresh random.Random() is used when None

    OUTPUT:
    color - string of the form "hsl(H, S%, L%)" for one entry of the palette below

    Description:
    Define multiple colors for WordCloud: each call picks one of the fixed
    palette entries, so the generated WordCloud is multi-colored.

    '''
    if random_state is None:
        # Local import keeps the function usable without a module-level import.
        from random import Random
        random_state = Random()

    # Palette entries are [hue, saturation %, lightness %].
    colors = [[4, 77, 82],
              [25, 74, 85],
              [82, 43, 84],
              [158, 48, 79]]

    # NOTE(review): the upper bound assumes randint includes both ends, as
    # random.Random.randint does; with an exclusive upper bound the last
    # palette entry would never be chosen - confirm what the caller passes.
    rand = random_state.randint(0, len(colors) - 1)
    hue, saturation, lightness = colors[rand]
    return "hsl({}, {}%, {}%)".format(hue, saturation, lightness)