In [28]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [29]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import random
import math

In [30]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.calibration import LabelEncoder

In [31]:
from scikeras.wrappers import KerasClassifier

In [32]:
%matplotlib inline
sns.set_theme()

In [33]:
df = pd.read_csv("kaggle_survey_2020_responses.csv")

In [34]:
salary_data = df.drop(columns = ["time_from_start_to_finish_seconds"])

In [35]:
salary_data_as_num = pd.DataFrame()

In [36]:
def convert_to_category(col_name: str, order_rules: list, data):
    data[col_name] = pd.Categorical(data[col_name], order_rules)

In [37]:
def convert_to_category_no_specified_order(col_name, data):
    if sum(data[col_name].isna().astype(int)) > 0:
        data[col_name].fillna("No response", inplace = True)
    
    order = list(set(data[col_name]))
    convert_to_category(col_name, order, data)

In [38]:
def convert_category_to_code(col_name: str, data, inplace = False):
    if inplace:
        data[col_name] = data[col_name].cat.codes + 1 # because NaN automatically becomes -1
    else:
        return data[col_name].cat.codes + 1

In [39]:
def process_column(col_name: str, order_rules = None, data = salary_data, num_data = salary_data_as_num):
    if order_rules:
        convert_to_category(col_name, order_rules, data)
    else:
        convert_to_category_no_specified_order(col_name, data)
    num_data[col_name] = convert_category_to_code(col_name, data)

In [40]:
def one_hot_column_to_binary(col_name, data = salary_data):
    data[col_name].fillna(0, inplace = True)
    data[col_name].mask(data[col_name] != 0, 1, inplace = True)

In [41]:
def process_one_hot_encoded_columns(columns, data = salary_data, num_data = salary_data_as_num):
    for col in columns:
        one_hot_column_to_binary(col, data)
        num_data[col] = data[col]

In [42]:
def column_text_to_binary(col_name, data = salary_data, num_data = salary_data_as_num):
    data[col_name] = data[col_name].notna().astype(int)
    num_data[col_name] = data[col_name]

In [43]:
def combine_multiple_columns_into_one_binary(columns, new_col_name, data = salary_data, num_data = salary_data_as_num):
    for col_name in columns:
        one_hot_column_to_binary(col_name)
        
    data[new_col_name] = data[columns].sum(axis = 1)
    data[new_col_name] = data[new_col_name].astype(int)
    
    data[new_col_name].mask(data[new_col_name] > 0, 1, inplace = True)
    num_data[new_col_name] = data[new_col_name]

## Q24 Target Column

### v1: original bins

In [44]:
q24_order = ["$0-999",
             '1,000-1,999',
             '2,000-2,999',
             '3,000-3,999',
             '4,000-4,999',
             '5,000-7,499',
             '7,500-9,999',
             '10,000-14,999',
             '15,000-19,999',
             '20,000-24,999',
             '25,000-29,999',
             '30,000-39,999',
             '40,000-49,999',
             '50,000-59,999',
              '60,000-69,999',
              '70,000-79,999',
              '80,000-89,999',
              '90,000-99,999',
            '100,000-124,999',
            '125,000-149,999',
            '150,000-199,999',
             '200,000-249,999',
             '250,000-299,999',
              '300,000-500,000',
              '> $500,000'
             ]

## v2: numerical

In [45]:
q24_mapped_to = [
    500, #"$0-999",
    1500, #        '1,000-1,999',
    2500, #         '2,000-2,999',
    3500, #         '3,000-3,999',
    4500, #         '4,000-4,999',
    6250, #        '5,000-7,499',
    8250, #         '7,500-9,999',
    12500, #         '10,000-14,999',
    17500, #         '15,000-19,999',
    22500, #         '20,000-24,999',
    27500, #         '25,000-29,999',
    35000, #         '30,000-39,999',
    45000, #         '40,000-49,999',
    55000, #         '50,000-59,999',
    65000, #          '60,000-69,999',
    75000, #          '70,000-79,999',
    85000, #          '80,000-89,999',
    95000, #          '90,000-99,999',
    112500, #        '100,000-124,999',
    137500, #        '125,000-149,999',
    175000, #        '150,000-199,999',
    225000, #         '200,000-249,999',
    275000, #         '250,000-299,999',
    400000, #          '300,000-500,000',
    500000 #          '> $500,000'
]

In [46]:
q24_mapping = dict(zip(q24_order, q24_mapped_to))

In [47]:
test_col = salary_data["q24"].copy()
test_col

0                    NaN
1        100,000-124,999
2          15,000-19,999
3        125,000-149,999
4                    NaN
              ...       
20031                NaN
20032                NaN
20033             $0-999
20034             $0-999
20035             $0-999
Name: q24, Length: 20036, dtype: object

In [48]:
test_col.fillna(0, inplace = True)
test_col

0                      0
1        100,000-124,999
2          15,000-19,999
3        125,000-149,999
4                      0
              ...       
20031                  0
20032                  0
20033             $0-999
20034             $0-999
20035             $0-999
Name: q24, Length: 20036, dtype: object

In [49]:
test_col = test_col.replace(q24_mapping)

In [50]:
salary_data["q24"] = test_col
salary_data_as_num["q24"] = test_col

## Q1 Age

In [51]:
salary_data["q1"].value_counts()

q1
25-29    4011
22-24    3786
18-21    3469
30-34    2811
35-39    1991
40-44    1397
45-49     988
50-54     698
55-59     411
60-69     398
70         76
Name: count, dtype: int64

In [52]:
q1_order = [
    "18-21",
    "22-24",
    "25-29",
    "30-34",
    "35-39",
    "40-44",
    "45-49",
    "50-54",
    "55-59",
    "60-69",
    "70"
]

In [53]:
convert_to_category("q1", q1_order, salary_data)

In [54]:
salary_data_as_num["q1"] = convert_category_to_code("q1", salary_data, False)
salary_data_as_num

Unnamed: 0,q24,q1
0,0,5
1,112500,4
2,17500,5
3,137500,4
4,0,4
...,...,...
20031,0,1
20032,0,9
20033,500,4
20034,500,2


In [55]:
salary_data_as_num["q1"].value_counts()

q1
3     4011
2     3786
1     3469
4     2811
5     1991
6     1397
7      988
8      698
9      411
10     398
11      76
Name: count, dtype: int64

## Q2 Gender

In [56]:
salary_data["q2"].value_counts()

q2
Man                        15789
Woman                       3878
Prefer not to say            263
Prefer to self-describe       54
Nonbinary                     52
Name: count, dtype: int64

In [57]:
q2_order = ['Man', "Woman", "Nonbinary", 'Prefer to self-describe', 'Prefer not to say']

In [58]:
convert_to_category("q2", q2_order, salary_data)

In [59]:
salary_data_as_num["q2"] = convert_category_to_code("q2", salary_data, False)
salary_data_as_num

Unnamed: 0,q24,q1,q2
0,0,5,1
1,112500,4,1
2,17500,5,1
3,137500,4,1
4,0,4,1
...,...,...,...
20031,0,1,1
20032,0,9,2
20033,500,4,1
20034,500,2,1


In [60]:
salary_data_as_num["q2"].value_counts()

q2
1    15789
2     3878
5      263
4       54
3       52
Name: count, dtype: int64

## Q3 Country

In [61]:
convert_to_category_no_specified_order("q3", salary_data)

In [62]:
salary_data_as_num["q3"] = convert_category_to_code("q3", salary_data)

## Q4 Education

In [63]:
salary_data["q4"].value_counts()

q4
Master’s degree                                                      7859
Bachelor’s degree                                                    6978
Doctoral degree                                                      2302
Some college/university study without earning a bachelor’s degree    1092
Professional degree                                                   699
I prefer not to answer                                                399
No formal education past high school                                  240
Name: count, dtype: int64

In [64]:
q4_order = ["No formal education past high school",
            "Some college/university study without earning a bachelor’s degree",
            "Professional degree",
            "Bachelor’s degree",
            "Master’s degree",
            "Doctoral degree",
            "I prefer not to answer"
            ]

In [65]:
convert_to_category("q4", q4_order, salary_data)

In [66]:
salary_data_as_num["q4"] = convert_category_to_code("q4", salary_data)

In [67]:
salary_data_as_num["q4"].value_counts()

q4
5    7859
4    6978
6    2302
2    1092
3     699
0     467
7     399
1     240
Name: count, dtype: int64

## Q5 Job Title

In [68]:
salary_data["q5"].value_counts()

q5
Student                      5171
Data Scientist               2676
Software Engineer            1968
Other                        1737
Currently not employed       1652
Data Analyst                 1475
Research Scientist           1174
Machine Learning Engineer    1082
Business Analyst              798
Product/Project Manager       692
Data Engineer                 437
Statistician                  290
DBA/Database Engineer         125
Name: count, dtype: int64

In [69]:
convert_to_category_no_specified_order("q5", salary_data)

In [70]:
salary_data_as_num["q5"] = convert_category_to_code("q5", salary_data)

## Q6 Years Coding

In [71]:
q6_order = [
 'I have never written code',
 '< 1 years',
 '1-2 years',
 '3-5 years',
 '5-10 years',
 '10-20 years',
 '20+ years']

In [72]:
process_column("q6", q6_order)

## Q7 Language

In [73]:
q7_columns = [
     'q7_part_1',
 'q7_part_2',
 'q7_part_3',
 'q7_part_4',
 'q7_part_5',
 'q7_part_6',
 'q7_part_7',
 'q7_part_8',
 'q7_part_9',
 'q7_part_10',
 'q7_part_11',
 'q7_part_12',
 'q7_other'
]

In [74]:
process_one_hot_encoded_columns(q7_columns)

## Q11 Computing Platform

In [75]:
process_column("q11")

## Q12 Specialized Hardware

In [76]:
q12_columns = [
    'q12_part_1',
 'q12_part_2',
 'q12_part_3',
 'q12_other'
]

In [77]:
process_one_hot_encoded_columns(q12_columns)

## Q14 Visualization

In [78]:
q14_columns = [
    'q14_part_1',
 'q14_part_2',
 'q14_part_3',
 'q14_part_4',
 'q14_part_5',
 'q14_part_6',
 'q14_part_7',
 'q14_part_8',
 'q14_part_9',
 'q14_part_10',
 'q14_part_11',
 'q14_other'
]

In [79]:
process_one_hot_encoded_columns(q14_columns)

## Q15 Years ML

In [80]:
q15_order = [
    'I do not use machine learning methods',
    'Under 1 year',
    '1-2 years',
    '2-3 years',
    '3-4 years',
    '4-5 years',
    '5-10 years',
    '10-20 years',
    '20 or more years'
]

In [81]:
process_column("q15", q15_order)

## Q17 ML Algorithms

In [82]:
q17_columns = [
    'q17_part_1',
 'q17_part_2',
 'q17_part_3',
 'q17_part_4',
 'q17_part_5',
 'q17_part_6',
 'q17_part_7',
 'q17_part_8',
 'q17_part_9',
 'q17_part_10',
 'q17_part_11',
 'q17_other'
]

In [83]:
process_one_hot_encoded_columns(q17_columns)

## Q20 Company Size

In [84]:
q20_order = [
    '0-49 employees',
    '50-249 employees',
    '250-999 employees',
    '1000-9,999 employees',
    '10,000 or more employees'
]

In [85]:
process_column("q20", q20_order)

## Q21 Datascience Workloads

In [86]:
q21_order = [
    '0',
    '1-2',
    '3-4',
    '5-9',
    '10-14',
    '15-19',
    '20'
]

In [87]:
process_column("q21", q21_order)

## Q22 Incorporating ML

i'm not super sure about the proper "order" for this question. Feel free to change this if you find it more appropriate. Just please let the chat know in case it affects others' encoding.

In [88]:
q22_order = [
    'I do not know',
    'No (we do not use ML methods)',
    'We are exploring ML methods (and may one day put a model into production)',
    'We use ML methods for generating insights (but do not put working models into production)',
    'We recently started using ML methods (i.e., models in production for less than 2 years)',
    'We have well established ML methods (i.e., models in production for more than 2 years)'
]

In [89]:
process_column("q22", q22_order)

## Q30 Big Data Products

In [90]:
column_text_to_binary("q30")

## Q32 Business Intelligence Tools

In [91]:
column_text_to_binary("q32")

## Q33 Automated ML Tools

In [92]:
q33_columns = [
    'q33_a_part_1',
 'q33_a_part_2',
 'q33_a_part_3',
 'q33_a_part_4',
 'q33_a_part_5',
 'q33_a_part_6',
 'q33_a_part_7',
 'q33_a_other'
]

In [93]:
combine_multiple_columns_into_one_binary(q33_columns, "q33")

## Q37 Data Science Courses

In [94]:
q37_columns = [
    'q37_part_1',
 'q37_part_2',
 'q37_part_3',
 'q37_part_4',
 'q37_part_5',
 'q37_part_6',
 'q37_part_7',
 'q37_part_8',
 'q37_part_9',
 'q37_part_10',
 'q37_part_11',
 'q37_other'
]

In [95]:
process_one_hot_encoded_columns(q37_columns)

## Q38 Primary Data Analysis Tool

In [96]:
process_column("q38")

## Q39 Media Sources

In [97]:
q39_columns = [
    'q39_part_1',
 'q39_part_2',
 'q39_part_3',
 'q39_part_4',
 'q39_part_5',
 'q39_part_6',
 'q39_part_7',
 'q39_part_8',
 'q39_part_9',
 'q39_part_10',
 'q39_part_11',
 'q39_other'
]

In [98]:
process_one_hot_encoded_columns(q39_columns)

## Dropped Columns

In [99]:
one_hot_dropped = [
    'q33_a_part_1',
 'q33_a_part_2',
 'q33_a_part_3',
 'q33_a_part_4',
 'q33_a_part_5',
 'q33_a_part_6',
 'q33_a_part_7',
 'q33_a_other',
]

In [100]:
dropped_questions = [
    "q8",
    'q9_part_1',
 'q9_part_2',
 'q9_part_3',
 'q9_part_4',
 'q9_part_5',
 'q9_part_6',
 'q9_part_7',
 'q9_part_8',
 'q9_part_9',
 'q9_part_10',
 'q9_part_11',
 'q9_other',
 'q10_part_1',
 'q10_part_2',
 'q10_part_3',
 'q10_part_4',
 'q10_part_5',
 'q10_part_6',
 'q10_part_7',
 'q10_part_8',
 'q10_part_9',
 'q10_part_10',
 'q10_part_11',
 'q10_part_12',
 'q10_part_13',
 'q10_other',
 "q13",
 'q16_part_1',
 'q16_part_2',
 'q16_part_3',
 'q16_part_4',
 'q16_part_5',
 'q16_part_6',
 'q16_part_7',
 'q16_part_8',
 'q16_part_9',
 'q16_part_10',
 'q16_part_11',
 'q16_part_12',
 'q16_part_13',
 'q16_part_14',
 'q16_part_15',
 'q16_other',
 'q18_part_1',
 'q18_part_2',
 'q18_part_3',
 'q18_part_4',
 'q18_part_5',
 'q18_part_6',
 'q18_other',
 'q19_part_1',
 'q19_part_2',
 'q19_part_3',
 'q19_part_4',
 'q19_part_5',
 'q19_other',
 'q23_part_1',
 'q23_part_2',
 'q23_part_3',
 'q23_part_4',
 'q23_part_5',
 'q23_part_6',
 'q23_part_7',
 'q23_other',
 'q25',
 'q26_a_part_1',
 'q26_a_part_2',
 'q26_a_part_3',
 'q26_a_part_4',
 'q26_a_part_5',
 'q26_a_part_6',
 'q26_a_part_7',
 'q26_a_part_8',
 'q26_a_part_9',
 'q26_a_part_10',
 'q26_a_part_11',
 'q26_a_other',
 'q27_a_part_1',
 'q27_a_part_2',
 'q27_a_part_3',
 'q27_a_part_4',
 'q27_a_part_5',
 'q27_a_part_6',
 'q27_a_part_7',
 'q27_a_part_8',
 'q27_a_part_9',
 'q27_a_part_10',
 'q27_a_part_11',
 'q27_a_other',
 'q28_a_part_1',
 'q28_a_part_2',
 'q28_a_part_3',
 'q28_a_part_4',
 'q28_a_part_5',
 'q28_a_part_6',
 'q28_a_part_7',
 'q28_a_part_8',
 'q28_a_part_9',
 'q28_a_part_10',
 'q28_a_other',
 'q29_a_part_1',
 'q29_a_part_2',
 'q29_a_part_3',
 'q29_a_part_4',
 'q29_a_part_5',
 'q29_a_part_6',
 'q29_a_part_7',
 'q29_a_part_8',
 'q29_a_part_9',
 'q29_a_part_10',
 'q29_a_part_11',
 'q29_a_part_12',
 'q29_a_part_13',
 'q29_a_part_14',
 'q29_a_part_15',
 'q29_a_part_16',
 'q29_a_part_17',
 'q29_a_other',
 'q31_a_part_1',
 'q31_a_part_2',
 'q31_a_part_3',
 'q31_a_part_4',
 'q31_a_part_5',
 'q31_a_part_6',
 'q31_a_part_7',
 'q31_a_part_8',
 'q31_a_part_9',
 'q31_a_part_10',
 'q31_a_part_11',
 'q31_a_part_12',
 'q31_a_part_13',
 'q31_a_part_14',
 'q31_a_other',
 'q34_a_part_1',
 'q34_a_part_2',
 'q34_a_part_3',
 'q34_a_part_4',
 'q34_a_part_5',
 'q34_a_part_6',
 'q34_a_part_7',
 'q34_a_part_8',
 'q34_a_part_9',
 'q34_a_part_10',
 'q34_a_part_11',
 'q34_a_other',
 'q35_a_part_1',
 'q35_a_part_2',
 'q35_a_part_3',
 'q35_a_part_4',
 'q35_a_part_5',
 'q35_a_part_6',
 'q35_a_part_7',
 'q35_a_part_8',
 'q35_a_part_9',
 'q35_a_part_10',
 'q35_a_other',
 'q36_part_1',
 'q36_part_2',
 'q36_part_3',
 'q36_part_4',
 'q36_part_5',
 'q36_part_6',
 'q36_part_7',
 'q36_part_8',
 'q36_part_9',
 'q36_other',
]

In [101]:
part_b_dropped = [
    'q26_b_part_1',
 'q26_b_part_2',
 'q26_b_part_3',
 'q26_b_part_4',
 'q26_b_part_5',
 'q26_b_part_6',
 'q26_b_part_7',
 'q26_b_part_8',
 'q26_b_part_9',
 'q26_b_part_10',
 'q26_b_part_11',
 'q26_b_other',
 'q27_b_part_1',
 'q27_b_part_2',
 'q27_b_part_3',
 'q27_b_part_4',
 'q27_b_part_5',
 'q27_b_part_6',
 'q27_b_part_7',
 'q27_b_part_8',
 'q27_b_part_9',
 'q27_b_part_10',
 'q27_b_part_11',
 'q27_b_other',
 'q28_b_part_1',
 'q28_b_part_2',
 'q28_b_part_3',
 'q28_b_part_4',
 'q28_b_part_5',
 'q28_b_part_6',
 'q28_b_part_7',
 'q28_b_part_8',
 'q28_b_part_9',
 'q28_b_part_10',
 'q28_b_other',
 'q29_b_part_1',
 'q29_b_part_2',
 'q29_b_part_3',
 'q29_b_part_4',
 'q29_b_part_5',
 'q29_b_part_6',
 'q29_b_part_7',
 'q29_b_part_8',
 'q29_b_part_9',
 'q29_b_part_10',
 'q29_b_part_11',
 'q29_b_part_12',
 'q29_b_part_13',
 'q29_b_part_14',
 'q29_b_part_15',
 'q29_b_part_16',
 'q29_b_part_17',
 'q29_b_other',
 'q31_b_part_1',
 'q31_b_part_2',
 'q31_b_part_3',
 'q31_b_part_4',
 'q31_b_part_5',
 'q31_b_part_6',
 'q31_b_part_7',
 'q31_b_part_8',
 'q31_b_part_9',
 'q31_b_part_10',
 'q31_b_part_11',
 'q31_b_part_12',
 'q31_b_part_13',
 'q31_b_part_14',
 'q31_b_other',
 'q33_b_part_1',
 'q33_b_part_2',
 'q33_b_part_3',
 'q33_b_part_4',
 'q33_b_part_5',
 'q33_b_part_6',
 'q33_b_part_7',
 'q33_b_other',
 'q34_b_part_1',
 'q34_b_part_2',
 'q34_b_part_3',
 'q34_b_part_4',
 'q34_b_part_5',
 'q34_b_part_6',
 'q34_b_part_7',
 'q34_b_part_8',
 'q34_b_part_9',
 'q34_b_part_10',
 'q34_b_part_11',
 'q34_b_other',
 'q35_b_part_1',
 'q35_b_part_2',
 'q35_b_part_3',
 'q35_b_part_4',
 'q35_b_part_5',
 'q35_b_part_6',
 'q35_b_part_7',
 'q35_b_part_8',
 'q35_b_part_9',
 'q35_b_part_10',
 'q35_b_other'
]

In [102]:
salary_data = salary_data.drop(columns = one_hot_dropped)

In [103]:
salary_data = salary_data.drop(columns = part_b_dropped)

In [104]:
salary_data_selected_questions = salary_data.drop(columns = dropped_questions)

# DNN Model

In [105]:
X = salary_data_as_num.drop(columns = ["q24"])
y = salary_data_as_num["q24"]

In [106]:
y

0             0
1        112500
2         17500
3        137500
4             0
          ...  
20031         0
20032         0
20033       500
20034       500
20035       500
Name: q24, Length: 20036, dtype: int64

In [107]:
x_dev, x_test, y_dev, y_test = train_test_split(X, y, test_size = 0.2, random_state = 6)

In [108]:
x_dev = tf.convert_to_tensor(x_dev.astype("int64"))
x_test = tf.convert_to_tensor(x_test.astype("int64"))
y_dev = tf.convert_to_tensor(y_dev.astype("int64"))
y_test = tf.convert_to_tensor(y_test.astype("int64"))

2023-11-28 18:53:27.783459: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-11-28 18:53:27.783635: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2023-11-28 18:53:27.783648: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2023-11-28 18:53:27.784080: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-28 18:53:27.785392: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [124]:
class DNN:
    
    def __init__(self, x_dev = x_dev, y_dev = y_dev, x_test = x_test, y_test = y_test):
        self.x_dev = x_dev
        self.y_dev = y_dev
        self.x_test = x_test
        self.y_test = y_test
        
        self.layers = [Dense(32, input_shape = (80, ), activation = "relu")]
        
        self.optimizer = "adam"
        self.loss = "mean_squared_error"
        self.metrics =  ["mean_squared_error"]
        
        self.batch_size = 100
        self.epochs = 50
        
    def customize_first_layer(self, node_count = 32):
        self.layers = [Dense(node_count, input_shape = (80, ), activation = "relu")]
        
    def add_one_dense_layer(self, node_count = 32):
        self.layers.append(Dense(node_count, activation = "relu"))
        
    def customize_middle_layers(self, layers):
        self.layers.extend(layers)
    
    def customize_compile(self,
                          optimizer = "adam",
                          loss = "mean_squared_error",
                          metrics = ["mean_squared_error"]):
        self.optimizer = optimizer
        self.loss = loss
        self.metrics = metrics
    
    def customize_fit(self,
                      batch_size = 100,
                      epochs = 50
                      ):
        self.batch_size = batch_size
        self.epochs = epochs
    
    def build_compile_and_evaluate(self, metric_to_return = "accuracy", selection_criteria = max):
        # final layer must be softmax and outputs 26
        #self.layers.append(Dense(26, activation = "softmax"))
        
        self.model = Sequential(self.layers)
        self.model.compile(optimizer = self.optimizer,
                      loss = self.loss,
                      metrics = self.metrics)
        fit_history = self.model.fit(self.x_dev, self.y_dev,
                                batch_size = self.batch_size,
                                epochs = self.epochs,
                                validation_split = 0.2,
                                verbose = 1
                                )
        self.fit_history = pd.DataFrame(fit_history.history)
        #return selection_criteria(self.fit_history[metric_to_return])
        return self.fit_history
    
    def get_model_summary(self):
        self.model.summary()
        
    def get_fit_history(self):
        return self.fit_history
        
    def evaluate_model_with_test(self):
        return self.model.evaluate(self.x_test, self.y_test, verbose = 0)

In [125]:
d = DNN()
d.customize_fit(epochs = 10)
d.add_one_dense_layer()
d.build_compile_and_evaluate()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,mean_squared_error,val_loss,val_mean_squared_error
0,3378401000.0,3378401000.0,3049279000.0,3049279000.0
1,3373679000.0,3373679000.0,3042357000.0,3042357000.0
2,3364260000.0,3364260000.0,3031078000.0,3031078000.0
3,3350474000.0,3350474000.0,3015762000.0,3015762000.0
4,3332664000.0,3332664000.0,2996837000.0,2996837000.0
5,3311519000.0,3311519000.0,2975100000.0,2975099000.0
6,3287416000.0,3287416000.0,2950260000.0,2950260000.0
7,3260577000.0,3260577000.0,2923417000.0,2923417000.0
8,3231628000.0,3231628000.0,2894782000.0,2894782000.0
9,3200972000.0,3200972000.0,2864382000.0,2864382000.0


In [111]:
d = DNN()
d.customize_fit(epochs = 10)
d.add_one_dense_layer(128)
d.add_one_dense_layer(128)
d.build_compile_and_evaluate()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,mean_squared_error,val_loss,val_mean_squared_error
0,3368696000.0,3368696000.0,3008419000.0,3008419000.0
1,3210191000.0,3210191000.0,2723589000.0,2723589000.0
2,2858467000.0,2858467000.0,2422231000.0,2422231000.0
3,2652917000.0,2652917000.0,2329241000.0,2329241000.0
4,2563469000.0,2563469000.0,2240113000.0,2240113000.0
5,2463040000.0,2463040000.0,2133426000.0,2133426000.0
6,2357277000.0,2357277000.0,2038428000.0,2038428000.0
7,2273942000.0,2273942000.0,1972151000.0,1972151000.0
8,2222784000.0,2222784000.0,1931485000.0,1931485000.0
9,2184911000.0,2184911000.0,1894756000.0,1894756000.0
