## 1. Read the census data using pandas library

In [1]:
import pandas as pd

In [2]:
census = pd.read_csv('Data/census_data.csv')

## 2. Display the head of the dataset

In [3]:
census.head()

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country,income_bracket
0,39,State-gov,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


## 3. Convert the label column to 0s and 1s instead of strings

In [4]:
census['income_bracket'].unique()

array([' <=50K', ' >50K'], dtype=object)

In [5]:
def lable_fix(label):
    if label == ' <=50K':
        return 0
    else:
        return 1

In [6]:
census['income_bracket'] = census['income_bracket'].apply(lable_fix)

## 4. Perform the train test split on the data

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
x_data = census.drop('income_bracket' , axis = 1)
y_lables = census['income_bracket']
x_train, x_test, y_train, y_test = train_test_split(x_data, y_lables, test_size=0.3, random_state=101)

In [9]:
census.columns

Index(['age', 'workclass', 'education', 'education_num', 'marital_status',
       'occupation', 'relationship', 'race', 'gender', 'capital_gain',
       'capital_loss', 'hours_per_week', 'native_country', 'income_bracket'],
      dtype='object')

## 5. Create feature columns for categorical values using vocabulary lists or hash buckets

In [10]:
import tensorflow as tf

In [11]:
gender = tf.feature_column.categorical_column_with_vocabulary_list("gender" , ["Female", "Male"])
occupation = tf.feature_column.categorical_column_with_hash_bucket("occupation" , hash_bucket_size=1000)
marital_status = tf.feature_column.categorical_column_with_hash_bucket("marital_status" , hash_bucket_size=1000)
relationship = tf.feature_column.categorical_column_with_hash_bucket("relationship", hash_bucket_size=1000)
education = tf.feature_column.categorical_column_with_hash_bucket("education" ,hash_bucket_size=1000)
workclass = tf.feature_column.categorical_column_with_hash_bucket("workclass", hash_bucket_size=1000)
native_country = tf.feature_column.categorical_column_with_hash_bucket("native_country", hash_bucket_size=1000)

## 6. Create the feature columns for the continuous values using numeric_column

In [12]:
age = tf.feature_column.numeric_column("age")
education_num = tf.feature_column.numeric_column("education_num")
capital_gain = tf.feature_column.numeric_column("capital_gain")
capital_loss = tf.feature_column.numeric_column("capital_loss")
hours_per_week = tf.feature_column.numeric_column("hours_per_week")

## 7. Put all these variables into a single list with variable name feat_cols

In [13]:
feat_cols = [gender, occupation, marital_status, relationship, education, workclass, native_country, age, education_num, 
             capital_gain, capital_loss, hours_per_week]

## 8. Create the input function with batch size and epochs

In [14]:
input_func = tf.compat.v1.estimator.inputs.pandas_input_fn(x=x_train, y=y_train, batch_size=100, num_epochs=None, shuffle=True)





## 9. Create the model with tf.estimator using Linear Classifier

In [15]:
model = tf.estimator.LinearClassifier(feature_columns=feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\lahbi\\AppData\\Local\\Temp\\tmpg1g5ptnv', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## 10. Train the model for atleast 5000 steps

In [16]:
model.train(input_fn=input_func, steps=5000)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\lahbi\AppData\Local\Temp\tmpg1g5ptnv\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoi

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x15e0d6c7520>

## 11. Evalution of the model

In [17]:
pred_fn =tf.compat.v1.estimator.inputs.pandas_input_fn(x=x_test, batch_size=len(x_train), shuffle=False)

In [18]:
# Using model.predict() and passing the input function. 
# This will produce a generator of predictions, which can then transformed into list
predictions = list(model.predict(input_fn=pred_fn))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\lahbi\AppData\Local\Temp\tmpg1g5ptnv\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [19]:
# Each item in the list will look like this
predictions[0]

{'logits': array([-1.1218491], dtype=float32),
 'logistic': array([0.24566846], dtype=float32),
 'probabilities': array([0.7543315 , 0.24566846], dtype=float32),
 'class_ids': array([0], dtype=int64),
 'classes': array([b'0'], dtype=object),
 'all_class_ids': array([0, 1]),
 'all_classes': array([b'0', b'1'], dtype=object)}

## 12. Create a list of class_ids key values from the prediction list of dictionaries. These prediction will be used to compare against y_test values 

In [20]:
final_preds = []
for pred in predictions:
    final_preds.append(pred['class_ids'][0])

In [21]:
final_preds[:10]

[0, 0, 0, 0, 1, 0, 0, 0, 0, 0]

## 13. Calculating the model performance on Test Data

In [22]:
from sklearn.metrics import classification_report

In [27]:
# Calculate accuracy
predicted_classes = [p['class_ids'][0] for p in predictions]
correct_predictions = sum(predicted_classes == y_test)
total_predictions = len(y_test)
accuracy = correct_predictions / total_predictions
print("Accuracy:", accuracy)
print(classification_report(y_test, final_preds))

Accuracy: 0.8409253761899887
              precision    recall  f1-score   support

           0       0.89      0.90      0.90      7436
           1       0.67      0.66      0.67      2333

    accuracy                           0.84      9769
   macro avg       0.78      0.78      0.78      9769
weighted avg       0.84      0.84      0.84      9769



In [28]:
x_train

Unnamed: 0,age,workclass,education,education_num,marital_status,occupation,relationship,race,gender,capital_gain,capital_loss,hours_per_week,native_country
20895,22,Private,HS-grad,9,Never-married,Adm-clerical,Own-child,White,Female,0,0,28,United-States
3384,47,Private,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Wife,Black,Female,15024,0,40,United-States
1832,46,Local-gov,Some-college,10,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,24,United-States
18919,46,State-gov,Some-college,10,Divorced,Adm-clerical,Unmarried,White,Female,0,0,48,United-States
31685,60,Private,HS-grad,9,Married-civ-spouse,Adm-clerical,Husband,White,Male,0,0,40,United-States
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5695,63,Private,Some-college,10,Divorced,Sales,Not-in-family,White,Female,0,0,60,United-States
8006,19,Private,HS-grad,9,Never-married,Other-service,Own-child,White,Female,0,0,15,United-States
17745,62,Private,Assoc-voc,11,Married-civ-spouse,Prof-specialty,Husband,White,Male,7298,0,50,United-States
17931,49,Private,Some-college,10,Divorced,Other-service,Not-in-family,White,Male,0,0,40,United-States


In [35]:
y_train


20895    0
3384     1
1832     0
18919    0
31685    0
        ..
5695     0
8006     0
17745    1
17931    0
13151    0
Name: income_bracket, Length: 22792, dtype: int64

In [53]:
# Assuming you have created and trained the model using the `tf.estimator.Estimator` API
# ...

def serving_input_receiver_fn():
    # Define the placeholders for each feature
    age = tf.compat.v1.placeholder(dtype=tf.int64, shape=[1], name='age')
    workclass = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='workclass')
    education = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='education')
    education_num = tf.compat.v1.placeholder(dtype=tf.int64, shape=[None], name='education_num')
    marital_status = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='marital_status')
    occupation = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='occupation')
    relationship = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='relationship')
    race = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='race')
    gender = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='gender')
    capital_gain = tf.compat.v1.placeholder(dtype=tf.int64, shape=[None], name='capital_gain')
    capital_loss = tf.compat.v1.placeholder(dtype=tf.int64, shape=[None], name='capital_loss')
    hours_per_week = tf.compat.v1.placeholder(dtype=tf.int64, shape=[None], name='hours_per_week')
    native_country = tf.compat.v1.placeholder(dtype=tf.string, shape=[None], name='native_country')

    # Create a feature dictionary using the placeholders
    feature_spec = {
        'age': age,
        'workclass': workclass,
        'education': education,
        'education_num': education_num,
        'marital_status': marital_status,
        'occupation': occupation,
        'relationship': relationship,
        'race': race,
        'gender': gender,
        'capital_gain': capital_gain,
        'capital_loss': capital_loss,
        'hours_per_week': hours_per_week,
        'native_country': native_country,
    }

    # Create a dictionary containing the features
    features = {
        'age': age,
        'workclass': workclass,
        'education': education,
        'education_num': education_num,
        'marital_status': marital_status,
        'occupation': occupation,
        'relationship': relationship,
        'race': race,
        'gender': gender,
        'capital_gain': capital_gain,
        'capital_loss': capital_loss,
        'hours_per_week': hours_per_week,
        'native_country': native_country,
    }
    
    return tf.estimator.export.ServingInputReceiver(features, feature_spec)




# Export the model with the modified signature
full_path = r'C:\my_saved_model\1690126746'
model.export_saved_model(full_path, serving_input_receiver_fn)


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Signatures EXCLUDED from export because they cannot be be served via TensorFlow Serving APIs:
INFO:tensorflow:'serving_default' : Classification signatures can only accept a single tensor input of type tf.string. Please check to make sure that you have structured the serving_input_receiver_fn so that it creates a single string placeholder. If your model function expects multiple inputs, then use `tf.io.parse_example()` to parse the string into multiple tensors.
 Received: {'age': <tf.Tensor 'age:0' shape=(1,) dtype=int64>, 'workclass': <tf.Tensor 'workclass:0' shape=(None,) dtype=s

b'C:\\my_saved_model\\1690126746\\1690127283'

In [34]:
import pandas as pd

# Create a new DataFrame for the new user
#0
# new_user = pd.DataFrame({
#     'age': [22],
#     'workclass': [' Private'],
#     'education': [' HS-grad'],
#     'education_num': [9],
#     'marital_status': [' Never-married'],
#     'occupation': [' Adm-clerical'],
#     'relationship': [' Own-child'],
#     'race': [' White'],
#     'gender': [' Female'],
#     'capital_gain': [0],
#     'capital_loss': [0],
#     'hours_per_week': [28],
#     'native_country': [' United-States']
    
# })
#1
new_user = pd.DataFrame({
    'age': [47],
    'workclass': [' Private'],
    'education': [' HS-grad'],
    'education_num': [9],
    'marital_status' : [' Married-civ-spouse'],
    'occupation': ['Machine-op-inspct'],
    'relationship': [' Wife'],
    'race': [' Black'],
    'gender': [' Female'],
    'capital_gain': [15024],
    'capital_loss': [0],
    'hours_per_week': [40],
    'native_country': [' United-States']
})
# Use the predict method of your trained model to get the predicted income category
prediction = model.predict(input_fn=tf.compat.v1.estimator.inputs.pandas_input_fn(new_user, shuffle=False))
predicted_class = [p['class_ids'][0] for p in prediction]

# Print the predicted income category
if predicted_class[0] == 0:
    print('Predicted income category: <=50k')
else:
    print('Predicted income category: >50k')

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\lahbi\AppData\Local\Temp\tmpgpd2za_r\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Predicted income category: >50k
