# Model Interest Vector Conversion
###### by Wilson Lee
###### Data Set : China Mobile User Gemographics Source
###### Link : https://www.kaggle.com/chinapage/china-mobile-user-gemographics

This code converts the interest vector output by the regression model into a list of corresponding categories.

In [1]:
# import the library
%matplotlib inline

import os
import pandas as pd
import numpy as np
from joblib import dump, load

## Load Data

In [2]:
# Root folder that is searched (recursively) for data files.
wDatabaseFolder = "../../Data/Modeling/active_apps_data"

# Walk the folder tree and read one file per directory: only the first file
# name reported for each directory is loaded, the remaining ones are skipped.
# NOTE(review): confirm that skipping the remaining files is intended.
list_of_database = []
for wRoot, wDirs, wFiles in os.walk(wDatabaseFolder):
    for wFilename in wFiles[:1]:
        wCurrentFilename = os.path.join(wRoot, wFilename)
        wFrame = pd.read_csv(wCurrentFilename, index_col=None)
        list_of_database.append(wFrame)

# Stack the loaded frames row-wise and renumber the index from zero.
df_database = pd.concat(list_of_database, ignore_index=True, axis=0)


## View Data

In [3]:
# Print every column name, then preview the first rows of the data set.
wColumnNames = df_database.columns.to_list()
print(wColumnNames)
df_database.head()

['age', 'day_of_week', 'hour', '3d', '80', '90', 'abroad', 'academic', 'accommodation', 'accounting', 'action', 'activity', 'adventure', 'advice', 'advisory', 'aggregate', 'air', 'amoy', 'animation', 'answer', 'antique', 'app', 'appliance', 'application', 'area', 'around', 'art', 'asia', 'astrology', 'audiobooks', 'automotive', 'aviation', 'avoid', 'baby', 'bank', 'banking', 'based', 'basketball', 'beauty', 'behalf', 'big', 'billards', 'blog', 'bobble', 'book', 'booking', 'box', 'brokerage', 'bus', 'business', 'buy', 'calendar', 'car', 'card', 'care', 'cartoon', 'casual', 'chain', 'channel', 'checkpoint', 'chess', 'child', 'chinese', 'church', 'class', 'classical', 'clock', 'coach', 'collection', 'college', 'comfortable', 'comic', 'commodity', 'community', 'comparing', 'competitive', 'complex', 'condition', 'consumer', 'contact', 'content', 'convenience', 'cool', 'cosplay', 'cost', 'coupon', 'cozy', 'credit', 'crowdfunding', 'cultivation', 'culture', 'custom', 'customization', 'customi

Unnamed: 0,age,day_of_week,hour,3d,80,90,abroad,academic,accommodation,accounting,...,western,wifi,world,xianxia,zombie,zuma,longitude,latitude,gender_F,gender_M
0,31,0,7,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,114.362348,30.871515,1,0
1,31,0,8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,114.362348,30.871515,1,0
2,31,0,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,114.36,30.87,1,0
3,31,0,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,114.36,30.87,1,0
4,40,0,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,124.888462,46.618974,0,1


## Load Categories

In [4]:
# Read the label/category table from disk.
wRawCategories = pd.read_csv("../../Data/Modeling/label_categories.csv")

# Drop every column whose name starts with "Unnamed".
wIsUnnamed = wRawCategories.columns.str.contains('^Unnamed')
df_label_categories = wRawCategories.loc[:, ~wIsUnnamed]

## View Categories

In [5]:
# Display the category table (the notebook renders the cell's last expression).
df_label_categories

Unnamed: 0,label_id,category,category-mod,3d,80,90,abroad,academic,accommodation,accounting,...,weather,weibo,weight,west,western,wifi,world,xianxia,zombie,zuma
0,2,game-game type,game game type,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,3,game-Game themes,game game theme,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,game-Art Style,game art style,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5,game-Leisure time,game leisure time,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,6,game-Cutting things,game cutting thing,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,7,game-Finding fault,game finding fault,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,8,game-stress reliever,game stress reliever,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,9,game-pet,game pet,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,10,game-Answer,game answer,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,11,game-Fishing,game fishing,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Load Model

In [6]:
# Restore the regression model from its joblib file in the working directory.
# NOTE: joblib.load unpickles arbitrary objects; only load files from a trusted source.
LinearRegressionModel = load('LinearRegression.joblib')

## Create Prediction

In [7]:
# Model input columns, in order:
# ['age', 'day_of_week', 'hour', 'longitude', 'latitude', 'gender_F', 'gender_M']
iAge = 25
iDay_of_Week = 0
iHour = 23
iLongitude = 121.38
iLatitude = 31.24
iSex = "Male"

# Derive the two gender flags from iSex: anything other than "Male" sets gender_F.
wIsMale = iSex == "Male"
wFeatureRow = [iAge, iDay_of_Week, iHour, iLongitude, iLatitude, not wIsMale, wIsMale]

# predict receives a single-row batch, hence the extra list around the row.
prediction = LinearRegressionModel.predict([wFeatureRow])
print(prediction)


[[ 3.63705496e-05  3.38836718e-05  1.42658475e-04  2.21257073e-04
  -9.11265995e-05  1.10443664e-03  7.57077460e-02  2.53286295e-03
   1.64043113e-02  9.02039102e-04  9.23937453e-03  2.37192469e-03
   3.82034898e-03  7.73343106e-02  6.15514345e-03  7.91896364e-03
   6.54406673e-03 -4.35814773e-05  3.08627502e-04  1.19473360e-02
   9.69761182e-03  1.00246883e-03  6.56168997e-04  9.16677069e-04
   1.54059389e-03  9.53132954e-04  2.56163031e-03  2.43341514e-02
   1.55569810e-02  1.40004678e-03  3.94075664e-02  1.70280075e-01
   5.36192888e-01  4.92053751e-04  2.58066032e-04  2.92258437e-03
   1.71027201e-03  2.40328551e-03  9.19379580e-04  1.53360173e-02
   3.97805667e-04  6.44970115e-03  7.71264903e-02  6.92032697e-03
   1.40382944e-02  9.23467833e-03  2.74216438e-03  4.76414796e-02
   1.84480699e-02  5.41088776e-02  1.01184232e-01  2.82256617e-03
   2.03501505e-02  2.86479079e-03  5.56787557e-03  6.89311263e-02
   2.11047078e-03  1.04430121e-02  3.80974400e-02  2.85758680e-03
   5.59152

## Import Interest Vector Conversion Class

In [8]:
import InterestVectorConvertor


## Create Class

In [9]:
# Build the converter from the category table (the class lives in the
# InterestVectorConvertor module, which is not shown here), then display the
# category table stored on the converter.
wInterestVectorConverter = InterestVectorConvertor.InterestVectorConvertor(df_label_categories)
wInterestVectorConverter.df_Category

Unnamed: 0,category,category-mod,3d,80,90,abroad,academic,accommodation,accounting,action,...,weather,weibo,weight,west,western,wifi,world,xianxia,zombie,zuma
0,game-game type,game game type,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,game-Game themes,game game theme,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,game-Art Style,game art style,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,game-Leisure time,game leisure time,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,game-Cutting things,game cutting thing,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,game-Finding fault,game finding fault,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,game-stress reliever,game stress reliever,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,game-pet,game pet,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,game-Answer,game answer,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,game-Fishing,game fishing,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Convert Prediction

In [10]:
# Convert the model's prediction vector into a table of categories with scores.
df_LabelCategory_Score = wInterestVectorConverter.convertVector(prediction)

In [11]:
# Display the converted result (columns 'category-mod' and 'category-score').
df_LabelCategory_Score

Unnamed: 0,category-mod,category-score
571,property industry new,1.81996
570,property industry,1.800732
376,instant message,1.289762
693,third party card management,1.236696
694,third party payment,1.225262
524,p2p net loan,1.194269
163,custom label,1.177563
387,internet banking,1.081166
348,high risk,1.079637
445,low risk,1.051737


## Save Converter

In [12]:
# Persist the converter object to disk; the returned list of written file
# names is echoed by the notebook.
dump(wInterestVectorConverter, 'InterestVectorConverter.joblib')

['InterestVectorConverter.joblib']

## Load Converter

In [13]:
# Reload the converter from the joblib file into a separate variable.
# NOTE: joblib.load unpickles arbitrary objects; only load files from a trusted source.
Converter = load('InterestVectorConverter.joblib')

## Test Loaded Converter

In [14]:
# Model input columns, in order:
# ['age', 'day_of_week', 'hour', 'longitude', 'latitude', 'gender_F', 'gender_M']
iAge = 25
iDay_of_Week = 0
iHour = 23
iLongitude = 121.38
iLatitude = 31.24
iSex = "Male"

# Derive the two gender flags from iSex: anything other than "Male" sets gender_F.
wIsMale = iSex == "Male"
wFeatureRow = [iAge, iDay_of_Week, iHour, iLongitude, iLatitude, not wIsMale, wIsMale]

# Predict on a single-row batch, then run the reloaded converter on the result.
prediction = LinearRegressionModel.predict([wFeatureRow])
dfList = Converter.convertVector(prediction)
dfList

Unnamed: 0,category-mod,category-score
571,property industry new,1.81996
570,property industry,1.800732
376,instant message,1.289762
693,third party card management,1.236696
694,third party payment,1.225262
524,p2p net loan,1.194269
163,custom label,1.177563
387,internet banking,1.081166
348,high risk,1.079637
445,low risk,1.051737
