# Entropy "model"

## Imports

In [1]:
from glob import glob
import json
import numpy as np
from os.path import basename
import pandas as pd
import PIL
import scipy.stats
import skimage.filters.rank 
from skimage.morphology import disk

from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting setup.
# Use matplotlib's built-in 'fivethirtyeight' style sheet for all figures.
plt.style.use('fivethirtyeight')
# Seaborn 'dark' theme, with the axes grid explicitly turned off.
sns.set_style("dark", {'axes.grid' : False})
# IPython magics: draw figures inline in the notebook, using the 'retina'
# figure format of the inline backend.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

## Load data

In [2]:
# Load the pickled DataFrame from ../clean_data.
# NOTE(review): unpickling can execute arbitrary code, so only point this at
# pickle files you created yourself or otherwise trust.
app_ui_exp = pd.read_pickle('../clean_data/app_ui_exp.pkl')
# (rows, columns) of the loaded table.
app_ui_exp.shape

(70959, 12)

In [3]:
# Preview the first two rows of the loaded table.
app_ui_exp.head(2)

Unnamed: 0,app_package_name,app_names,app_category,star_rating_ave,average_rating_ave,min_ratings_ave,filename_list,filename_num,filename_exp,img_w,img_h,aspect_ratio
0,B4A.BigFivePersonalityTest,Big 5 Personality Test,Education,3.6,6698.0,1000000.0,[../app_uis/49783.jpg],1,../app_uis/49783.jpg,1080,1920,0.5625
1,CN.MyPrivateMessages,Calculator,Communication,4.3,16952.333333,1000000.0,[../app_uis/26083.jpg],1,../app_uis/26083.jpg,1080,1920,0.5625


#### Resize screenshots to a more manageable size for the entropy calculation, using the script below

In [4]:
! cat ../scripts/resize.py

from glob import glob
from PIL import Image
from os.path import basename

source_path = '../app_uis/'
dest_path = '../app_uis_resized/'

# loop over images and resize
for i, x in enumerate(glob(source_path + '*.jpg')):
    try:
        img = Image.open(x)
        size = img.size
        if size[0] < size[1]:
            img_resized = img.resize((270, 480), Image.ANTIALIAS)
        else:
            img_resized = img.resize((480, 270), Image.ANTIALIAS)
        img_resized.save(dest_path + basename(x))
    except OSError as e:
        pass
    if i % 1000 == 0:
        print(i)


#### Add column in dataframe for filepaths to resized images

In [5]:
# Each resized screenshot keeps its original file name but lives under
# ../app_uis_resized/, so only the directory part of the path changes.
app_ui_exp['resized_file'] = app_ui_exp['filename_exp'].map(
    lambda original_path: '../app_uis_resized/' + basename(original_path)
)
app_ui_exp.head()

Unnamed: 0,app_package_name,app_names,app_category,star_rating_ave,average_rating_ave,min_ratings_ave,filename_list,filename_num,filename_exp,img_w,img_h,aspect_ratio,resized_file
0,B4A.BigFivePersonalityTest,Big 5 Personality Test,Education,3.6,6698.0,1000000.0,[../app_uis/49783.jpg],1,../app_uis/49783.jpg,1080,1920,0.5625,../app_uis_resized/49783.jpg
1,CN.MyPrivateMessages,Calculator,Communication,4.3,16952.333333,1000000.0,[../app_uis/26083.jpg],1,../app_uis/26083.jpg,1080,1920,0.5625,../app_uis_resized/26083.jpg
2,DOCECG2.doctor,Electrocardiogram ECG Types,Medical,3.9,4134.5,500000.0,[../app_uis/51036.jpg],1,../app_uis/51036.jpg,1080,1920,0.5625,../app_uis_resized/51036.jpg
3,Gecko.Droid.PhysicsHelper,Pocket Physics,Education,4.1,15938.0,1000000.0,"[../app_uis/29868.jpg, ../app_uis/29869.jpg, ....",5,../app_uis/29868.jpg,1080,1920,0.5625,../app_uis_resized/29868.jpg
4,Gecko.Droid.PhysicsHelper,Pocket Physics,Education,4.1,15938.0,1000000.0,"[../app_uis/29868.jpg, ../app_uis/29869.jpg, ....",5,../app_uis/29869.jpg,1080,1920,0.5625,../app_uis_resized/29869.jpg


## Calculate histograms & entropies 

#### Sample data

In [None]:
# Show one randomly chosen row (sample() draws a single row by default).
app_ui_exp.sample()

#### Add column in dataframe for grayscale histograms

In [20]:
def gs_histogram(filepath):
    """Return the grayscale histogram of the image stored at *filepath*.

    The image is converted to Pillow mode ``'L'`` (8-bit grayscale) before
    the histogram is taken, so the result is a list of pixel counts with one
    entry per gray level.

    Args:
        filepath: Path to an image file that Pillow can open.

    Returns:
        list[int]: Pixel counts per gray level.

    Raises:
        OSError: If the file cannot be opened or decoded as an image.
    """
    # Open via a context manager so the underlying file handle is released
    # deterministically; this function is mapped over tens of thousands of
    # screenshots and should not rely on garbage collection to close them.
    with PIL.Image.open(filepath) as img:
        return img.convert('L').histogram()

In [23]:
# One grayscale histogram per resized screenshot; the function is passed
# directly to map() rather than through a wrapper lambda.
app_ui_exp['gs_histogram'] = app_ui_exp['resized_file'].map(gs_histogram)

In [24]:
# Preview the first two rows, now including the gs_histogram column.
app_ui_exp.head(2)

Unnamed: 0,app_package_name,app_names,app_category,star_rating_ave,average_rating_ave,min_ratings_ave,filename_list,filename_num,filename_exp,img_w,img_h,aspect_ratio,resized_file,gs_histogram
0,B4A.BigFivePersonalityTest,Big 5 Personality Test,Education,3.6,6698.0,1000000.0,[../app_uis/49783.jpg],1,../app_uis/49783.jpg,1080,1920,0.5625,../app_uis_resized/49783.jpg,"[7495, 945, 498, 298, 210, 567, 162, 145, 106,..."
1,CN.MyPrivateMessages,Calculator,Communication,4.3,16952.333333,1000000.0,[../app_uis/26083.jpg],1,../app_uis/26083.jpg,1080,1920,0.5625,../app_uis_resized/26083.jpg,"[9609, 626, 310, 1090, 264, 699, 1081, 426, 41..."


In [26]:
# Sanity check: number of bins in the histogram stored at index label 0.
len(app_ui_exp.gs_histogram[0])

256

#### Add column in dataframe for grayscale entropies

In [27]:
def ui_entropy(vector):
    """Return the Shannon entropy, in bits, of a histogram of counts.

    The counts are normalised to probabilities ``p_i = count_i / total`` and
    the entropy is ``-sum(p_i * log2(p_i))`` over the non-empty bins (empty
    bins contribute nothing, following the convention ``0 * log2(0) = 0``).

    Args:
        vector: Sequence of non-negative bin counts, e.g. an image histogram.

    Returns:
        float: Entropy in bits. ``0.0`` when *vector* is empty or all of its
        counts are zero, since there is no distribution to measure.
    """
    counts = np.asarray(vector, dtype=float)
    # Compute the normalising total once instead of once per bin.
    total = counts.sum()
    if total == 0:
        # Empty or all-zero histogram: avoid dividing by zero.
        return 0.0
    probabilities = counts[counts != 0] / total
    return float(-np.sum(probabilities * np.log2(probabilities)))

In [28]:
# Entropy of each screenshot's grayscale histogram, one value per row.
app_ui_exp['gs_ent'] = app_ui_exp['gs_histogram'].map(ui_entropy)

In [29]:
# Preview the first two rows, now including the gs_ent column.
app_ui_exp.head(2)

Unnamed: 0,app_package_name,app_names,app_category,star_rating_ave,average_rating_ave,min_ratings_ave,filename_list,filename_num,filename_exp,img_w,img_h,aspect_ratio,resized_file,gs_histogram,gs_ent
0,B4A.BigFivePersonalityTest,Big 5 Personality Test,Education,3.6,6698.0,1000000.0,[../app_uis/49783.jpg],1,../app_uis/49783.jpg,1080,1920,0.5625,../app_uis_resized/49783.jpg,"[7495, 945, 498, 298, 210, 567, 162, 145, 106,...",4.2896
1,CN.MyPrivateMessages,Calculator,Communication,4.3,16952.333333,1000000.0,[../app_uis/26083.jpg],1,../app_uis/26083.jpg,1080,1920,0.5625,../app_uis_resized/26083.jpg,"[9609, 626, 310, 1090, 264, 699, 1081, 426, 41...",6.826821


#### Add column in dataframe for color histograms

In [30]:
def c_histogram(filepath):
    """Return the histogram of the image at *filepath* in its stored mode.

    Unlike ``gs_histogram`` no mode conversion is applied. For multi-band
    images Pillow concatenates the per-band histograms into one flat list,
    so the length of the result depends on the image mode.

    Args:
        filepath: Path to an image file that Pillow can open.

    Returns:
        list[int]: Pixel counts, with all bands concatenated.

    Raises:
        OSError: If the file cannot be opened or decoded as an image.
    """
    # Open via a context manager so the file handle is closed as soon as the
    # histogram has been computed rather than whenever the image object is
    # garbage collected.
    with PIL.Image.open(filepath) as img:
        return img.histogram()

In [None]:
# One colour histogram per resized screenshot; the function is passed
# directly to map() rather than through a wrapper lambda.
app_ui_exp['c_histogram'] = app_ui_exp['resized_file'].map(c_histogram)