In [1]:
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import gc

In [2]:
%%time
train = np.load('./model/bottleneck_features_train.npy')
validation = np.load('./model/bottleneck_features_validation.npy')
train.shape,validation.shape

Wall time: 1.54 s


In [3]:
train.shape,validation.shape

((20000, 7, 7, 1024), (2500, 7, 7, 1024))

In [4]:
train_label = np.array([0]*10000+[1]*10000)
valid_label = np.array([0]*1250+[1]*1250)

In [5]:
plus = np.vstack([train,validation])
plus_label = np.hstack([train_label,valid_label])

In [9]:
plus = plus.reshape(plus.shape[0],-1)
plus.shape

(22500, 50176)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(plus, plus_label, test_size=0.15)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((19125, 50176), (19125,), (3375, 50176), (3375,))

In [11]:
del plus, train, validation
gc.collect()

10

### prdict

In [14]:
model = LogisticRegression(n_jobs=-1, verbose=1, solver='sag')
model

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=-1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=1, warm_start=False)

In [15]:
%%time
model.fit(X_train, y_train)

  " = {}.".format(self.n_jobs))


[LibLinear]Wall time: 41.5 s


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=-1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=1, warm_start=False)

In [17]:
X_test.shape

(3375, 50176)

In [22]:
from sklearn.metrics import accuracy_score, classification_report

In [30]:
print (classification_report(y_test, model.predict(X_test)))

             precision    recall  f1-score   support

          0       0.99      0.98      0.99      1702
          1       0.98      0.99      0.99      1673

avg / total       0.99      0.99      0.99      3375



In [25]:
accuracy_score(y_test, model.predict(X_test))

0.9866666666666667

In [31]:
model.predict(X_test)

array([1, 1, 1, ..., 0, 0, 1])

In [36]:
model.predict_proba(X_test)[:,1]

array([1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
       2.12817595e-12, 6.53425874e-07, 1.00000000e+00])

In [44]:
import keras
from keras.preprocessing.image import array_to_img, img_to_array, load_img, ImageDataGenerator
import math

In [38]:
img_width, img_height = 224,224
pre_model = keras.applications.MobileNet(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))

In [43]:
datagen = ImageDataGenerator(rescale=1. / 255)
generator = datagen.flow_from_directory(
        'test/',
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='binary',
        shuffle=False)

Found 12500 images belonging to 1 classes.


In [52]:
generator.filenames[:10]

['test\\1.jpg',
 'test\\10.jpg',
 'test\\100.jpg',
 'test\\1000.jpg',
 'test\\10000.jpg',
 'test\\10001.jpg',
 'test\\10002.jpg',
 'test\\10003.jpg',
 'test\\10004.jpg',
 'test\\10005.jpg']

In [71]:
test_feature = pre_model.predict_generator(
    generator, np.ceil(12500 / 32),
    verbose=1)



In [72]:
test_feature.shape

(12500, 7, 7, 1024)

In [73]:
test_feature = test_feature.reshape(test_feature.shape[0],-1)
test_feature.shape

(12500, 50176)

In [74]:
pred_value = model.predict_proba(test_feature)[:,1]
pred_value.shape

(12500,)

In [75]:
pred_value

array([1.00000000e+00, 3.30782184e-11, 1.25140847e-07, ...,
       9.99999905e-01, 5.03273246e-09, 2.52091573e-05])

In [76]:
len(generator.filenames)

12500

In [79]:
for i in zip(generator.filenames, pred_value):
    print (i)
    break

('test\\1.jpg', 0.9999999999974909)


In [81]:
df = pd.DataFrame(list(zip(generator.filenames, pred_value)))
df.head()

Unnamed: 0,0,1
0,test\1.jpg,1.0
1,test\10.jpg,3.307822e-11
2,test\100.jpg,1.251408e-07
3,test\1000.jpg,1.0
4,test\10000.jpg,1.0


In [110]:
df.index

RangeIndex(start=0, stop=12500, step=1)

In [86]:
df.columns=[['id','label']]
df.shape

(12500, 2)

In [102]:
df['id']=df.iloc[:,0].apply(lambda x: int(x.split('\\')[-1].replace('.jpg','')))

In [121]:
df.to_csv('tmp.csv', index=False)

In [122]:
df2=pd.read_csv('tmp.csv')
df2.shape

(12500, 2)

In [128]:
df3 = df2.sort_values('id')
df3.to_csv('transfer_logic.csv', index=False)

### predict helper func

In [59]:
def helper(start, end):
    result = []
    for p in range(start,end+1):

        result.append([p,final.predict_proba(
            pre_model.predict(
                img_to_array(load_img('./test/test/{}.jpg'.format(p), 
                                      target_size=(224,224,3)
                                     )).reshape(-1,224,224,3)/255.0
            ))[0][0]])
    return result

In [60]:
import pandas as pd

In [61]:
%%time
df = pd.DataFrame(helper(1,12500))
print (df.shape)

(12500, 2)
Wall time: 5min 15s


In [62]:
df.columns=['id','label']

In [63]:
df.to_csv('224_transfer_mobelnet3.csv',index=False)

In [131]:
pd.Series(generator.classes).value_counts()

0    12500
dtype: int64

http://fizzylogic.nl/2017/05/08/monitor-progress-of-your-keras-based-neural-network-using-tensorboard/