In [41]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from torch import nn, optim
import torch

In [2]:
with open("english10000.txt",'r') as f:
    words = f.read().split("\n")

In [3]:
words[0:5]

['the', 'of', 'and', 'to', 'a']

In [4]:
wordset = set(words)

In [5]:
indexdict = {None: 0}
for i,w in enumerate(words):
    indexdict[w] = i+1

In [6]:
indexdict['center']

216

In [7]:
def get_distribution(txt):
    txtwords = txt.lower().split()
    dist = [0]*(len(words) + 1)
    for w in txtwords:
        if w in wordset:
            dist[indexdict[w]] += 1
        else:
            if w.isalpha():
                dist[0] += 1
    tot = sum(dist)
    return [d/tot for d in dist]

In [8]:
get_distribution("The of and to a adssssssssssssssss a")[0:10]

[0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.14285714285714285,
 0.2857142857142857,
 0.0,
 0.0,
 0.0,
 0.0]

In [9]:
mbti = pd.read_csv("kaggle/mbti.csv")
mbti.head()

Unnamed: 0,type,posts
0,INFJ,http://www.youtube.com/watch?v=qsXHcwe3krw|||h...
1,ENTP,I'm finding the lack of me in these posts very...
2,INTP,Good one _____ https://www.youtube.com/watc...
3,INTJ,"Dear , I enjoyed our conversation the other ..."
4,ENTJ,You're fired.|||That's another silly misconcep...


In [10]:
mbti.columns

Index(['type', 'posts'], dtype='object')

In [11]:
X = np.array([get_distribution(p) for p in mbti['posts']])

In [12]:
Y = np.array(mbti['type'])

In [85]:
rf = RandomForestClassifier(n_estimators=91)

In [86]:
rf.fit(X,Y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=91, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [87]:
joblib.dump(rf,"rf.pkl")

['rf.pkl']

In [88]:
rf.predict([get_distribution("Hello World")])

array(['INFP'], dtype=object)

In [92]:
txt = """

Deep Learning Course Forums

Problem creating custom loss function
fastai users
 
 
turntwo463
David
Mar 12
I am trying to create and use a custom loss function. When my initial attempts failed I decided to take a step back and implement (through cut and paste) the standard loss function used with a unet Learner in my own notebook. I thought this would be a good way to check my understanding of the size of the tensor inputs and see where the inputs differed between the standard loss function and the ones I first created.

To my disappointment my “cut and paste” loss function also does not work in that an exception is thrown during lr_find.

/opt/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   1786     if input.size(0) != target.size(0):
   1787         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
-> 1788                          .format(input.size(0), target.size(0)))
   1789     if dim == 2:
   1790         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

ValueError: Expected input batch_size (65536) to match target batch_size (8192).
I would appreciate some insight into what I am doing wrong.

Initial standard fastai code which does work:

wd=1e-2
learn = unet_learner(data, models.resnet34, metrics=[], wd=wd)
print('Loss func ', learn.loss_func)
Output:
Loss func FlattenedLoss of CrossEntropyLoss()

Here is the code I’ve pasted in (and renamed) that fails.

class MyFlattenedLoss():
    "Same as `func`, but flattens input and target."
    def __init__(self, func, *args, axis:int=-1, floatify:bool=False, is_2d:bool=True, **kwargs):
        self.func,self.axis,self.floatify,self.is_2d = func(*args,**kwargs),axis,floatify,is_2d

    def __repr__(self): return f"My FlattenedLoss of {self.func}"
    @property
    def reduction(self): return self.func.reduction
    @reduction.setter
    def reduction(self, v): self.func.reduction = v

    def __call__(self, input:Tensor, target:Tensor, **kwargs)->Rank0Tensor:
        print('input shape ', input.shape)
        print('target shape ', target.shape)
        
        input = input.transpose(self.axis,-1).contiguous()
        target = target.transpose(self.axis,-1).contiguous()
        
        print('input shape ', input.shape)
        print('target shape ', target.shape)
        
        if self.floatify: target = target.float()
        input = input.view(-1,input.shape[-1]) if self.is_2d else input.view(-1)
        
        print('input shape ', input.shape)
        print('target shape ', target.shape)
        print('floatify', self.floatify, ' 2d ', self.is_2d)
        print('kwargs ', kwargs)
        print('Func ', self.func)
        print('target view ', target.view(-1).shape)
        return self.func.__call__(input, target.view(-1), **kwargs)    
    


def MyCrossEntropyFlat(*args, axis:int=-1, **kwargs):
    "Same as `nn.CrossEntropyLoss`, but flattens input and target."
    return MyFlattenedLoss(nn.CrossEntropyLoss, *args, axis=axis, **kwargs)

wd=1e-2
​learn = unet_learner(data, models.resnet34, metrics=[], wd=wd)
learn.loss_func = MyCrossEntropyFlat()
print('Loss func ', learn.loss_func)
Output:
Loss func My FlattenedLoss of CrossEntropyLoss()

Exception occurs calling lr_find

lr_find(learn)
Note that the learner is setup to use a batch size of 8, there are 256 classes, and the images have been
specified to be resized to [32,32]

The following output is captured before the exception:

input shape  torch.Size([8, 256, 32, 32])
target shape  torch.Size([8, 1, 32, 32])
input shape  torch.Size([8, 256, 32, 32])
target shape  torch.Size([8, 1, 32, 32])
input shape  torch.Size([65536, 32])
target shape  torch.Size([8, 1, 32, 32])
floatify False  2d  True
kwargs  {}
Func  CrossEntropyLoss()
target view  torch.Size([8192])

Reply

created
Mar 12
last reply
Mar 13
2
replies
210
views
2
users
2
likes


renato
Renato Hermoza
Mar 12
Try: learn.loss_func = MyCrossEntropyFlat(axis=1), thats the channel that indicates the labels.

2

Reply

turntwo463
David
Mar 13
Thank you! Specifying the axis index solved the issue.


Reply
  Bookmark   Share   Flag   Reply
 You will be notified if someone mentions your @name or replies to you.
Suggested Topics
Topic	Replies	Views	Activity
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0 1
fastai users
6	1.7k	8d
EfficientNet 
fastai users
12	74	3h
There are 41 unread and 29 new topics remaining, or browse other topics in 
fastai users
"""
rf.predict([get_distribution(txt)])

array(['INTP'], dtype=object)

In [18]:
rf.classes_

array(['ENFJ', 'ENFP', 'ENTJ', 'ENTP', 'ESFJ', 'ESFP', 'ESTJ', 'ESTP',
       'INFJ', 'INFP', 'INTJ', 'INTP', 'ISFJ', 'ISFP', 'ISTJ', 'ISTP'],
      dtype=object)

In [26]:
X1,X2,Y1,Y2 = train_test_split(X,Y,test_size=0.2)

In [74]:
rf4 = RandomForestClassifier(n_estimators=73)

In [75]:
rf4.fit(X1,Y1)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=73, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [76]:
Yp = rf4.predict(X2)

In [77]:
f1_score(Yp,Y2,average='micro')

0.24438040345821327