In [147]:
require "hdf5"
require "optim"
require "nn"

In [148]:
f = hdf5.open("data.hdf5", "r")

X_train = f:read("X_train"):all()
Y_train = f:read("Y_train"):all()
X_valid = f:read("X_valid"):all()
Y_valid = f:read("Y_valid"):all()
X_test = f:read("X_test"):all()
nwords = f:read("nwords"):all()[1]
nclasses = f:read("nclasses"):all()[1]


--sentences
X_valid_sen = f:read("X_valid_sen"):all()
X_test_sen = f:read("X_test_sen"):all()

--MEMM
X_train_MEMM = f:read("X_train_MEMM"):all()
X_valid_MEMM = f:read("X_valid_MEMM"):all()
X_valid_sen_MEMM = f:read("X_valid_sen_MEMM"):all()
nfeaturesMEMM = f:read("nfeaturesMEMM"):all()[1]

C = nclasses

In [154]:
function process(inputs, numwords)
    processed = inputs:clone()
    for i = 1, inputs:size(1) do
        processed[i][2] = inputs[i][2] + numwords
    end
    return processed
end

function feval(w_new)
    bsize= batch
    local idx = torch.randperm(X_:size(1)):sub(1,bsize)
    local x = torch.Tensor(bsize, X_:size(2))
    local y_ = torch.Tensor(bsize, 1)   
    for i=1,bsize do
        x[i] = X_[idx[i]]
        
        y_[i] = y[idx[i]]
    end   
    y_ = y_:squeeze()
    local inputs = x
    local targets = y_
    -- reset gradients (gradients are always accumulated, to accommodate
    -- batch methods)
    dl_dw:zero()
    -- evaluate the loss function and its derivative with respect to x, given a mini batch
    local prediction = h:forward(inputs)
    local loss_w = mse:forward(prediction, targets)
    h:backward(inputs, mse:backward(prediction, targets))   
    return loss_w, dl_dw
end

-- Calculates log scores of for a MEMM
-- corresponds to the vector y in the lecture notes
-- observations: a sequence of observations, represented as integers
-- i: timestep for the computed score
function score_memm(input, i)
    local logscore = torch.log(nn.SoftMax():forward(h:forward(input:sub(i,i))))
    return logscore:t()
end

-- Viterbi algorithm for MEMM (slight modification).
-- observations: a sequence of observations, represented as integers
-- logscore: the edge scoring function over classes and observations in a history-based model
function viterbi(observations, logscore)
    local initial = torch.zeros(nclasses,1) + .00001
    initial[8] = 1.0
    initial = initial / torch.sum(initial)
    local n = observations:size(1)
    local max_table = torch.Tensor(n, C)
    local backpointer_table = torch.Tensor(n, C)
    -- first timestep
    -- the initial most likely paths are the initial state distribution
    -- NOTE: another unnecessary Tensor allocation here
    local maxes, backpointers = (initial + predict(observations, 1, logscore)):max(2)--torch.log(initial):max(2)--(init+ emi[observations[1]]):max(2)
    max_table[1] = maxes
    -- remaining timesteps ("forwarding" the maxes)
    for i=2,n do
        -- precompute edge scores
        pred  = (predict(observations, i, logscore)):expand(C,C)
        scores = pred + maxes:view(1,C):expand(C,C)--pred + maxes:view(1, C):expand(C, C)
        -- compute new maxes (NOTE: another unnecessary Tensor allocation here)
        maxes, backpointers = scores:max(2)
        -- record
        max_table[i] = maxes
        backpointer_table[i] = backpointers
        end
    -- follow backpointers to recover max path
    local classes = torch.Tensor(n)
    maxes, classes[n] = maxes:max(1)
    for i=n,2,-1 do
        classes[i-1] = backpointer_table[{i, classes[i]}]
    end   
    return classes:sub(1,-1)
end

-- Returns a table of predicted tags for each sequence
-- X : tensor of sequences by word features
function predict_tags(X)
    local predictions = torch.zeros(X:size(1), X:size(2))
    for i=1,X:size(1) do
        local sen = (X[i]:sub(1,torch.nonzero(X[{{i,i},{},{1,1}}]):size(1))):squeeze()
        --print(sen)

        local p = viterbi(sen, score_memm)
        predictions[{{i,i},{1,p:size(1)}}] = p
    end
    return predictions
end

-- Returns counts of retrived relevance, unretrived relevance, and retrived irrelevance for supervised data
-- p_tags : tensor of predicted tags for each sequence
-- y_tags : labeled tags for each sequence
function predict_fscore(p_tags, y_tags)
    local rel_retrived = {[1]=0,[2]=0,[3]=0,[4]=0,[5]=0}--,[6]=0,[7]=0}
    local rel_notretrived = {[1]=0,[2]=0,[3]=0,[4]=0,[5]=0}--,[6]=0,[7]=0}
    local irrel_retrived = {[1]=0,[2]=0,[3]=0,[4]=0,[5]=0}--,[6]=0,[7]=0}
    for i=1,p_tags:size(1) do
        for j=1,p_tags:size(2) do
            for tag=1,5 do        
                if y_tags[i][j] == tag then
                    rel_notretrived[tag] = rel_notretrived[tag] + 1
                    if p_tags[i][j] == tag then
                        rel_retrived[tag] = rel_retrived[tag] + 1
                    else
                        irrel_retrived[tag] = irrel_retrived[tag] + 1
                    end
                end
            end
        end
    end
    local recall = 0
    local precis = 0
    for i=1,5 do
        local r = (rel_retrived[i]/(rel_retrived[i]+rel_notretrived[i]))
        local p = (rel_retrived[i]/(rel_retrived[i]+irrel_retrived[i]))
        print(i, p, r, 2*p*r/(p+r))
        recall = recall + r
        precis = precis + p
    end
    return recall/5,precis/5
end

-- Returns validation tags in format similar to p_tags
-- valid : correct tags in sequence
function format_valid(valid)
    local y_tags = torch.zeros(X_valid_sen:size(1), X_valid_sen:size(2))
    local sen_idx = 0
    local col_idx = 0
    for i=1,valid:size(1) do
        if valid[i] == 8 then
            sen_idx = sen_idx + 1
            col_idx = 1
        end
        y_tags[sen_idx][col_idx] = valid[i]
        col_idx = col_idx + 1
    end
    return y_tags
end

In [200]:
predicc = viterbi(X_valid, score_memm)

In [175]:
X_test_sen:size()

 1646
  110
    1
[torch.LongStorage of size 3]



In [196]:
function format_test(test)
    local y_tags = torch.zeros(X_test_sen:size(1), X_test_sen:size(2))
    local sen_idx = 0
    local col_idx = 0
    for i=1,test:size(1) do
        if test[i] == 8 then
            sen_idx = sen_idx + 1
            col_idx = 1
        end
        y_tags[sen_idx][col_idx] = test[i]
        col_idx = col_idx + 1
    end
    return y_tags
end

In [197]:
to_kaggle=format_test(predicc)

In [203]:
Y_valid:sub(1,20)

 8
 1
 1
 4
 1
 1
 1
 1
 1
 1
 1
 1
 8
 3
 1
 8
 5
 5
 1
 2
[torch.LongTensor of size 20]



In [201]:
predicc:sub(1,20)

 8
 1
 1
 4
 2
 2
 1
 2
 1
 2
 2
 1
 8
 3
 1
 8
 3
 5
 4
 2
[torch.DoubleTensor of size 20]



In [156]:
y_tags = format_valid(Y_valid)
r, p = predict_fscore(predictions, y_tags)
print(r, p)
print(2*r*p/(r+p))

1	0.96391705903658	0.49081352728279	0.65043457008875	
2	0.43394833948339	0.30262480699949	0.35657974530018	
3	0.44389275074479	0.30742778541953	0.36326696464852	
4	0.39702760084926	0.28419452887538	0.33126660761736	
5	0.25244618395303	0.2015625	0.2241529105126	
0.31732462971544	0.49824638681341	
0.38771816800341	


In [155]:
a =X_valid_sen_MEMM
--prediction = viterbi(a[1]:sub(1,12), h)
predictions = predict_tags(a)

In [150]:
for i = 1,X_valid_sen_MEMM:size(1) do
    X_valid_sen_MEMM[i] = process(X_valid_sen_MEMM[i], nwords)
end
    
X_ = process(X_train_MEMM, nwords)
y = Y_train

In [151]:
addB = nn.Add(nclasses)
lookup = nn.LookupTable(nwords+nclasses,nclasses)
sum = nn.Sum(2)
softmax = nn.LogSoftMax()
h = nn.Sequential()
h:add(lookup)
h:add(sum)
h:add(addB)
mse = nn.CrossEntropyCriterion()

w, dl_dw = h:getParameters()

In [194]:
batch = 1000

-- cycle on data
for i = 1,50000 do
    -- train a mini_batch of batchSize in parallel
    _, fs = optim.adam(feval,w)

    if i % 100 == 0 then
        print('loss for iteration ' .. i  .. ' is ' .. fs[1] )
        -- print(sgd_params)
    end
end

loss for iteration 100 is 0.3197730410171	


loss for iteration 200 is 0.32427961382553	


loss for iteration 300 is 0.33960974628709	


loss for iteration 400 is 0.31646359976712	


loss for iteration 500 is 0.34738714773226	


loss for iteration 600 is 0.27876066750273	


loss for iteration 700 is 0.29621635361014	


loss for iteration 800 is 0.28465291901618	


loss for iteration 900 is 0.29055916297841	


loss for iteration 1000 is 0.30594873633813	


loss for iteration 1100 is 0.27895072622516	


loss for iteration 1200 is 0.27494966364293	


loss for iteration 1300 is 0.32331096221067	


loss for iteration 1400 is 0.31051361605597	


loss for iteration 1500 is 0.27619056570747	


loss for iteration 1600 is 0.33335040284448	


loss for iteration 1700 is 0.32486631127217	


loss for iteration 1800 is 0.27377376454303	


loss for iteration 1900 is 0.3196226322078	


loss for iteration 2000 is 0.3032382701171	


loss for iteration 2100 is 0.32596230650918	


loss for iteration 2200 is 0.31011678896383	


loss for iteration 2300 is 0.26284720316057	


loss for iteration 2400 is 0.27222302865857	


loss for iteration 2500 is 0.31372005494439	


loss for iteration 2600 is 0.3020498732	


loss for iteration 2700 is 0.28758896700732	


loss for iteration 2800 is 0.23852212928542	


loss for iteration 2900 is 0.28409555537231	


loss for iteration 3000 is 0.27812604578524	


loss for iteration 3100 is 0.29787844647771	


loss for iteration 3200 is 0.29529332613519	


loss for iteration 3300 is 0.23193881212489	


loss for iteration 3400 is 0.29475769919214	


loss for iteration 3500 is 0.26030472211961	


loss for iteration 3600 is 0.26221846387712	


loss for iteration 3700 is 0.31569988953737	


loss for iteration 3800 is 0.23667441343202	


loss for iteration 3900 is 0.28223008437748	


loss for iteration 4000 is 0.28887201748526	


loss for iteration 4100 is 0.26669039680056	


loss for iteration 4200 is 0.255978020066	


loss for iteration 4300 is 0.23178364568714	


loss for iteration 4400 is 0.2828578738494	


loss for iteration 4500 is 0.22132546231957	


loss for iteration 4600 is 0.30895345633702	


loss for iteration 4700 is 0.26422230413958	


loss for iteration 4800 is 0.26118169420885	


loss for iteration 4900 is 0.28047669198514	


loss for iteration 5000 is 0.23503190957095	


loss for iteration 5100 is 0.22710473510487	


loss for iteration 5200 is 0.29919514576885	


loss for iteration 5300 is 0.25703086532489	


loss for iteration 5400 is 0.22941375016549	


loss for iteration 5500 is 0.29516559570819	


loss for iteration 5600 is 0.22043444366609	


loss for iteration 5700 is 0.24268444215597	


loss for iteration 5800 is 0.23997821002714	


loss for iteration 5900 is 0.20982251574415	


loss for iteration 6000 is 0.25521696837506	


loss for iteration 6100 is 0.25087454720191	


loss for iteration 6200 is 0.23408948534265	


loss for iteration 6300 is 0.23801147353928	


loss for iteration 6400 is 0.25581215511174	


loss for iteration 6500 is 0.24358223970999	


loss for iteration 6600 is 0.2696656963319	


loss for iteration 6700 is 0.23603530915758	


loss for iteration 6800 is 0.22222000809817	


loss for iteration 6900 is 0.18391122956101	


loss for iteration 7000 is 0.21644264182951	


loss for iteration 7100 is 0.25052079322634	


loss for iteration 7200 is 0.19763389636424	


loss for iteration 7300 is 0.22041031651625	


loss for iteration 7400 is 0.20076648887659	


loss for iteration 7500 is 0.19828974440589	


loss for iteration 7600 is 0.2207459149738	


loss for iteration 7700 is 0.21897276746253	


loss for iteration 7800 is 0.21813090983	


loss for iteration 7900 is 0.26414424384778	


loss for iteration 8000 is 0.19524804720603	


loss for iteration 8100 is 0.27082603818304	


loss for iteration 8200 is 0.19944681444799	


loss for iteration 8300 is 0.20865601962675	


loss for iteration 8400 is 0.23035458252007	


loss for iteration 8500 is 0.18612533957674	


loss for iteration 8600 is 0.23368069408288	


loss for iteration 8700 is 0.22972665881271	


loss for iteration 8800 is 0.1868301701751	


loss for iteration 8900 is 0.20318728175505	


loss for iteration 9000 is 0.19667530833236	


loss for iteration 9100 is 0.22340466090463	


loss for iteration 9200 is 0.21286701653591	


loss for iteration 9300 is 0.22059106964111	


loss for iteration 9400 is 0.17203884778111	


loss for iteration 9500 is 0.20621197274084	


loss for iteration 9600 is 0.20841750694734	


loss for iteration 9700 is 0.21399141202207	


loss for iteration 9800 is 0.2047047086268	


loss for iteration 9900 is 0.19486823508001	


loss for iteration 10000 is 0.1909631736554	


loss for iteration 10100 is 0.22153448289946	


loss for iteration 10200 is 0.21297374962923	


loss for iteration 10300 is 0.2045559397521	


loss for iteration 10400 is 0.20438884803524	


loss for iteration 10500 is 0.22565699864406	


loss for iteration 10600 is 0.25114912437796	


loss for iteration 10700 is 0.2031915565881	


loss for iteration 10800 is 0.21080354929415	


loss for iteration 10900 is 0.17354629030299	


loss for iteration 11000 is 0.20671732394166	


loss for iteration 11100 is 0.22909994758415	


loss for iteration 11200 is 0.22369452797503	


loss for iteration 11300 is 0.2214247096509	


loss for iteration 11400 is 0.20567894052484	


loss for iteration 11500 is 0.19355554904488	


loss for iteration 11600 is 0.19421222270952	


loss for iteration 11700 is 0.19680514673068	


loss for iteration 11800 is 0.18359951011467	


loss for iteration 11900 is 0.16885550899749	


loss for iteration 12000 is 0.23227014212682	


loss for iteration 12100 is 0.17072948453538	


loss for iteration 12200 is 0.19888512500101	


loss for iteration 12300 is 0.18263652253816	


loss for iteration 12400 is 0.19189321833739	


loss for iteration 12500 is 0.1456155719301	


loss for iteration 12600 is 0.21838424874321	


loss for iteration 12700 is 0.17633098371575	


loss for iteration 12800 is 0.18702456282515	


loss for iteration 12900 is 0.22300723843284	


loss for iteration 13000 is 0.16150490692656	


loss for iteration 13100 is 0.16322810962007	


loss for iteration 13200 is 0.16670082240439	


loss for iteration 13300 is 0.22669183022526	


loss for iteration 13400 is 0.15671890363305	


loss for iteration 13500 is 0.18509492954653	


loss for iteration 13600 is 0.22555370678018	


loss for iteration 13700 is 0.15885813365837	


loss for iteration 13800 is 0.20445309317693	


loss for iteration 13900 is 0.1456557454866	


loss for iteration 14000 is 0.15840541287425	


loss for iteration 14100 is 0.16242715725612	


loss for iteration 14200 is 0.15705344830664	


loss for iteration 14300 is 0.18121416625576	


loss for iteration 14400 is 0.15470522864432	


loss for iteration 14500 is 0.14643945843287	


loss for iteration 14600 is 0.17125419028324	


loss for iteration 14700 is 0.20763327427813	


loss for iteration 14800 is 0.15881704575466	


loss for iteration 14900 is 0.12489404241246	


loss for iteration 15000 is 0.21545546007626	


loss for iteration 15100 is 0.21367874816022	


loss for iteration 15200 is 0.12773589417548	


loss for iteration 15300 is 0.1782497954128	


loss for iteration 15400 is 0.15072336458582	


loss for iteration 15500 is 0.18963945852871	


loss for iteration 15600 is 0.18334276639025	


loss for iteration 15700 is 0.15467080494511	


loss for iteration 15800 is 0.18077957708565	


loss for iteration 15900 is 0.17333320726578	


loss for iteration 16000 is 0.16267753297211	


loss for iteration 16100 is 0.14942125518009	


loss for iteration 16200 is 0.16269903783778	


loss for iteration 16300 is 0.20842081233281	


loss for iteration 16400 is 0.15506945875644	


loss for iteration 16500 is 0.18872043604626	


loss for iteration 16600 is 0.11743523352852	


loss for iteration 16700 is 0.14035948515867	


loss for iteration 16800 is 0.16678176135239	


loss for iteration 16900 is 0.14571056233203	


loss for iteration 17000 is 0.16040378122588	


loss for iteration 17100 is 0.16753084180415	


loss for iteration 17200 is 0.19040157688769	


loss for iteration 17300 is 0.14276804310541	


loss for iteration 17400 is 0.16519926743786	


loss for iteration 17500 is 0.14585461438985	


loss for iteration 17600 is 0.18070434973108	


loss for iteration 17700 is 0.1447396998893	


loss for iteration 17800 is 0.14333564614414	


loss for iteration 17900 is 0.15134652082705	


loss for iteration 18000 is 0.18359364162571	


loss for iteration 18100 is 0.1514913845595	


loss for iteration 18200 is 0.20822450235692	


loss for iteration 18300 is 0.16117588770283	


loss for iteration 18400 is 0.12685391361298	


loss for iteration 18500 is 0.17922837012125	


loss for iteration 18600 is 0.19465953112875	


loss for iteration 18700 is 0.15931359954299	


loss for iteration 18800 is 0.18440818275083	


loss for iteration 18900 is 0.14241027580328	


loss for iteration 19000 is 0.1487757085696	


loss for iteration 19100 is 0.16298294824537	


loss for iteration 19200 is 0.14773970891053	


loss for iteration 19300 is 0.14176789662199	


loss for iteration 19400 is 0.15683701742674	


loss for iteration 19500 is 0.15492237981045	


loss for iteration 19600 is 0.13356323383163	


loss for iteration 19700 is 0.16719384547063	


loss for iteration 19800 is 0.16572781940914	


loss for iteration 19900 is 0.17062822516165	


loss for iteration 20000 is 0.19546154827536	


loss for iteration 20100 is 0.14456507441882	


loss for iteration 20200 is 0.18006525288617	


loss for iteration 20300 is 0.2064947482277	


loss for iteration 20400 is 0.14350518850355	


loss for iteration 20500 is 0.18175761394932	


loss for iteration 20600 is 0.17601413362958	


loss for iteration 20700 is 0.14540031074687	


loss for iteration 20800 is 0.13645662534363	


loss for iteration 20900 is 0.18158767566913	


loss for iteration 21000 is 0.1947722933984	


loss for iteration 21100 is 0.13123419102139	


loss for iteration 21200 is 0.14657766265356	


loss for iteration 21300 is 0.16302267335778	


loss for iteration 21400 is 0.14946755228662	


loss for iteration 21500 is 0.15860361888326	


loss for iteration 21600 is 0.15692885224936	


loss for iteration 21700 is 0.18099497316925	


loss for iteration 21800 is 0.13471644942119	


loss for iteration 21900 is 0.1523755550881	


loss for iteration 22000 is 0.18308815468215	


loss for iteration 22100 is 0.16739708894535	


loss for iteration 22200 is 0.18276669786818	


loss for iteration 22300 is 0.1481685146531	


loss for iteration 22400 is 0.158300736286	


loss for iteration 22500 is 0.12069801254944	


loss for iteration 22600 is 0.14194345394278	


loss for iteration 22700 is 0.18691118987804	


loss for iteration 22800 is 0.12588995030574	


loss for iteration 22900 is 0.15189288957283	


loss for iteration 23000 is 0.1707031176543	


loss for iteration 23100 is 0.14891020024087	


loss for iteration 23200 is 0.1376160594315	


loss for iteration 23300 is 0.12245678155878	


loss for iteration 23400 is 0.097772011298816	


loss for iteration 23500 is 0.14338655861028	


loss for iteration 23600 is 0.1349093340968	


loss for iteration 23700 is 0.13019402919461	


loss for iteration 23800 is 0.14067581637407	


loss for iteration 23900 is 0.16556018956252	


loss for iteration 24000 is 0.1418793827863	


loss for iteration 24100 is 0.11444901072891	


loss for iteration 24200 is 0.13695703108501	


loss for iteration 24300 is 0.14591485310157	


loss for iteration 24400 is 0.11015504368751	


loss for iteration 24500 is 0.2152732543013	


loss for iteration 24600 is 0.1636200134587	


loss for iteration 24700 is 0.16008806873292	


loss for iteration 24800 is 0.14456409316351	


loss for iteration 24900 is 0.13534183941192	


loss for iteration 25000 is 0.10589689244205	


loss for iteration 25100 is 0.15240526722754	


loss for iteration 25200 is 0.14918397310929	


loss for iteration 25300 is 0.11732362976425	


loss for iteration 25400 is 0.15907090579796	


loss for iteration 25500 is 0.1393492022309	


loss for iteration 25600 is 0.12957031106993	


loss for iteration 25700 is 0.13353767390697	




loss for iteration 25800 is 0.13167704738164	


loss for iteration 25900 is 0.14002386231941	


loss for iteration 26000 is 0.1940269315687	


loss for iteration 26100 is 0.13460001996499	


loss for iteration 26200 is 0.11199110865895	


loss for iteration 26300 is 0.17020106490276	


loss for iteration 26400 is 0.12999758809038	


loss for iteration 26500 is 0.15765858110921	


loss for iteration 26600 is 0.15708543751166	


loss for iteration 26700 is 0.1263336357096	


loss for iteration 26800 is 0.15287213274602	


loss for iteration 26900 is 0.11804633500703	


loss for iteration 27000 is 0.14845355560502	


loss for iteration 27100 is 0.12099238634738	


loss for iteration 27200 is 0.12775409193696	


loss for iteration 27300 is 0.11658831973557	


loss for iteration 27400 is 0.098245225560191	


loss for iteration 27500 is 0.09453169120348	


loss for iteration 27600 is 0.14087799092537	


loss for iteration 27700 is 0.12546856165803	


loss for iteration 27800 is 0.11350832987947	


loss for iteration 27900 is 0.15094496298864	


loss for iteration 28000 is 0.14898043164077	


loss for iteration 28100 is 0.12624007711841	




loss for iteration 28200 is 0.14338129743653	


loss for iteration 28300 is 0.13095954387057	


loss for iteration 28400 is 0.099826641202042	


loss for iteration 28500 is 0.1269678772598	


loss for iteration 28600 is 0.12002891789558	


loss for iteration 28700 is 0.13893056450517	


loss for iteration 28800 is 0.12499727354628	


loss for iteration 28900 is 0.12240392972969	


loss for iteration 29000 is 0.11788431500175	


loss for iteration 29100 is 0.11064053196277	


loss for iteration 29200 is 0.11959778092292	


loss for iteration 29300 is 0.12901305874786	


loss for iteration 29400 is 0.12762066269718	


loss for iteration 29500 is 0.14853904563259	


loss for iteration 29600 is 0.13677968388664	


loss for iteration 29700 is 0.12449638566811	


loss for iteration 29800 is 0.11336490317435	


loss for iteration 29900 is 0.11970459096869	


loss for iteration 30000 is 0.13386656782615	


loss for iteration 30100 is 0.12071113490635	


loss for iteration 30200 is 0.13479191410486	


loss for iteration 30300 is 0.16422860182828	


loss for iteration 30400 is 0.11331936299004	


loss for iteration 30500 is 0.12631489002069	


loss for iteration 30600 is 0.15613905883522	


loss for iteration 30700 is 0.13754226399876	


loss for iteration 30800 is 0.11904763889557	


loss for iteration 30900 is 0.15598014396164	


loss for iteration 31000 is 0.14780514318814	


loss for iteration 31100 is 0.11461733192238	


loss for iteration 31200 is 0.098719031720061	


loss for iteration 31300 is 0.18426457983408	


loss for iteration 31400 is 0.12470003677538	


loss for iteration 31500 is 0.16105663343566	


loss for iteration 31600 is 0.15399814207717	


loss for iteration 31700 is 0.11064046316003	


loss for iteration 31800 is 0.13656625188279	


loss for iteration 31900 is 0.12338928425784	


loss for iteration 32000 is 0.14079319298537	


loss for iteration 32100 is 0.14914004394967	


loss for iteration 32200 is 0.11733145907487	


loss for iteration 32300 is 0.12870260063577	




loss for iteration 32400 is 0.10959886080568	


loss for iteration 32500 is 0.13392677341012	


loss for iteration 32600 is 0.1039995713494	


loss for iteration 32700 is 0.096465770299346	


loss for iteration 32800 is 0.14023411402246	


loss for iteration 32900 is 0.1302590255775	


loss for iteration 33000 is 0.14262401360077	


loss for iteration 33100 is 0.10557936399095	


loss for iteration 33200 is 0.10428476991557	


loss for iteration 33300 is 0.13331217221174	


loss for iteration 33400 is 0.15047356699782	


loss for iteration 33500 is 0.099170848735891	


loss for iteration 33600 is 0.093758893610924	


loss for iteration 33700 is 0.12199461112119	


loss for iteration 33800 is 0.14502059723015	


loss for iteration 33900 is 0.07215914623609	


loss for iteration 34000 is 0.16403695526128	


loss for iteration 34100 is 0.11821929181434	


loss for iteration 34200 is 0.10759031024822	


loss for iteration 34300 is 0.086307972750355	


loss for iteration 34400 is 0.10338335451064	


loss for iteration 34500 is 0.11178729027773	


loss for iteration 34600 is 0.11614652013786	


loss for iteration 34700 is 0.13961251448912	


loss for iteration 34800 is 0.081595403315089	


loss for iteration 34900 is 0.099442083836155	


loss for iteration 35000 is 0.074306425112079	


loss for iteration 35100 is 0.13751283852053	


loss for iteration 35200 is 0.092365058549382	


loss for iteration 35300 is 0.12351826069512	


loss for iteration 35400 is 0.11412185378238	


loss for iteration 35500 is 0.13825702723244	


loss for iteration 35600 is 0.0965240875296	


loss for iteration 35700 is 0.12398668839064	


loss for iteration 35800 is 0.11151438398104	


loss for iteration 35900 is 0.10984012311271	


loss for iteration 36000 is 0.16298525256355	




loss for iteration 36100 is 0.12428456817377	


loss for iteration 36200 is 0.10231543140978	


loss for iteration 36300 is 0.11863116050364	


loss for iteration 36400 is 0.096001328843619	


loss for iteration 36500 is 0.11312577095453	


loss for iteration 36600 is 0.12903648499616	


loss for iteration 36700 is 0.15457924580274	


loss for iteration 36800 is 0.088620346429101	


loss for iteration 36900 is 0.091773063651627	


loss for iteration 37000 is 0.12234076421001	


loss for iteration 37100 is 0.13692615502692	


loss for iteration 37200 is 0.13110545745908	


loss for iteration 37300 is 0.11771983285491	


loss for iteration 37400 is 0.10915452191635	


loss for iteration 37500 is 0.10650901793057	


loss for iteration 37600 is 0.089606329820489	


loss for iteration 37700 is 0.1152231002901	


loss for iteration 37800 is 0.090584557844392	


loss for iteration 37900 is 0.095250754536127	


loss for iteration 38000 is 0.15009417858585	


loss for iteration 38100 is 0.085638564840361	


loss for iteration 38200 is 0.10850992960942	


loss for iteration 38300 is 0.14045994773252	


loss for iteration 38400 is 0.10705472622385	


loss for iteration 38500 is 0.11617629865827	


loss for iteration 38600 is 0.13229551727967	


loss for iteration 38700 is 0.12789135627725	


loss for iteration 38800 is 0.085612049625358	


loss for iteration 38900 is 0.089515627426295	


loss for iteration 39000 is 0.11187152933602	


loss for iteration 39100 is 0.103091571007	


loss for iteration 39200 is 0.08576709383777	


loss for iteration 39300 is 0.1201076578722	


loss for iteration 39400 is 0.10568477974804	


loss for iteration 39500 is 0.13068261851709	


loss for iteration 39600 is 0.13401661086672	


loss for iteration 39700 is 0.090631160163171	


loss for iteration 39800 is 0.087948017132176	


loss for iteration 39900 is 0.13516080819873	


loss for iteration 40000 is 0.1338497370921	


loss for iteration 40100 is 0.13629061900392	


loss for iteration 40200 is 0.11060706748235	


loss for iteration 40300 is 0.15397683590304	


loss for iteration 40400 is 0.12194542635967	


loss for iteration 40500 is 0.11257252184266	


loss for iteration 40600 is 0.12225663353331	


loss for iteration 40700 is 0.10269309800698	


loss for iteration 40800 is 0.12283715950004	


loss for iteration 40900 is 0.081475902334591	


loss for iteration 41000 is 0.085645967907107	


loss for iteration 41100 is 0.10589284729971	


loss for iteration 41200 is 0.088525177772286	


loss for iteration 41300 is 0.099946197290226	


loss for iteration 41400 is 0.10456605395119	


loss for iteration 41500 is 0.091635072439661	


loss for iteration 41600 is 0.12082968251839	


loss for iteration 41700 is 0.11896244266286	


loss for iteration 41800 is 0.14306869338413	


loss for iteration 41900 is 0.10456191992858	


loss for iteration 42000 is 0.13747299589104	


loss for iteration 42100 is 0.095980403148807	


loss for iteration 42200 is 0.1028313606924	


loss for iteration 42300 is 0.10356230980722	


loss for iteration 42400 is 0.1059549492275	


loss for iteration 42500 is 0.094187634177871	


loss for iteration 42600 is 0.11829781031854	


loss for iteration 42700 is 0.076898629926509	


loss for iteration 42800 is 0.083308575405655	


loss for iteration 42900 is 0.075999015240552	


loss for iteration 43000 is 0.15521511792621	


loss for iteration 43100 is 0.10930878017132	


loss for iteration 43200 is 0.11642629340514	


loss for iteration 43300 is 0.16767020820709	


loss for iteration 43400 is 0.09580130867563	


loss for iteration 43500 is 0.14144570049884	


loss for iteration 43600 is 0.095253227357076	


loss for iteration 43700 is 0.096657335164058	


loss for iteration 43800 is 0.10568762660269	


loss for iteration 43900 is 0.12345881551697	


loss for iteration 44000 is 0.10317821012795	


loss for iteration 44100 is 0.11202445629722	


loss for iteration 44200 is 0.11652459043669	


loss for iteration 44300 is 0.12774925725621	


loss for iteration 44400 is 0.10357777957973	


loss for iteration 44500 is 0.098805746902371	


loss for iteration 44600 is 0.10625783871141	


loss for iteration 44700 is 0.093326502004409	


loss for iteration 44800 is 0.10332919052451	


loss for iteration 44900 is 0.091455841585856	


loss for iteration 45000 is 0.1048789962068	


loss for iteration 45100 is 0.106097832814	


loss for iteration 45200 is 0.11022621920457	


loss for iteration 45300 is 0.14863689825018	


loss for iteration 45400 is 0.093641989004198	


loss for iteration 45500 is 0.070070583658378	


loss for iteration 45600 is 0.10074607542562	


loss for iteration 45700 is 0.080385844869785	


loss for iteration 45800 is 0.0749223232187	


loss for iteration 45900 is 0.086824654166981	


loss for iteration 46000 is 0.1384807875962	


loss for iteration 46100 is 0.09242599952769	


loss for iteration 46200 is 0.088037097193367	


loss for iteration 46300 is 0.086943884579682	


loss for iteration 46400 is 0.13963996945662	


loss for iteration 46500 is 0.10010796823583	


loss for iteration 46600 is 0.10635998052683	


loss for iteration 46700 is 0.10966289045986	


loss for iteration 46800 is 0.071888691210326	




loss for iteration 46900 is 0.15174086909516	


loss for iteration 47000 is 0.064971832303805	


loss for iteration 47100 is 0.093359922811906	


loss for iteration 47200 is 0.12216887044277	


loss for iteration 47300 is 0.088529152047067	


loss for iteration 47400 is 0.12218938816969	


loss for iteration 47500 is 0.10075801336418	


loss for iteration 47600 is 0.11661802639423	


loss for iteration 47700 is 0.10121255406087	


loss for iteration 47800 is 0.090466336109285	


loss for iteration 47900 is 0.081174750884197	


loss for iteration 48000 is 0.11000629048259	


loss for iteration 48100 is 0.092055615765665	


loss for iteration 48200 is 0.094841170508173	


loss for iteration 48300 is 0.10429707720286	




loss for iteration 48400 is 0.098375171570406	


loss for iteration 48500 is 0.070862363857931	


loss for iteration 48600 is 0.068450092303612	


loss for iteration 48700 is 0.068789698000651	


loss for iteration 48800 is 0.0829998323558	


loss for iteration 48900 is 0.093283850076842	


loss for iteration 49000 is 0.09627676563845	


loss for iteration 49100 is 0.10907397739318	


loss for iteration 49200 is 0.092562139270019	


loss for iteration 49300 is 0.11781006532023	


loss for iteration 49400 is 0.11998088658602	


loss for iteration 49500 is 0.099569278129868	


loss for iteration 49600 is 0.11861610649552	


loss for iteration 49700 is 0.084924652964964	


loss for iteration 49800 is 0.10621075707921	


loss for iteration 49900 is 0.084957801565486	


loss for iteration 50000 is 0.10754964070079	


In [192]:
-- Writes to file in Kaggle prediction format
-- p : prediction tensor
-- fname : output filename
function kaggle_format(p, fname)
    local f = io.open("predictions/" .. fname, "w")
    f:write("ID,Labels\n")
    for i=1,p:size(1) do
        local s = ""
        local t = {[1]="O",[2]="PER",[3]="LOC",[4]="ORG",[5]="MISC",[6]="MISC",[7]="LOC",[8]="O",[9]="O"}
        local prev_tag = 0
        local prev_idx = 0
        for j=2,p:size(2) do
            if p[i][j] == 0 then
                break
            end
            if p[i][j] ~= 1 then
                if prev_tag == 0 then
                    s = s .. t[p[i][j]] .. "-" .. tostring(j-1)
                elseif p[i][j] == prev_tag and prev_idx == j-1 then
                    s = s .. "-" .. tostring(j-1) 
                else
                    s = s .. " " .. t[p[i][j]] .. "-" .. tostring(j-1)
                end
                prev_tag = p[i][j]
                prev_idx = j
            end
        end
        f:write(i .. "," .. s .. "\n")
    end
    f:close()
end

In [198]:
kaggle_format(to_kaggle, "pred_" .. tostring(os.time()) .. ".csv")




In [82]:
function predict_tags(X)
    local predictions = torch.zeros(X:size(1), X:size(2))
    for i=1,X:size(1) do
        local sen = (X[i]:sub(1,torch.nonzero(X[{{i,i},{},{1,1}}]):size(1))):squeeze()
        local p = viterbi(sen, score_memm)
        predictions[{{i,i},{1,p:size(1)}}] = p
    end
    return predictions
end

 1646
  110
    1
[torch.LongStorage of size 3]

