# Phase 4: Train the single models

In [1]:
########################################
## import packages
########################################
import os
import re
import csv
import codecs
import numpy as np
np.random.seed(1337)

import tensorflow as tf

import pandas as pd
import operator
import sys

from string import punctuation
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from iwillwin.trainer.supervised_trainer import KerasModelTrainer
from iwillwin.data_utils.data_helpers import DataTransformer, DataLoader
from iwillwin.model.sim_zoos import *
import tensorflow as tf
from keras.layers import Dense, Input, MaxPooling1D, CuDNNLSTM, Embedding, Add, Lambda, Dropout, Activation, SpatialDropout1D, Reshape, GlobalAveragePooling1D, merge, Flatten, Bidirectional, CuDNNGRU, add, Conv1D, GlobalMaxPooling1D
from keras.layers.merge import concatenate
from keras.models import Model
from keras import optimizers
from keras import initializers
from keras.engine import InputSpec, Layer
from iwillwin.config import dataset_config, model_config
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers.core import Lambda, Dense, Dropout
from keras.layers.recurrent import LSTM, GRU
from keras.layers.wrappers import Bidirectional
from keras.legacy.layers import Highway
from keras.layers import TimeDistributed
from keras.layers.normalization import BatchNormalization
import keras.backend as K

from sklearn.metrics import roc_auc_score, log_loss
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import log_loss

Using TensorFlow backend.


In [3]:
NB_WORDS = 100000
EMBEDDING_DIM = 300
MAX_SEQUENCE_LENGTH = 30
OUT_SIZE = 1

## Load and prepare the data

In [4]:
data_transformer = DataTransformer(max_num_words=NB_WORDS, max_sequence_length=MAX_SEQUENCE_LENGTH, char_level=False,
                                   normalization=True, features_processed=True)
trains, tests, labels = data_transformer.prepare_data(dual=False)
print("Number of unique words", len(data_transformer.tokenizer.index_docs))

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\zake7\AppData\Local\Temp\jieba.cache
Loading model cost 0.483 seconds.
Prefix dict has been built succesfully.


[DataHelper] Apply normalization on value-type columns




Doing preprocessing...
Transforming words to indices...
Shape of data tensor: (320552, 30) (320552, 30)
Shape of label tensor: (320552,)
Preprocessed.
Number of unique words 83265


In [5]:
from keras.utils import to_categorical
from sklearn.utils import class_weight
labels = to_categorical(labels)

## Prepare word embedding

In [6]:
print("Embeddings")
print(os.listdir("../data/wordvec"))

Embeddings
['.gitkeep', 'sgns.merge.bigram', 'temp.txt', 'Tencent_AILab_ChineseEmbedding.tar', 'Tencent_AILab_ChineseEmbedding.txt']


In [7]:
data_loader = DataLoader()
sgns_bigram_embedding = data_loader.load_embedding('../data/wordvec/sgns.merge.bigram')
tencent_ai_embedding = data_loader.load_embedding('../data/wordvec/Tencent_AILab_ChineseEmbedding.txt')

Total 1284313 word vectors.
Err on  ['中共中央', '国务院关于完善产权保护制度依法保护产权的意见']
Err on  ['杨', '光']
Err on  ['王', '琪']
Err on  ['食品安全国家标准', '食品添加剂使用标准']
Err on  ['共担时代责任', '共促全球发展']
Err on  ['三藏不忘本', '四圣试禅心']
Err on  ['坚持开放包容', '推动联动增长']
Err on  ['陷虎穴金星解厄', '双叉岭伯钦留僧']
Err on  ['贾夫人仙逝扬州城', '冷子兴演说荣国府']
Err on  ['食品安全国家标准', '食品中污染物限量']
Err on  ['尸魔三戏唐三藏', '圣僧恨逐美猴王']
Err on  ['情乱性从因爱欲', '神昏心动遇魔头']
Err on  ['机动车类型', '术语和定义']
Err on  ['我', '末代工农兵学员']
Err on  ['财政部', '国家税务总局关于非货币性资产投资企业所得税政策问题的通知']
Err on  ['弘扬“红船精神”', '走在时代前列']
Err on  ['陈光蕊赴任逢灾', '江流僧复仇报本']
Err on  ['蛇盘山诸神暗佑', '鹰愁涧意马收缰']
Err on  ['深化伙伴关系', '增强发展动力']
Err on  ['猪八戒义激猴王', '孙行者智降妖怪']
Err on  ['九九数完魔灭尽', '三三行满道归根']
Total 8824309 word vectors.


In [8]:
def build_embedding_matrix(embeddings_index, embedding_size, nb_words=NB_WORDS, word_index=data_transformer.tokenizer.word_index,):
    #nb_words = min(nb_words, len(embeddings_index))
    embedding_matrix = np.random.rand(nb_words, embedding_size)
    word_index = data_transformer.tokenizer.word_index
    null_words = open('null-word.txt', 'w', encoding='utf-8')
    null_ctr = 0
    for word, i in word_index.items():
        if i >= nb_words:
            null_words.write(word + '\n')
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
        else:
            print(word)
            null_ctr += 1
            null_words.write(word + '\n')
    print('Null word embeddings: %d' % null_ctr)
    return embedding_matrix

In [9]:
sgns_bigram_matrix = build_embedding_matrix(sgns_bigram_embedding, embedding_size=300)
tencent_ai_matrix = build_embedding_matrix(tencent_ai_embedding, embedding_size=200)

两项
一款
这是
过大年
多大
吃秀
一天
这会灯
 
爱自
楠哥
烯碳
a股
第一股
同食
信要
多个
三年
这一
海试
喻言吐槽
扎心
会变
上吐槽
喻言狂
或成
ceodiss
一分钟
常吃
竟能
群助
第三
赌高冰
赌出
超快
一枚
手麻
天去
孕妈
屏有
mix3
这枚
1000vs10
借水
十年
上万元
找下
市偷
此国
加它
三分钟
黑如
组图
霍金的
亿年
几片
养狗场
几千只
一抹
三天
十天
为市
镇有
10w
最严
万左右
日系
三席
最损
物可
条线
人能
群会
北三县
破万
燃脂
几年
一种
蒸出
号线
一分
天吃
天瘦
被切
一首
吃辣条
辣条
一百枚
会值
一万元
一层
几种
一把
突都
包难消
一杯
艾草
给治好
数十万
几个
好几个
治一好
稳降
请存
半个
狂降
速存
欲生
日起
限重
销分
被查
分罚
新交规
惊现
放车
日新规
将会
考驾照
要持
双证
消分
飙戏
车购
无糖
第一次
想不长
最该
有福
生吃
长出
两个
包块
开喷战
我生
男宝
虐童
两证
要罚
审新规
卖不动
h6
一笔
可领
120km
将成
首条
四驱带
一看
比哈弗
万要
几量
没差
级车
美系
逼格
加分制
曝已
颈纹
美过
因丑
一张
5g
投给
25419
怀男
万公里
最脏
七天
十斤
瘦出
半年
一个月
八斤
瘦到
天减
减腰
天让
一条
玩太多
狗伤
不起
三十出头
这件
奶爸
只降
家有
快存
两周
肥要
養顏茶
吃瓜
性侵
辣妈
生完
暴瘦
狂瘦
甩成
这几物
一副
这四物
秘制
仨月
几味
老款
配全
时四驱
这才
玉娆
牛孩
吹大
撒药治
换个
拿奖
三千年
抹平
两种
消脂
宝妈
软肥肚
两物
几款
万拍
这台
一辆
达人教
能当
gl550
去味剂
已变
选股法
多只
日有
冯提
多万
s400l
懂车
一碗
甩腹
几口
排宿
刮光
上百万
一次
一声
岁萝莉
晚清
八年
170508
撩妹
171008
171110
首个
煅荷
物泡
一泡
几天
一场
半月
已花
抖音
妖股
亿封板
下周
连板
墓主
神押题
三味
快学
熬点
补脾
三物
18888888888
万买台
a6l
cs95
直降
男神
五年
生神
泪洒
十万分之一
还育
仍似
千元
交了
从根
三种
再虐
一班
抑癌
四点
四次
这国
快成
第二个
乐视
亿

说养
再成
我服
几百亿
妹打
想回
东太宠
东太爱
贵马云
东娶
东携
人成
马云举
下干
年三大
出个
六上
将献唱
卖冰沙
鞭头
人乱
第二胎
针针
演不动
只选
不选贵
后放话
喝死
哭道
他养
第六次
问会
挑断
雨中
睡光
越演
越烈
舒入
曝曾
惯三
王鸥长
杨紫秦
这事要
未到
幂家
获杨
幂力
变男神
翻旧
照疑
舒来
已和丽虹成
吃开
带秋菊
未受
卓伟放话
四五次
曝杨
一月份
揭杨
当小三
中闷
妻杨
乐杨
分乘
不戴
选杨
幂己
没王鸥
肩夜会
难掩
其补
强睡
曝强
白演
刚帮王
曝将
手接
没人道
回怼
部戏
遭王
病犯
爆如
别洗
李晨车
震变
卓伟冲
胡军家
多戏
食药
八岁
算白演
帮王
债后
涛姐
臀超
你露
王珂竟
智问
张翰五任
没手
有活
话癌
太能
刘涛终
王珂疑
吐言
疑将
演不下
刚帮
刚替
竟狂
床照
撑家
手接素
情崩
太缺
多脏
因王
吐真言
王珂要
更狠
杨紫只
借精
懒理
人妇
诗孕肚
喜当妈
马舒雅竟
诗挺孕
肚照
肚现
微博新
诗生孩
诗生
诗红毯
诗发
狂撒狗
真像
五五分
诗造
拖手
美呆
这小
刘谦因
在度
刘谦给
刘谦遭
刘雯大
维密
俯揽
001a
会大
不惊
反喜
肉里
不雅照
飞扫
望冲
没卖
十只
天要
失联前
刘仕余
春春
会限行
拉响
指暴击
七连板
心塞
爆一
价达
归马蓉
天嫩
十颗
薛之谦前
曝恋
刘涛学
股要
创客
马云愿
上滴
sunnee
爆肝点
创金合信
想过
塘主险
大妙
删光
删前速
删完
微博秀
白之路
删张杰
扫一扫
左滑
一页
別克
力压
one77
假奶
第三尊
超顺丰
利巴韦
群日
一单
完唐
利用微
还准
信小
别一
强不强
浮灵
张冠号
别信酸儿
别信
中侧
睡个
好觉
擦点
错易
大暴
伴上
斑全
排湿毒
2a
不糊
比跑
剪个
皮薄汁
脆甜
美白针
表了
最过
有餐
十几倍
别忘
八城
喝分
帅到
一窝灭
就切
别用
特怕
它成
中王
看汉兰达
x2
别瞎传
油见
伤车
油表
别老
no12
别装
先清肠
50ml
乌如
c9
酒才
那英毁
爆实
办婚
纯粮
亿留
刷不白
总爱用
沾点
白刷
事牙
牙黄
加些
时挤
刺苋
挤血
手党
前中國
壮牛
一学
之花
宣布独立
婚房秀
胎梦真能
吴京逆袭
htycoin
之声
泥加
马云慌
第一枪
待夫
杀

弃车
毒狗
省出
杨紫周
二十多年
礼上
亲姑
曾痛
姜汉娜
姑侄
因患
反杀
实锤图
秀同款
这女
大爆光
曝实
杨紫微
博晒
很配
大美妞
藏得够
张一山恋
曝生
杨紫要
被抛弃了
很多年
爹坑
传患
新晋
别笑
宠妹
张予
张伦硕带
十口
张伯芝
想灭
完候
活要
疯来
51a
动它
美朝
五部
装逼
爸人脉
蒋欣遇
截胡小
鞠婧
持棍
还进
吊缆
局狂
脖卡
谈个
玩过
陈晓退
张有良
想离
亨离
正脸
杰哥
张杰录
照为
叫炅炅
图竟
亿购
三处
开黑
杰粉
删博
中谢娜
还合开
传着
二十个
说事
十月底
喧称
交新
遭霆锋
不输于
字引
港力
追生
新戏定
锋芝恋
愈像
亿想
可退
亿争
带俩
锋菲
接三人
谢贤笑
度入
不缺
没谈过
要争
携新
子不当
程冠希
个官
再多生
耍酷
多金
别想用
不悔
现港
竟放话
谢贤因
累到
要大婚
曝想
疑反
怒称
被富
谢贤回
谢贤竟
全随
张伦硕姓
已无路
不亏
下输
李晨泪
之家终
护女
孙上
不活
张檬妆容
沉银
张琼辉
曝不雅
科甜
戴上
位炮友
酷黑
迷妹
很傲
科姗恋
没替
跟景甜
害惨
成渣现
有颜
戏里
张翰快
两字
比前
另择
张翰出
博隔空
跟爽
话成
大剧
郑爽疑
扎气
塘主
引众
塘主心
称娜
扎配
虐到
戏生情
微博传
张翰微
博秀花
张翰想
张翰新
接张翰
八比
张翰有
张翰暗
两人快
张翰爱
张翰用
连爽
用花
纯到
穿花
张翰怒
床战
冯铭潮
前旧
曝与娜
尽撒狗
要塘
扎粉
郑爸
张翰首
王思聪怒
最渣
爆恋
疑想
红亮
疑求
首档
秀将播
张加帅
有伴
可鹿晗
离组
只带
兴罗志祥
兴罗志祥要
坑成
两嫁
而鲁豫
唐艺昕发
五婚
指蹭秀
马思纯秀
跑友
男神配
约影
这有
晏疑
甜晒
晏有何
袁立会
曝欠
张雨绮生
七零后
发个
多好
博疑
婚烟
业有成
微博点
称非
比何洁
冯柯未
新恋
男情
没戴
手照
戏真多
没提
说些
几字
称有
获实
发些
有敢
窦骁同
予步
瘦至
频看
暴瘦致
张鹤伦当
面儿
过病
減肥法
也治好
彈指
台坛
爱要
f3
面唱
巨友门
并种
得值
亿夺
那颗
眼妆太
会得眼
补身
吃蒜法
当有
信刷
当男神
大吐槽
差遭
掺上
彩王
彪哥
打后
有爱互
继应
丫是
换座
演个
晏因
遭击
晏惊
登热
我出
哔柜
神级
晏要
街吻
一幕幕
一百亩


天不烂
传首
猪价
先备
首安茶
卖多
肉能
往家
肉堪
少吃点
猫成
一两天
难有
那烂陀寺
为博
几十人
红沙皮
需一剪
剪后
百分之四十
百分之八十
百分之五十
米须
追美合
王一博美合
王三喜
无湿
王丽坤林
爆林
我特
逆袭成
手残
王源成
他成
护易
男大
再摊
往死里整
已马
问马云
捐后
疑骗
该负
因钱
他选
蒋欣发
微博甜
走虐
同框照
将生
网传王
六小时
同逛
关系暧昧
打鲁能
王如峰
文男
不归路
领家
博谈
宋喆该
强交新
马蓉强
强替
宋喆养
网传宝强
中泪点
强泪
熊乃瑾秀
很正
眷侣
讽其
188c
强前
称若
比马蓉靓
宋喆恐
微博疑
长点
强宝强
宋喆会
宋喆终
终进
宋喆进
强摊
发博称
超马蓉
胜马蓉
熊乃瑾疑
称马蓉
爆宝强
京金联
赵薇版
强案
曾祝
引马蓉
爆遭
发博表
马上会
却长
看博
赞宝强
待议
有宝强
归宝强
分归
微博向
我错
遭女
悔青
马蓉案
下才
能办
万寻
凌杨
最暖
脑残粉
豆得儿
寻赏
三尺
王思聪为
马云思聪
王思聪出
张翰神
嫁富
能年入
雪莉富
带妆
有胜
发冯
这该
病治好
王思聪友
王思聪发
莫怒
却花
送车
称鹿晗
脸景甜
英放狠话
非奸
王思聪带
后王
微博怒
喷马云
妹凭
王思聪手
李晨白
章泽天口
东怒
王太太
比豆
得儿
张小蒙
王思聪欲出
王思聪欲花
而花
讽冯
红狗
王思聪狂
真绝
王思聪疑
讽鹿晗
面说
王思聪称
改叫
竟出
字扎心
王思聪竟
东杠
为父
王思聪约
王思聪维密秀
之手
发床
王思聪要
我愿用
英滚出
王思聪讽
红靠
王思聪道
杨紫丑
内在美
清纯玉女
清竟
亲脚
变哑
内必
他备
撩者
博人
看行
易祥千玺
图露
李晨发
不忘秀
源哥
真唱过
照超
馨爷
王珂刚
第二任
王珂靠
朴珺
竟变
他应
买太多
叹言
多宝
日服
浪个
铠说
必入
天美请
变高
王艳兵
疑恋
锋会
问霆锋
碰才
祝你幸福
携女
再孕
大挺
不拒
四合
曾闹
可据
美若
宗萨钦哲
窦唯哭
不丑
谢贤要
流產
謝霆鋒
張柏芝
應該
用四字
养胖
这胎
数千万
爆孽缘
导锁
甜过
共游
复炽
多痛
柏芝选
离港
开吵
英忙
盆满
肚大如筐
爱情真
有三爹
谢贤怒
窦唯之女
疑帮
肚大
眼含
峰芝
曝上
要结
谢贤则
称如
摆席
戏忙
次孕
真孕
爆签
放料
克屎
如歌
但苦
家四
两亲

整月
原子城
九景衢
宝兰客
联试
留放
可防脱
传造
恋再
现又陷
变虚
偷排
百余辆
谢贤疑
八十多岁
谢贤刚
谢贤谈
十六岁
沛纳海
财產归
交个
谢贤想分
吃味传
谢贤称
埋于
曝加
车油
拟运向
指系
行牛
控水
互呛
过高要
茶越
无降配
多市
五象
始作
谣者
蓝鸥
还处
三考
立辨
很乱系
周超快
全了
附超全
已诞下
好孕气
重男
生宝状
数十秒
美羊羊
人吃生
从许
看恋童
网红许
指恋童
踏俩
组间
晨重
第二阶段
辣妈用
练鬼步
速码
天脱
猛瘦
亲荐
可减
日减
甩肚
脂茶
三十九天
每招
姜蜜水
第四天
好几斤
第七天
美拍
甩油
共瘦
莫暴
精脸
大呼求
神舞
极塑
体轻
单腿
特减
第二十三
半根
中圣品
知爽
含屎
没响
推伤
大卧
姜泡
每早
无三高
遭许
晴批
小威威
宋雨洪
对车
政解
越高越
高鑫磊
现神
癌犯
以微博
薛之谦个
周杰互
最靓
四摄
崩人设
坑夫
没睡
安和桥
谦谦爱
业之峰
玩得
王小鸥
真忙
鼓不起
内要
网剧令
几十载
怪孩
返本型
油会
美系车
万近
还享
曝卖
领弹
王一博自
消保委约
alpd
而生
人练
老铁要
里会
粘肠
错得
英签
青微博
无墅
罗曼尼
喝多会
分变
痴汉脸
lms
拖米
karsa
已好
4am
放人
任栋
没说
koro1
带妹
怪用
ziv
查飞单
许知
am4
矿圈
禁挖
f2pool
吴评鑫
转势
删路
反招
比脸
被尬
黑假
其言尽
拍吻
戏用
撒内宁
翻路
辱华
凭颜值
该查
脱库
轩墨
之扣
中毅
况丽任
元股
pmma
科通
芯城
中酒协
灿笑
附选股
无应
或迎
被否
更严
日要
假米
含微
杨颖太像
显矮
一人带
曝假
这大招
安迪竟
依乐
脸贴
变政
upit
币安将
称中美
资色
比安迪豪
测下
超有
看孕囊
超五
凑好
旦下
速接
家获
数百架
没料
商飞用
问西
媒脸
队不换
看自
第一发
高宇翔
根茶
根可治癌
涉癌
雷臻
链网
qklw
罗又陷
鲁尼梅格
伊瓜
难平
怎敌
已互
冷讽
误造
亿追内
济奥
罗转
欧冠要
曝齐祖
罗表
西媒报
梅西发
大竟
其原
罗送
内马尔回
▹
被旭旭
必掉
狂出
未锁抗
机盗
150426
黄子稻
系富
加新
嘴炮
十几架
帅炸
35b
密爱
iovine
东晒
店卷款
万跑
1440cc
行能
博为
宇不

遭开
雷军排
最尾
难一见
上极
鬼村
未炸
造歼
逼美
微博太
搜是
加身
吴雨珏
将播
阿乌巴
后谈
成吐槽
涨粉
发快
名印
常林回
半年前
人散
现售
掌厅
f117
烟渍
越中
种人领
终遇
沈万三
遭泼油
李秋平外
电地
和晶
豪斥
稳抓
信反信
亿小散
城洞
大暴击
无数个
马云放
俞凌雄点
人年入
超坚瑞沃
超赣锋
诚迈
士兰
微封
博聞社
離境
萬達
崩盤
求大
库鸟
读品
游一游
小师弟
被免
一逆天
巨坑
子之父
恰尔汗
代差
航发
无梦到
莱口
觉太
觉浅
睡不著
不夜
无梦
巨魔成
出蓝贴
极寒来
起转
startfragment
有雨
迎特
中抱
旧图
今明
虽多
天气晴好
天别
2018121
十座
狂整
几腿
惊掉
而现
示软求
过胎
亮哥心
第三步
第一盏
王思聪卓伟
爱嘻哈
七张
新瓜
清力
探班求
发正
涉入
长小
马蓉微
卓尔写
整幢
满帮
医三人
靠种
有钱赚
一百多亩
稻蟹
人不告
糙发
膏来
艾尚彩
出一
食必思
黄焖
陷商票
说学
多梦太熬
假觉
韩美军
乘热
放太多
购内少
蜜友
苏索辟
权健亚冠
恒大真
核闹
扎哈维转
权健换帅
权健要
抢人换
成索萨
两支
李霄鹏任
刀别
而动
一土帅
中超队
阿隆来
有鲁能
谈换帅
中超近
李总任
换帅系
还应
托大
学恒大
奖真
只升
上港成
贺惯
赢上
纳英戈兰
成中超
恒大要
当枪
杜煜征
津媒欲
被鲁能
遭截
卖药
一擦灵
赶离
口人
宝斗石
终取
赵蕊蕊
scoal
星人怕
临冠路
宋集坞
estarpro
四连号
防拐
会吸
板放
御锦湾
等校
指不收
害娃
相不中
遭喷
小三假
终出
出道史
赵楚纶
小三后
如一人
这小三当
超美腻
小三史
官博已
热聊引
发羊癫
前先
接甜馨
十二岁
生咬蛇
宝妈别
喜茶
人假
论耍
带人到
竟送
引骂战
剩油
染绿
甩马蓉
几十条
虾肠
卡补审
胖疑
马云送
恒大欲
孙莉生
三朵
排期
粉粹
机来
黄磊首
铁卫吉尔
jdg
用纳尔
骚男道
往鲁能
针多针
遭洪爷
日停
我大
鱼籽
非小
用曲
美替
非尼
恒大刚
恒大真核
人常去
为北飞
曝向
减购
为成
惨触
音当
网红竟
录抖音
身疾
男说
身残
亿全
摸象
肃毒局
没人教
终盼来
简安桀
十余
信错
竟害
却赖
辽足求
多人持
屍體
嘗到
自動
久升
一东
几单
飙泪答


成罗晋
网传罗
中奥
共著
几页
翔疑
争足
整死
几座
某天团
有悲
太初
冲电
赵丽颖张
粉捞
许华升
宝想
可颜值
差太多
满盆满
赵丽颖开
赚足
水酿
鸡鸟
当糖
卡喉
请范伟
恐只
互骂
吴亦凡用
科粉
清怒
薛之谦买
臀九女
扎为助
女宠
签詹皇
无香
小迷弟
杨洋刚
话现
杨洋正
茉上
一缸
杨洋竟
亲小爽
井柏然史
郑爽表
杨洋狂
后醉驾
李慧林
致钙
湿仓
天给
药流
夏热特
网红科迪
显瘦变
出牙晚
对会
中脂
起反
过多时
越苦
大茶
超贊
超強
酒风
别多
精元
脑里
追弗神
化骨龙
叫食
绿瘦
磨膝
吞饭
素会致
咳不治
种好物
如石
天硬
一碰酒
脸不红
头不晕
而应
怒提
之辱
一游
生三女
中因
杨紫为
设要
拍战
迷妹们
关爸
宝沃要
头长
16607346286
真矿
拘一人
对越
需厚植
血溅
刀围
币算力
脸中
最蠢萌
大未
变尖
胸比
赵丽颖大
力捧连
如孙俪
脸获
雷哭
赵丽颖大脸
穿乳环
脸大眼
赵丽颖旧
赵丽颖火
赵丽颖素
王嘉尔要
胖回
脸现
图惊现
赵丽颖逆袭
整美
停戏
课上
电揽
邵家桥
日站
正办
内不受
换特雷
文旅部
骚男
日众
日举国
道准
说华仔
护犊
施一公因
起贷
曝施
一公因
牵绳
器来
出会
可离
比蔚
领帅
urus
这台家
纸糊车
开不烂
翻增
89413
万想
传祺给
入内
变豪车
变双
太挺
团内互
整没整
替千玺
妖猫
研可
朗动
仨孩
胆肥
撩汉大
桦褐孔菌
竟神
内惊现
快种
戒忧
s500
印找
平趟
荒信
称池
忠国
董翰麟
需经
新帅雅
继恒大
巴坎布
权健恒大才
恒大狂
购纳
因格兰
恒大过
若纳斯
继黄子
三回
多厚
风百碟
骑过
谢娜整
剪肉
张瘦
碱肥
类不含
很乖
界竟
皮淡定
因凡
忙点
寄诚
签后
唐璧华
前真龙
真龙终
擒龙
男圆肚
還淡斑
同喝
如夜
乌如润墨
黑得加
亿三大
签预
微博靠
黑钻要
不淫
可种
两批
一建四改
被施
打大呼
黑牛
小浩宇
飞近
脸像
微信常
外治
死十人
鉴江
某权
书恒
如萍
雪姨成
因谢贤
两人下
曝欲
几代
未发
闺蜜应
卢靖姗要
太多竟
错床
清忙
几十亿美元
201804
互关
不后
五连板
超柘中
亿筑底
真多得
侵虐
男主带
提兰城
异人族
多该
这擦
该进
來時
機會
詹皇明
逃不脱
中大有
后代子孙
主贵且

对张
致晕
已俩
宋喆大
女炫富
太贱
随江
陪江
葛军出
株潭来
龙画虎
自护
持斧
多箱
扎演
有多酷
爽妹
没火当
郑爽江
翟天
临则
对柏芝
皮切成
天变润
马航失
联三大
找代驾
克雪菊
一个通
准降
叶吃
根都
天杞
降掉
清肠利
太高人
药钱
个降
又护
别老靠
三常
没高过
十吃十降
能通
总降
样小
种人会
捉蛇
被拉进
夺视
图疑
需放个
娱记
韬拉着
何炅为
种同食
吃超
如网
器系
利智会
弄活
画成
六架
橘光
多架
凯凯王
胡可沙溢
梁泳仪
不买包
陈思城
刘恩乔
一點
不浮
一寶
輕十歲
值会
张免
扬要
假快播
会补
长得壮
还护心
毒上
降三高护
不体
好几碗
鲜滑
味美有
老香
真需
改搬
华尔道
夫要
余人系
乡来
人偷
累晕
红雷哥
银隆系
壮长
打多
亲案
180109
一人敌
店女
饮巧
茶酱
ꈊ
迁房
读晨记
171211
排名第
四地
信马云
我州
叠溪
吴大真教
突有
李老师
三豆
六十米
群多
速转求
遮十丑
狂揽
需泡
脚水里
肚平
哥玩
不向
受此
币价
没黑够
抵台
丽颖有
因王俊凯
唯粉
频接
年凯家
王俊凯毒
患小头
美白误
水润底
瓷肌
蓝水会
伤不伤
密疑
希间
赵丽颖颖宝
赵丽颖起
秀自
被布
无皱
配一物
紫越
肾能
皮茶
天比
似炭
透黑
灿友们
了颖宝
瑜许
力挺颖宝
郑爽暴
灵招
想染
黑如绸
越松
洗牙易
洗牙后
新专
玩会
脸能
蓝月亮
陈皮配
连洪欣
过范
看洪金
范丞丞用
问范丞丞
五十多岁
赵丽颖剧
乔让
真扎心
此警情
限油
被车
脸属
你配
入岛
等涉
井系
酒比
涂磊会
抵套
买公
某小编
无龄
仔人
刚卓伟
绿着
有超
首认
发都
前洁面
洗能
无赤
卖个
至雄安
摇号系
出醜
极少数
变飞
号生
卖吉鲁
晚旗报
厄齐尔去
驱火
冲杯
谣速
徐峥会
171210
五线
翔患
mdd
千寒易
一湿
难除
老湿毒
祛得
角老
想信
采熙
偷领
捞车
没派
中盛
入碗
不融
城坠亡
坠亡终
再辟
城江
勒伤
必推
杨馥瑜
以歌
搭逆袭
万同款
我昕姐
亲辟
额旗
十几名
地系
一个千年
穷常
精虚
补虚温
男要
排前
肾为
佐饭
健力
之品
大且
之圣品
美白片
美要
疑打
荆监
谣称
我进
两将
抢戈登
超六
五场
一宝妈
灿星
完真
后盖谍
机背
养颜养
有苦
过塞到
超给力

养声
露下
种颜色
医都
没卵用
几万人
点事
那英首
小三开
打星们
红本
镇因
张恒新
赵丽颖接
入群
张翰力
继快
称马
这四字
前开
两人发
何炅维嘉
內地
爽恋
糖里
两人似
微博珠
曝马
微博写
整蒙
家场
找力宏
三人算
息屏
李湘爆
云伟
马云演
接男宝进
信则
女像
我独
一食材
头不疼
一人战
算才
图比
试目
脸谈
已存
万人存
网红为
音惨
漫展
关晓彤团
关晓彤索
比蛇
baofeng
男贱
假裙
撕人
小汉教
爆汪峰
不来学
顶百副药
一钓
获多到
手竿台
可连
低钠
称地
心人
再条
添未解
侠竟
四无
面若
同锅
编可转
兑到
时撒
发吃点
前必
一东后
所转
日岳阳
不关会
某芳
诗张
根硕
个养
肝菜
天由
体大如
嘴大如盆
快偷
出尽
邀戏
闫妮认
上纹
只罚
将记
分狠
大猪
终未
如润墨
从生
兑上
墨亮
司匹林
多载
不染头
变乌密
似润墨
不抹染剂
再密
天来
三吉
不学太亏
似墨别
发再野
五丸
多根
如雨落
虽美白
皱显
需染
如森
凶白
四粒
三千丈
斥小三
疑指赖
公媒
李荣浩去
劲销
万多辆
防蹭网
恩凯
我来养
胖一
遇收
某非
次婚
杨紫问
男神名
草有主
儿疑
夜互
发照秀
后介
现欲
用六字
当谢娜
懂博越
纵置
减了
清肠养
时黃瓜別
脂掉
有奇方
湿毒调
亲看
开掉
千平
人暴
秘透
决密
套别
还斗得
预谈
会降
mp467
马云爆
一马
中造
马云透
吴京触
马云分
一个千
变富
穷请
富人榜
马云排
马云财
之神
觉察到
20182019
再夺
东笑
曝马伊
遭黄
缝生
我火
投马
发棒
增甜
龙空
探因
乔恩遭
这比
微搏求
微博带
讽王
仍住
马蓉过
大孕肚
愁色
鹿晗发
榨成
以谢
打马蓉
送宝强
问约
分王
送马蓉
马蓉扎
书算
忙分
叫宝强
马蓉刚
驯熊
微博初
买靓房
宋喆称
搜之马蓉
野夫
卓伟狗
万欲
马蓉怒
马蓉时
需车
过满
一树
子喜
谢贤八
要秀
微博之言
陈坤红
迅怒
那英迫
希疑
千骨
希暗
陈晓面
陈晓臭
遭瞎传
希钓出
瞪大眼
怪到
林丹们
找小三
先开
人空
耀文
曹查理
探班白
baby16
偏高别
口全
假文
今恋
牛俊峰
有型
继惠氏
和护
崔岷植
何孟怀要
没照
已同
港媒称
门害
秘游
何孟怀于
人型
诵佛
抄经
对心
吻界
这盘
棋下
字服
似龙


曝齐祖
西媒报
梅西发
内马尔回
▹
被旭旭
未锁抗
店卷款
1440cc
姐退赛
偏慨全
姐赴
帮白娅婧
网传高
越聰明
當夜
貓族
小智称
啪姐
连麦散
变人全
完牙要
10gt
马蓉终
周琦发
条微博
180320
抬高自己
微博热文
明升车
发推称
qq144
karasa
推特谈
爆招
保健作用
现与九好
tfrboys
后易祥千玺
天合光
战悬
王源陷
王蒙芳
disspg
绿休团
真不看
陕甲
逆领
戏暴
毒通
净瘦
天刮肠治
回小蛮
通督法
颜值帝
28576
买哈弗
談靈體
码截屏
戊仁
杨颖大
欠裸贷
刚发糖
晒孕照
曝送
吴秀波方
曝带
懷男寶最
大特徵
一居卖
曹德旺成
ios80
ufs21
iphonex1
ufo37
称摩拜
成转商
有细针
王兴靠
只捧林
王智产女
破虚红
看球系
曾哥悟悟
霉霉和赛
小虎铁
大东翔
竞有料
致万人
网传皂
称马航
轻松自在
打其脸
雪中放
健今
房算白
慌教
乐视要
妥滴
传梦老
网传酒
网传龙
桂风起
心热时
想不美
箱偷
照火遍
百人团
说春白
中春尾寿眉
西牛岭
侯继刚
暂不收
水贝买
前将现
香颜值
发微斥
危物種
万疑
能防雾
喝鼠
乃不实
暖文有
删长
双滦事
通过观察
171013
特破此
传烧
双榜来
中吃出
lucas1
发给你
在京举行
挖莱万
曝蒂
莱万亲
众筹刷单
何一称
网传豪
币之父
jgg88885
曝偷
删孕
深得于
很多遍
王丽坤苦
网红莉哥
就会松
蛭富
麝取
养鹅有
王凯称
删不实
群传雪
若风为
四连辟
放洁
国通星
不雅未
高鑫情
早吃金
晚吃毒
称微博
桃梨橙柿
金宇车
微博为
辛芷蕾陷
乘网
剂致
唐艺昕互
路肉串
瑞世佳典
网传湖
体脂会
途虎卖
筹旧
圈七大
造财库
半女宝
这不王
剧招
注胶门
李书沸徐
要拉黑
暗黄长
那血病
肾越
满暖心
恒大有
赵丽颖方
赵丽颖用
治蛾
纪台桥
马斯切
孙安佐案
马云当
但会致
吃产
国燕委
发防骗
锋霸亲
全仓该
对乐视
传辉山
放黑血
战帕奎
无根粉
最扎心
传市
治蛾别
疯传明
撒药治蛾
浙土梅
微博送
白鸠川
赵丽颖变
两作媒
151023
推常旅
李昂发
演侯亮
演候
男主陆毅
靳东换
陆毅版
旗币
女主选
王俊凯担
凯靳东
剧荒期
剧透下
昆凌请
姿疑怀
演女主
女主比
张翰开
倪秋云家
同食变
男一

对美日
显孕态
人留灯
分小叉
分大叉
蜜鸡
昆凌速怀
杰微博
真之棒
网传夕佳
钱多事少
识瓜
一拍知
赵薇辟
救璐
邓超排
范伟有
深扒林
讀透
社會少
王建林成
陈翔家
躲远点
信勿转
颜美白
咬核
堵教
对景甜
曝李玉刚
隐婚生
带景甜
遭小三
竟无一人
暴瘦照
爆陈羽
七十九期
三大点力
杜淳家
对海沃德
曹云金回
收六到
不实系
杨紫是
杨紫怒
对刀郎
和玲花
与玲花
刘丹自
如今已是
真得瑟
剧不红
演小三演
一物降
佟丽娅成
谁给了
联沃家
燃脂舞
排尽肠毒
刮胆经
舞瘦
爆汗服
变身易
瘦伤
rgmofwt
微博刚
带辣模
般水嫩
水里点
好白快
美白品
美白达
组扎心
和慈禧
背对着
曝闻
艰要
传和雷
这小妙
佩莱绝
神隐近
致墙
张召忠评
侯舍予
强塞狗
点半整
干十连板
哈弗方
万比卡宴
文之事
节朱
之文带
300644
一妖股
更博发
爆堪
胡井马
天人设
rplxzqk
rgvkip0
根老长
背寿字
李晟生
陈晓陪
发不实
窦唯过
发六字
葛宏向
莧加
男主李
挂林兰协
挂林兰展
人砸展
春春笑
马曼玲系
肥妞变
问甜馨
誓无二心
洗白后
送甜馨
喜当姐
pdone
官博早
腹儿
小三找
陈学冬帮
曝应
这下馨爷
引人伶
甜馨会
白拉上
瘦爆
微博似
萌不受
之恩该
遭范爷
曝不交
王岳伦认
家豪到
辨不出
维嘉有
用宝妈
微博露
选已定
时不愿
秤坏
等着瞧
仍不卖
耳替
小罗晋
粉反
一球定
比衣品
蒋欣竟
疑因怀
刘丹为
幂成
家庭不和
微博疑求
比母桃
替宝强
清讽
前撒狗
郑爽管
井柏然求
朱亚贤
埃神入
杨紫上
身潮
张檬整
毛宁唱
不老女
这拉风
杨颖要
一个天
成白富
杨颖欲
杨颖弃
杨颖称
疑马蓉
暴瘦似
杨颖因
粮天
跑出去
婊用
沈腾神
嫂方媛
照美胸
洪明伟
手遮肚
丁霞斥
用珍仪
头椿会
孩溺
赵丽颖认
朱丹为
三两个
三沙同
别老去
既不伤
比颖宝
两大准
虐太惨
微博护
生大眼
王大骂
提猜
超多炸
这狗会
三大珍肴
亿怒
天刮脂
臭宿
生爱新
西虹市
天祛痘
萨普爱思
博爆
曝锋菲
谢敢
曾不输
是治好
它治好
除菌全
有千人
懒理江
江小三
比王凯
那古仔
鸡无鸡
揭鹿晗
卓伟带
马黛双
成新进
宋喆大
女炫富
葛军出
株潭来
扎演
有多酷
没火当
郑爽江
对柏芝
天变润
联三大
一个通
清肠利
太

# Trick Features

In [10]:
train_df = pd.read_csv('../data/dataset/train.csv')
test_df = pd.read_csv('../data/dataset/test.csv')

In [11]:
rumor_words_list = ['辟谣', '谣言', '谣传', '传谣', '澄清', '真相', '假新闻', '传言', '造谣', '假消息', '不实', '勿传', '假的', '子虚乌有', '诈骗', '骗局', '以讹传讹']

def is_rumor(text):
    if type(text) != str:
        print(text, type(text))
        return 0
    energy = 0
    for rumor_word in rumor_words_list:
        if rumor_word in text:
            energy = 1
    return energy

def has_split_symbol(text):
    if type(text) != str:
        return 0
    if '|' in text:
        return 1
    return 0

for df in [train_df, test_df]:
    df['has_|'] = df['title2_zh'].apply(has_split_symbol)
    df['has_rumor_words'] = df['title2_zh'].apply(is_rumor)

nan <class 'float'>
nan <class 'float'>
nan <class 'float'>
nan <class 'float'>
nan <class 'float'>
nan <class 'float'>
nan <class 'float'>
nan <class 'float'>


In [12]:
train_has_rumor = train_df.has_rumor_words.values
test_has_rumor = test_df.has_rumor_words.values

trick_trains_features = np.concatenate((trains[2], train_has_rumor.reshape((-1, 1))), axis=1)
trick_tests_features = np.concatenate((tests[2], test_has_rumor.reshape((-1, 1))), axis=1)

In [13]:
def _build_exact_match_sequences(sent_1, sent_2):
    sent_1_char_set = set(sent_1)
    sent_2_char_set = set(sent_2)
    intersection = sent_1_char_set & sent_2_char_set
    
    sent_1_em = np.zeros_like(sent_1)
    sent_2_em = np.zeros_like(sent_2)

    for i in range(len(sent_1)):
        if sent_1[i] == 0:
            continue
        if sent_1[i] in intersection:
            sent_1_em[i] = 1
    
    for i in range(len(sent_2)):
        if sent_2[i] == 0:
            continue        
        if sent_2[i] in intersection:
            sent_2_em[i] = 1
    
    return sent_1_em, sent_2_em

def build_exact_match_sequences(sents_1, sents_2):
    sents_1_em, sents_2_em = [], []
    for sent_1, sent_2 in zip(sents_1, sents_2):
        sent_1_em, sent_2_em = _build_exact_match_sequences(sent_1, sent_2)
        sents_1_em.append(sent_1_em)
        sents_2_em.append(sent_2_em)
    return np.array(sents_1_em), np.array(sents_2_em)

In [14]:
%%time
trains_1_ems, trains_2_ems = build_exact_match_sequences(trains[0], trains[1])
tests_1_ems, tests_2_ems = build_exact_match_sequences(tests[0], tests[1])

Wall time: 9.26 s


In [15]:
print("Shape of train em", trains_1_ems.shape, trains_2_ems.shape)
print("Shape of test em", tests_1_ems.shape, tests_2_ems.shape)

Shape of train em (320552, 30) (320552, 30)
Shape of test em (80126, 30) (80126, 30)


In [16]:
em_train_features = (trains_1_ems, trains_2_ems)
em_test_features = (tests_1_ems, tests_2_ems)

# Tricks ?

In [17]:
use_tricky = True

if use_tricky:
    trains = (trains[0], trains[1], trick_trains_features)
    tests = (tests[0], tests[1], trick_tests_features)

In [18]:
model_manager = ModelManager()

# Get Ensemble Labels

In [22]:
ensemble_submission = pd.read_csv('../data/ensemble/second_level/FirstLevelPseudoLabels.csv')
pseudo_labels = ensemble_submission[['unrelated', 'agreed', 'disagreed']].values

In [28]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import importlib

from sklearn.metrics import roc_auc_score, log_loss
from keras.callbacks import EarlyStopping, ModelCheckpoint

from iwillwin.config import model_config

class ModelTrainer(object):

    def __init__(self, model_stamp, epoch_num, learning_rate=1e-3,
                 shuffle_inputs=False, verbose_round=40, early_stopping_round=8):
        self.models = []
        self.model_stamp = model_stamp
        self.val_loss = -1
        self.auc = -1
        self.epoch_num = epoch_num
        self.learning_rate = learning_rate
        self.eps = 1e-10
        self.verbose_round = verbose_round
        self.early_stopping_round = early_stopping_round
        self.shuffle_inputs = shuffle_inputs

    def train_folds(self, X, y, fold_count, em_train_features, tests, em_test_features, pseudo_labels, batch_size, get_model_func, augments=None, skip_fold=0, patience=10, scale_sample_weight=False,
                    class_weight=None, self_aware=False, swap_input=False):
        X1, X2, features, = X
        em1, em2 = em_train_features
        features = features
        weight_val=scale_sample_weight

        fold_size = len(X1) // fold_count
        models = []
        fold_predictions = []
        score = 0

        for fold_id in range(0, fold_count):
            fold_start = fold_size * fold_id
            fold_end = fold_start + fold_size

            if fold_id == fold_count - 1:
                fold_end = len(X1)

            train_x1 = np.concatenate([X1[:fold_start], X1[fold_end:], tests[0]])
            train_x2 = np.concatenate([X2[:fold_start], X2[fold_end:], tests[1]])
            train_features = np.concatenate([features[:fold_start], features[fold_end:], tests[2]])
            
            train_em_1 = np.concatenate([em1[:fold_start], em1[fold_end:], em_test_features[0]])
            train_em_2 = np.concatenate([em2[:fold_start], em2[fold_end:], em_test_features[1]])
            
            train_y = np.concatenate([y[:fold_start], y[fold_end:], pseudo_labels])
            
            val_x1 = X1[fold_start:fold_end]
            val_x2 = X2[fold_start:fold_end]
            val_features = features[fold_start:fold_end]
            val_em1 = em1[fold_start:fold_end]
            val_em2 = em2[fold_start:fold_end]
            val_y = y[fold_start:fold_end]

            fold_pos = (np.sum(train_y) / len(train_x1))

            train_data = {
                "first_sentences": train_x1,
                "second_sentences": train_x2,
                "mata-features": train_features,
                "first_exact_match": train_em_1,
                "second_exact_match": train_em_2,
            }

            val_data = {
                "first_sentences": val_x1,
                "second_sentences": val_x2,
                "mata-features": val_features,
                "first_exact_match": val_em1,
                "second_exact_match": val_em2,
            }

            model, bst_val_score, fold_prediction = self._train_model_by_logloss(
                get_model_func(), batch_size, train_data, train_y, val_data, val_y, fold_id, patience, class_weight, weight_val=weight_val)
    
            score += bst_val_score
            models.append(model)
            fold_predictions.append(fold_prediction)

        self.models = models
        self.val_loss = score / fold_count
        return models, self.val_loss, fold_predictions

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience):
        # return a list which holds [models, val_loss, auc, prediction]
        raise NotImplementedError

class KerasModelTrainer(ModelTrainer):

    def __init__(self, *args, **kwargs):
        super(KerasModelTrainer, self).__init__(*args, **kwargs)
        pass

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience, class_weight, weight_val):
        early_stopping = EarlyStopping(monitor='val_weighted_accuracy', patience=patience)
        bst_model_path = self.model_stamp + "-pseudo-scaled-" + str(fold_id) + '.h5'
        print("Load weights from", bst_model_path)
        model.load_weights(bst_model_path)
        
        bst_model_path = self.model_stamp + "sec-pseudo-scaled-" + str(fold_id) + '.h5'
        val_data =  (val_x, val_y)
        model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        hist = model.fit(train_x, train_y,
                         validation_data=val_data,
                         epochs=self.epoch_num, batch_size=batch_size, shuffle=True,
                         class_weight={0: 1/16, 1:1/15, 2:1/5},
                         callbacks=[early_stopping, model_checkpoint],)
        bst_val_score = max(hist.history['val_weighted_accuracy'])
        model.load_weights(bst_model_path)
        predictions = model.predict(val_x)

        return model, bst_val_score, predictions

In [24]:
def weighted_accuracy(y_true, y_pred):
    weight = np.array([[1/16, 1/15, 1/5]])
    norm = [(1/16) + (1/15) + (1/5)]
    weight_mask = weight * y_true
    label_weights = K.max(K.cast(weight_mask, 'float32'), axis=-1)
    
    true_label = K.argmax(y_true, axis=-1)
    pred_label = K.argmax(y_pred, axis=-1)
    
    res = K.cast(K.equal(true_label, pred_label), tf.float32) * label_weights / K.sum(label_weights)
    res = K.sum(res)
    return res


# Training Phase

In [25]:
def get_dense_cnn(nb_words, embedding_dim, embedding_matrix, max_sequence_length, out_size,
    projection_dim=50, projection_hidden=0, projection_dropout=0.2,
    compare_dim=288, compare_dropout=0.2,
    dense_dim=50, dense_dropout=0.2,
    lr=1e-3, activation='relu'):

    q1 = Input(shape=(max_sequence_length,), name='first_sentences')
    q2 = Input(shape=(max_sequence_length,), name='second_sentences')
    meta_features_input = Input(shape=(36,), name='mata-features')
    
    
    embedding = Embedding(nb_words, embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)
    
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.2)(q1_embed)
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.2)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))
    
    q1_encoded = th(q1_embed,)    
    q2_encoded = th(q2_embed,)
    
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)
    q1_encoded = Concatenate()([q2_aligned, q1_encoded])
    q2_encoded = Concatenate()([q1_aligned, q2_encoded])  
    
    cnn_init = Conv1D(42, 1, strides=1, padding='same', activation='relu')
    q1_seq = cnn_init(q1_encoded)
    q2_seq = cnn_init(q2_encoded)
    
    cnns = [Conv1D(42, 3, strides=1, padding='same', activation='relu') for i in range(3)]
    trans = [Conv1D(32, 1, strides=1, padding='same', activation='relu') for i in range(3)]
    
    
    for idx, cnn in enumerate(cnns):
        q1_aligned, q2_aligned = soft_attention_alignment(q1_seq, q2_seq)
        q1_encoded = Concatenate()([q1_seq, q2_aligned, q1_encoded])
        q2_encoded = Concatenate()([q2_seq, q1_aligned, q2_encoded])            
        q1_seq = cnn(q1_encoded)
        q2_seq = cnn(q2_encoded)    
    
    
    #capsule_pooling = Capsule(num_capsule=3, dim_capsule=600, routings=2, share_weights=True)
    
    # Pooling
    #q1_rep = Flatten()(capsule_pooling(q1_encoded))
    #q2_rep = Flatten()(capsule_pooling(q2_encoded))
    
    attn = AttentionWeightedAverage()
    
    
    q1_rep = apply_multiple(q1_encoded, [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])
    q2_rep = apply_multiple(q2_encoded, [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])    
    
    
    #meta_features = BatchNormalization()(meta_features_input)
    #meta_features = Dropout(0.8)(meta_features)
    #meta_features = Highway(activation='relu')(meta_features)
    
    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    h_all = Concatenate()([q1_rep, q2_rep, q_diff, q_multi,])
    h_all = Dropout(0.5)(h_all)
    #h_all = Highway(activation='relu')(h_all)
    #h_all = Dropout(0.2)(h_all)
    #h_all = Highway(activation='relu')(h_all)    
    h_all = Dense(128, activation='relu')(h_all)
    out_ = Dense(3, activation='softmax')(h_all)

    model = Model(inputs=[q1, q2, meta_features_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipnorm=1), loss='categorical_crossentropy',
    metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model

In [26]:
def numpy_weighted_accuracy(y_true, y_pred):
    weight = np.array([[1/16, 1/15, 1/5]])
    norm = [(1/16) + (1/15) + (1/5)]
    weight_mask = weight * y_true
    weight_mask = np.max(weight_mask, axis=-1)
    norms = np.sum(weight_mask)
    
    y_true = np.argmax(y_true, axis=-1)
    y_pred = np.argmax(y_pred, axis=-1)
    
    res = ((y_true == y_pred) * weight_mask).sum() / norms
    return res

# DenseCNN

## TenCent

In [30]:
fold_count = 8
#embedding_matrix = sgns_bigram_matrix
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordTC-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_class_weights = None
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_scale_sample_weights = None
    model_patiences = model_manager.model_patiences[i]
    
    #model_class_weights = {0:100, 1:1.5, 0.9: 3}
    
    def _agent_get_model():
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []

    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=1024,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight=model_class_weights,
        get_model_func=_agent_get_model, 
        patience=6)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordSGNS-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    score = numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)
    

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_5 (SpatialDro (None, 30, 200)      0           embedding_3[0][0]                
__________

Train on 360609 samples, validate on 40069 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_9 (Spat

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_11 (SpatialDr (None

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                             

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_8 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropo

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
   

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_10 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropo

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
score 0.8571360821309314
Predicting training results...
Predicting testing results...
Predicting labeled testing results...


In [30]:
numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)

0.8550714133718949

## SGNS

In [31]:
fold_count = 8
embedding_matrix = sgns_bigram_matrix
#embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 300

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordSGNS-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_class_weights = None
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_scale_sample_weights = None
    model_patiences = model_manager.model_patiences[i]
    
    #model_class_weights = {0:100, 1:1.5, 0.9: 3}
    
    def _agent_get_model():
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []
        
    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=1024,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight=model_class_weights,
        get_model_func=_agent_get_model, 
        patience=10)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordSGNS-DenseCNN5Layers-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    score = numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)
    

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_17 (SpatialDr (None, 30, 300)      0           embedding_9[0][0]                
__________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_1

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_11 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                       

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_12 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                       

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_14 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_15 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_____________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_16 (Embedding)        (None

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
score 0.8555160966380843
Predicting training results...
Predicting testing results...
Predicting labeled testing results...


In [32]:
numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)

0.8519272497594231

# ESIM

In [26]:
from keras import regularizers

def get_ESIM(nb_words, embedding_dim, embedding_matrix, max_sequence_length, out_size,
    projection_dim=50, projection_hidden=0, projection_dropout=0.2,
    compare_dim=288, compare_dropout=0.2,
    dense_dim=50, dense_dropout=0.2,
    lr=1e-3, activation='relu'):

    q1 = Input(shape=(max_sequence_length,), name='first_sentences')
    q2 = Input(shape=(max_sequence_length,), name='second_sentences')
    q1_exact_match = Input(shape=(max_sequence_length,), name='first_exact_match')
    q2_exact_match = Input(shape=(max_sequence_length,), name='second_exact_match')
    
    input_layer_3 = Input(shape=(36,), name='mata-features', dtype="float32")
    
    embedding = Embedding(nb_words, embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)
    
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.1)(q1_embed)
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.1)(q2_embed)

    batch_norm = BatchNormalization(axis=-1)
    q1_embed = batch_norm(q1_embed,)
    q2_embed = batch_norm(q2_embed,)  
    
    aggreation_gru = Bidirectional(CuDNNLSTM(100, return_sequences=True))
 
    q1_seq = aggreation_gru(q1_embed)
    q2_seq = aggreation_gru(q2_embed)
        
    q1_aligned, q2_aligned = soft_attention_alignment(q1_seq, q2_seq)
    
    q1_vec = Concatenate()([q1_seq, q2_aligned, substract(q1_seq, q2_aligned), Multiply()([q1_seq, q2_aligned])])
    q2_vec = Concatenate()([q2_seq, q1_aligned, substract(q2_seq, q1_aligned), Multiply()([q2_seq, q1_aligned])])
    
    compare_gru = Bidirectional(CuDNNLSTM(100, return_sequences=True))
    
    q1_rep = compare_gru(q1_vec)
    q2_rep = compare_gru(q2_vec)
    
    q1_rep = apply_multiple(q1_rep, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_rep, [GlobalAvgPool1D(), GlobalMaxPool1D()])    
    
    h_all = Concatenate()([q1_rep, q2_rep])
    h_all = BatchNormalization()(h_all)
    
    h_all = Dense(256, activation='elu')(h_all)
    h_all = BatchNormalization()(h_all)
    h_all = Dropout(0.5)(h_all)
    
    h_all = Dense(256, activation='elu')(h_all)
    h_all = BatchNormalization()(h_all)
    h_all = Dropout(0.5)(h_all)
   
    out_ = Dense(3, activation='softmax')(h_all)
    
    model = Model(inputs=[q1, q2, input_layer_3, q1_exact_match, q2_exact_match], outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipnorm=1.5,), loss='categorical_crossentropy',
    metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model

## TenCent

In [34]:
fold_count = 8
#embedding_matrix = sgns_bigram_matrix
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordTC-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_patiences = model_manager.model_patiences[i]
    
    model_class_weights = None
    
    def _agent_get_model():
        return get_ESIM(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE, lr=4e-4)
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []
        
    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=64,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight=model_class_weights,
        get_model_func=_agent_get_model, 
        patience=7)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordTC-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_17 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_33 (SpatialDr (None, 30, 200)      0           embedding_17[0][0]          

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_18 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                             

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_19 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]       

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_20 (Embedding)        (None, 30, 200)      20000000  

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_21 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_22 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_23 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_____________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_24 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_______________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
score 0.8636994488988597
Predicting training results...
Predicting testing results...
Predicting labeled testing results...


In [23]:
numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)

NameError: name 'numpy_weighted_accuracy' is not defined

## SGNS

In [27]:
fold_count = 8
embedding_matrix = sgns_bigram_matrix
#embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 300

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordSGNS-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_patiences = model_manager.model_patiences[i]
    
    model_class_weights = None
    
    def _agent_get_model():
        return get_ESIM(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []
        
    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=64,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight=model_class_weights,
        get_model_func=_agent_get_model, 
        patience=7)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordSGNS-ESIM-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_17 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_17 (SpatialDr (None, 30, 300)      0           embedding_17[0][0]          

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_18 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
______________________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_19 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_21 (Spa

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_20 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
____________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_21 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_____________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_22 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_____________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_23 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
_____________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_24 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
   

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
score 0.8609507292969848
Predicting training results...
Predicting testing results...
Predicting labeled testing results...


In [None]:
numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)

# DenseRNN

In [21]:
from keras import regularizers

def get_darnn(nb_words, embedding_dim, embedding_matrix, max_sequence_length, out_size,
    projection_dim=50, projection_hidden=0, projection_dropout=0.2,
    compare_dim=288, compare_dropout=0.2,
    dense_dim=50, dense_dropout=0.2,
    lr=1e-3, activation='relu'):

    q1 = Input(shape=(max_sequence_length,), name='first_sentences')
    q2 = Input(shape=(max_sequence_length,), name='second_sentences')

    q1_exact_match = Input(shape=(max_sequence_length,), name='first_exact_match')
    q2_exact_match = Input(shape=(max_sequence_length,), name='second_exact_match')    
    input_layer_3 = Input(shape=(36,), name='mata-features', dtype="float32")
    
    embedding = Embedding(nb_words, embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)
    
    em_embeddings = Embedding(2, 1,
                     input_length=max_sequence_length,
                     trainable=True)   
    
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.1)(q1_embed)
    
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.1)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))
    q1_embed = Dropout(0.1)(th(q1_embed,))
    q2_embed = Dropout(0.1)(th(q2_embed,))    
    
    rnns = [Bidirectional(CuDNNGRU(42, return_sequences=True)) for i in range(3)]
    
    q1_res = []
    q2_res = []
    
    
    for idx, rnn in enumerate(rnns):
        q1_seq = rnn(q1_embed)
        q1_seq = Dropout(0.15)(q1_seq)
        q2_seq = rnn(q2_embed)
        q2_seq = Dropout(0.15)(q2_seq)
        q1_aligned, q2_aligned = soft_attention_alignment(q1_seq, q2_seq)
        
        q1_res.append(q2_aligned)
        q1_res.append(q1_seq)
        
        q2_res.append(q1_aligned)
        q2_res.append(q2_seq)
        
        q1_embed = Concatenate()([q1_embed, q1_seq, q2_aligned,])
        q2_embed = Concatenate()([q2_embed, q2_seq, q1_aligned,])            
        
    # Pooling
    #q1_rep = Flatten()(capsule_pooling(q1_encoded))
    #q2_rep = Flatten()(capsule_pooling(q2_encoded))

    q1_res = Concatenate()(q1_res)
    q2_res = Concatenate()(q2_res)
    
    attn = AttentionWeightedAverage()
    q1_rep = apply_multiple(q1_embed, [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])
    q2_rep = apply_multiple(q2_embed, [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])   
    
    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    h_all = Concatenate()([q1_rep, q2_rep, q_diff, q_multi,])
    h_all = Dropout(0.35)(h_all)
    h_all = Dense(300, activation='relu')(h_all)
    out_ = Dense(3, activation='softmax')(h_all)

    model = Model(inputs=[q1, q2, input_layer_3, q1_exact_match, q2_exact_match], outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipvalue=1.5), loss='categorical_crossentropy',
    metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model

## TenCent

In [22]:
fold_count = 8
#embedding_matrix = sgns_bigram_matrix
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordTC-DenseRNN-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_patiences = model_manager.model_patiences[i]
    
    model_class_weights = None
    
    def _agent_get_model():
        return get_darnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []
        
    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=1024,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight={0: 1/16, 1: 1/15, 2:1/5},
        get_model_func=_agent_get_model, 
        patience=10)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordTC-DenseRNN-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_1 (SpatialDro (None, 30, 200)      0           embedding_1[0][0]                
__________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_3 (SpatialDro (None, 30, 200)      0           embedding_3[0][0]                
__________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                      

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_11 (Embedding)

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_15 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
   

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
score 0.8628048700321371
Predicting training results...
Predicting testing results...
Predicting labeled testing results...


In [None]:
def weighted_accuracy(y_true, y_pred):
    weight = np.array([[1/16, 1/15, 1/5]])
    norm = [(1/16) + (1/15) + (1/5)]
    weight_mask = weight * y_true
    label_weights = K.max(K.cast(weight_mask, 'float32'), axis=-1)
    
    true_label = K.argmax(y_true, axis=-1)
    pred_label = K.argmax(y_pred, axis=-1)
    
    res = K.cast(K.equal(true_label, pred_label), tf.float32) * label_weights / K.sum(label_weights)
    res = K.sum(res)
    return res

In [None]:
numpy_weighted_accuracy(labels, oofs[['unrelated', 'agreed', 'disagreed']].values)

# New Models

In [31]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import importlib

from sklearn.metrics import roc_auc_score, log_loss
from keras.callbacks import EarlyStopping, ModelCheckpoint

from iwillwin.config import model_config

class ModelTrainer(object):

    def __init__(self, model_stamp, epoch_num, learning_rate=1e-3,
                 shuffle_inputs=False, verbose_round=40, early_stopping_round=8):
        self.models = []
        self.model_stamp = model_stamp
        self.val_loss = -1
        self.auc = -1
        self.epoch_num = epoch_num
        self.learning_rate = learning_rate
        self.eps = 1e-10
        self.verbose_round = verbose_round
        self.early_stopping_round = early_stopping_round
        self.shuffle_inputs = shuffle_inputs

    def train_folds(self, X, y, fold_count, em_train_features, tests, em_test_features, pseudo_labels, batch_size, get_model_func, augments=None, skip_fold=0, patience=10, scale_sample_weight=False,
                    class_weight=None, self_aware=False, swap_input=False):
        X1, X2, features, = X
        em1, em2 = em_train_features
        features = features
        weight_val=scale_sample_weight

        fold_size = len(X1) // fold_count
        models = []
        fold_predictions = []
        score = 0

        for fold_id in range(0, fold_count):
            fold_start = fold_size * fold_id
            fold_end = fold_start + fold_size

            if fold_id == fold_count - 1:
                fold_end = len(X1)

            train_x1 = np.concatenate([X1[:fold_start], X1[fold_end:], tests[0]])
            train_x2 = np.concatenate([X2[:fold_start], X2[fold_end:], tests[1]])
            train_features = np.concatenate([features[:fold_start], features[fold_end:], tests[2]])
            
            train_em_1 = np.concatenate([em1[:fold_start], em1[fold_end:], em_test_features[0]])
            train_em_2 = np.concatenate([em2[:fold_start], em2[fold_end:], em_test_features[1]])
            
            train_y = np.concatenate([y[:fold_start], y[fold_end:], pseudo_labels])
            
            val_x1 = X1[fold_start:fold_end]
            val_x2 = X2[fold_start:fold_end]
            val_features = features[fold_start:fold_end]
            val_em1 = em1[fold_start:fold_end]
            val_em2 = em2[fold_start:fold_end]
            val_y = y[fold_start:fold_end]

            fold_pos = (np.sum(train_y) / len(train_x1))

            train_data = {
                "first_sentences": train_x1,
                "second_sentences": train_x2,
                "mata-features": train_features,
                "first_exact_match": train_em_1,
                "second_exact_match": train_em_2,
            }

            val_data = {
                "first_sentences": val_x1,
                "second_sentences": val_x2,
                "mata-features": val_features,
                "first_exact_match": val_em1,
                "second_exact_match": val_em2,
            }

            model, bst_val_score, fold_prediction = self._train_model_by_logloss(
                get_model_func(), batch_size, train_data, train_y, val_data, val_y, fold_id, patience, class_weight, weight_val=weight_val)
    
            score += bst_val_score
            models.append(model)
            fold_predictions.append(fold_prediction)

        self.models = models
        self.val_loss = score / fold_count
        return models, self.val_loss, fold_predictions

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience):
        # return a list which holds [models, val_loss, auc, prediction]
        raise NotImplementedError

class KerasModelTrainer(ModelTrainer):

    def __init__(self, *args, **kwargs):
        super(KerasModelTrainer, self).__init__(*args, **kwargs)
        pass

    def _train_model_by_logloss(self, model, batch_size, train_x, train_y, val_x, val_y, fold_id, patience, class_weight, weight_val):
        early_stopping = EarlyStopping(monitor='val_weighted_accuracy', patience=patience)
        bst_model_path = self.model_stamp + str(fold_id) + '.h5'
        print("Load weights from", bst_model_path)
        model.load_weights(bst_model_path)
        bst_model_path = self.model_stamp + "-pseudo-scaled-" + str(fold_id) + '.h5'
        
        val_data =  (val_x, val_y)
        model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)
        hist = model.fit(train_x, train_y,
                         validation_data=val_data,
                         epochs=self.epoch_num, batch_size=batch_size, shuffle=True,
                         class_weight={0: 1/16, 1:1/15, 2:1/5},
                         callbacks=[early_stopping, model_checkpoint],)
        bst_val_score = max(hist.history['val_weighted_accuracy'])
        model.load_weights(bst_model_path)
        predictions = model.predict(val_x)

        return model, bst_val_score, predictions

In [32]:
def get_decomposable_attention(nb_words, embedding_dim, embedding_matrix, max_sequence_length, out_size,
    projection_dim=50, projection_hidden=0, projection_dropout=0.2,
    compare_dim=288, compare_dropout=0.2,
    dense_dim=50, dense_dropout=0.2,
    lr=1e-3, activation='relu'):

    q1 = Input(shape=(max_sequence_length,), name='first_sentences')
    q2 = Input(shape=(max_sequence_length,), name='second_sentences')
    q1_exact_match = Input(shape=(max_sequence_length,), name='first_exact_match')
    q2_exact_match = Input(shape=(max_sequence_length,), name='second_exact_match')    
    input_layer_3 = Input(shape=(36,), name='mata-features', dtype="float32")
    
    embedding = Embedding(nb_words, embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)
    
    em_embeddings = Embedding(2, 1,
                     input_length=max_sequence_length,
                     trainable=True)   
    
    #q1_embed = Concatenate()([embedding(q1), em_embeddings(q1_exact_match)])
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.1)(q1_embed)
    
    #q2_embed = Concatenate()([embedding(q2), em_embeddings(q2_exact_match)])
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.1)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))
    q1_embed = th(q1_embed)
    q2_embed = th(q2_embed)
        
    q1_aligned, q2_aligned = soft_attention_alignment(q1_embed, q2_embed)
    q1_vec = Concatenate()([q1_embed, q2_aligned, substract(q1_embed, q2_aligned), Multiply()([q1_embed, q2_aligned])])
    q2_vec = Concatenate()([q2_embed, q1_aligned, substract(q2_embed, q1_aligned), Multiply()([q2_embed, q1_aligned])])
    
    dense_compares = [
        Dense(300, activation='elu'),
        Dropout(0.2),
        Dense(200, activation='elu'),
        Dropout(0.2),
    ]

    q1_compared = time_distributed(q1_vec, dense_compares)
    q2_compared = time_distributed(q2_vec, dense_compares)
    
    q1_rep = apply_multiple(q1_compared, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compared, [GlobalAvgPool1D(), GlobalMaxPool1D()])    
    
    h_all = Concatenate()([q1_rep, q2_rep])
    h_all = BatchNormalization()(h_all)
    
    h_all = Dense(256, activation='elu')(h_all)
    h_all = Dropout(0.2)(h_all)
    h_all = BatchNormalization()(h_all)

    h_all = Dense(256, activation='elu')(h_all)
    h_all = Dropout(0.2)(h_all)
    h_all = BatchNormalization()(h_all)    
    
    out_ = Dense(3, activation='softmax')(h_all)
    
    model = Model(inputs=[q1, q2, input_layer_3, q1_exact_match, q2_exact_match], outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipnorm=1.5, amsgrad=True), loss='categorical_crossentropy',
    metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model

# Tencent

In [33]:
fold_count = 8
#embedding_matrix = sgns_bigram_matrix
embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 200

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordTC-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_patiences = model_manager.model_patiences[i]
    
    model_class_weights = None
    
    def _agent_get_model():
        return get_decomposable_attention(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []
        
    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=256,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight={0: 1/16, 1: 1/15, 2:1/5},
        get_model_func=_agent_get_model, 
        patience=20)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordTC-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_11 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_21 (SpatialDr (None, 30, 200)      0           embedding_11[0][0]               
__________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                             

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_15 (Embedding)        (None, 30, 200)      20000000  

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            


__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_17 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_27 (SpatialDr (None, 30, 200)      0           embedding_17[0][0]               
__________________________________________________________________________________________________
spatial_dropout1d_28 (SpatialDr (None, 30, 200)      0           embedding_17[1][0]               
__________________________________________________________________________________________________
time_distr

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
____________________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_21 (Embedding)        (None, 30, 200)      20000000    first_sentences[0][0]            
                                          

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 7

Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
___________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_25 (Embedding)        (None, 30, 200)   

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 7

Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
score 0.8629848450517491
Predicting training results...
Predicting testing results...
Predicting labeled testing results...


## SGNS

In [34]:
fold_count = 8
embedding_matrix = sgns_bigram_matrix
#embedding_matrix = tencent_ai_matrix
EMBEDDING_DIM = 300

for i in range(1, len(model_manager.models_tag)):
    print("Work on model", i)
    model_tag = model_manager.models_tag[i]
    model_func = model_manager.model_funcs[i]
    #models_checkpoints_path = model_manager.models_checkpoints_pathes[i]
    models_checkpoints_path = "WordSGNS-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"

    model_submit_prefix = model_manager.submit_predix[i]
    model_class_weights = model_manager.model_class_weights[i]
    model_scale_sample_weights = model_manager.model_scale_sample_weights[i]
    model_patiences = model_manager.model_patiences[i]
    
    model_class_weights = None
    
    def _agent_get_model():
        return get_decomposable_attention(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return get_dense_cnn(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
        return model_func(NB_WORDS, EMBEDDING_DIM, embedding_matrix, MAX_SEQUENCE_LENGTH, OUT_SIZE)
    
    test_predicts_list = []
    oofs_predictions = []
    pre_trained_models = []
        
    trainer = KerasModelTrainer(model_stamp=models_checkpoints_path, epoch_num=500)
    models, score, folds_preds = trainer.train_folds(X=trains, y=labels, tests=tests, augments=None, fold_count=fold_count, batch_size=256,
        em_train_features=em_train_features, em_test_features=em_test_features, pseudo_labels=pseudo_labels,                                      
        scale_sample_weight=model_scale_sample_weights, class_weight={0: 1/16, 1: 1/15, 2:1/5},
        get_model_func=_agent_get_model, 
        patience=20)

    print("score", score)
    oofs_dir = "../data/pseudo/oofs/"
    output_dir = "../data/pseudo/output/"
    onehot_pred_dir = "../data/pseudo/one_hot_pred/"

    model_submit_prefix = "PSWordSGNS-DAttn-NoMeta-3P-NoEM-NoClassWeighted-3Layers"
    
    oofs_path = oofs_dir + model_submit_prefix
    output_path = output_dir + model_submit_prefix
    one_hot_pred_path = onehot_pred_dir + "One-Hot" + model_submit_prefix

    print("Predicting training results...")
    train_predicts = np.concatenate(folds_preds, axis=0)
    oofs = pd.DataFrame({"unrelated": train_predicts[:, 0], "agreed": train_predicts[:, 1], "disagreed": train_predicts[:, 2]})
    submit_path = oofs_path + "-Train-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    oofs.to_csv(submit_path, index=False)

    print("Predicting testing results...")
    test_predicts_list = []
    for fold_id, model in enumerate(models):
        test_predicts = model.predict({"first_sentences":tests[0],
                                       "second_sentences":tests[1],
                                       "mata-features":tests[2],
                                       "first_exact_match": tests_1_ems,
                                       "second_exact_match": tests_2_ems,
                                      }, batch_size=128, verbose=1)

        test_predicts_list.append(test_predicts)

    test_predicts = np.zeros(test_predicts_list[0].shape)
    for fold_predict in test_predicts_list:
        test_predicts += fold_predict
    test_predicts /= len(test_predicts_list)

    test_predicts = pd.DataFrame({"unrelated": test_predicts[:, 0], "agreed": test_predicts[:, 1], "disagreed": test_predicts[:, 2]})
    submit_path = output_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    test_predicts.to_csv(submit_path, index=False) # 0.3343
    
    print("Predicting labeled testing results...")
    ids = pd.read_csv("../data/dataset/test.csv")
    pred_labels = test_predicts.idxmax(axis=1)
    sub = pd.DataFrame({"Id": ids['id'].values, "Category": pred_labels})
    submit_path = one_hot_pred_path + "-L{:4f}-NB{:d}.csv".format(score, NB_WORDS)
    sub.to_csv(submit_path, index=False)
    break

Work on model 1




__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_27 (Embedding)        (None, 30, 300)      30000000    first_sentences[0][0]            
                                                                 second_sentences[0][0]           
__________________________________________________________________________________________________
spatial_dropout1d_37 (SpatialDr (None, 30, 300)      0           embedding_27[0][0]               
__________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_29 (Embedding)        (None, 30, 300)      30000000  

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                      

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
_________________________________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_35 (Embedding)

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
_______________________________________________________________________________________

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_39 (Embedding)        (None, 30, 300)      30000000    first_sente

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
first_sentences (InputLayer)    (None, 30)           0                                            
__________________________________________________________________________________________________
second_sentences (InputLayer)   (None, 30)           0                                            
__________________________________________________________________________________________________
embedding_41 (Embedding)        (None

Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
score 0.862474109916959
Predicting training results...
Predicting testing results...
Predicting labeled testing results...
