<a href="https://colab.research.google.com/github/wendy60/Hybrid-recommender-system/blob/second-submit/access_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Download dataset**

In [None]:
import urllib.request

In [None]:
import os
import tempfile
import shutil
import urllib
import zipfile
import pandas as pd

# Scratch directory, under the system temp location, that holds everything
# this notebook downloads or extracts.
temp_dir = os.path.join(tempfile.gettempdir(), 'mind')
os.makedirs(temp_dir, exist_ok=True)

# MIND is published as a training and a validation archive, each in a large
# and a small variant. All four archives use the same file format.
base_url = 'https://mind201910small.blob.core.windows.net/release'
training_small_url = '/'.join((base_url, 'MINDsmall_train.zip'))
validation_small_url = '/'.join((base_url, 'MINDsmall_dev.zip'))
training_large_url = '/'.join((base_url, 'MINDlarge_train.zip'))
validation_large_url = '/'.join((base_url, 'MINDlarge_dev.zip'))

In [None]:
def download_url(url,
                 destination_filename=None,
                 progress_updater=None,
                 force_download=False,
                 verbose=True):
    """
    Download a URL to a local file and return the path of that file.

    Args:
        url: Address to fetch; passed to urllib.request.urlretrieve.
        destination_filename: Path to store the download at. When None, a
            file name derived from the URL is used inside the notebook-level
            ``temp_dir``.
        progress_updater: Optional report hook forwarded to
            urllib.request.urlretrieve. Ignored when ``verbose`` is False.
        force_download: When True, download even if the destination file
            already exists.
        verbose: When True, print what is being done.

    Returns:
        The path of the downloaded (or previously downloaded) file.

    Raises:
        Any exception raised by urllib.request.urlretrieve is propagated;
        in that case no file is left at ``destination_filename`` by this call.
    """
    if not verbose:
        progress_updater = None

    if destination_filename is None:
        url_as_filename = url.replace('://', '_').replace('/', '_')
        destination_filename = os.path.join(temp_dir, url_as_filename)

    # An existing file is treated as a finished download unless the caller
    # explicitly asks for a fresh copy.
    if (not force_download) and os.path.isfile(destination_filename):
        if verbose:
            print('Bypassing download of already-downloaded file {}'.format(
                os.path.basename(url)))
        return destination_filename

    # Make sure the target directory exists (dirname is '' for bare names).
    destination_dir = os.path.dirname(destination_filename)
    if destination_dir:
        os.makedirs(destination_dir, exist_ok=True)

    if verbose:
        print('Downloading file {} to {}'.format(os.path.basename(url),
                                                 destination_filename),
              end='')

    # Download into a sibling ".part" file and move it into place only once
    # the transfer has completed. Writing directly to the final name would
    # leave a truncated file behind after an interrupted download, and the
    # isfile() check above would then mistake it for a complete one.
    partial_filename = destination_filename + '.part'
    try:
        urllib.request.urlretrieve(url, partial_filename, progress_updater)
        os.replace(partial_filename, destination_filename)
    finally:
        # Only present here if the download or the move failed.
        if os.path.isfile(partial_filename):
            os.remove(partial_filename)

    nBytes = os.path.getsize(destination_filename)
    if verbose:
        print('...done, {} bytes.'.format(nBytes))
    return destination_filename

In [None]:
# Fetch the small validation archive (download_url skips the transfer when the
# file is already present) and unpack it into the scratch directory.
zip_path = download_url(validation_small_url, verbose=True)
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path=temp_dir)

# Show what is now available in the scratch directory.
os.listdir(temp_dir)

Downloading file MINDsmall_dev.zip to /tmp/mind/https_mind201910small.blob.core.windows.net_release_MINDsmall_dev.zip...done, 30945572 bytes.


['news.tsv',
 'https_mind201910small.blob.core.windows.net_release_MINDsmall_dev.zip',
 'relation_embedding.vec',
 'entity_embedding.vec',
 'behaviors.tsv']

In [None]:
# behaviors.tsv holds the impression logs together with each user's click
# history. It is tab-separated, has no header line, and has five columns:
#   impression_id - ID of the impression.
#   user_id       - anonymous ID of the user.
#   time          - impression time, formatted "MM/DD/YYYY HH:MM:SS AM/PM".
#   history       - IDs of the news the user clicked before this impression.
#   impressions   - news shown in this impression with the click outcome
#                   (1 for click, 0 for non-click).

behaviors_path = os.path.join(temp_dir, 'behaviors.tsv')
behaviors = pd.read_csv(
    behaviors_path,
    sep='\t',
    header=None,
    names=['impression_id', 'user_id', 'time', 'history', 'impressions'],
)

In [None]:
# The news.tsv file contains the detailed information of the news articles referenced in the behaviors.tsv file.
# It has 8 tab-separated columns and no header line:
# - News ID
# - Category
# - Subcategory
# - Title
# - Abstract
# - URL
# - Title Entities (entities contained in the title of this news)
# - Abstract Entities (entities contained in the abstract of this news)
#
# The frame is only displayed here; it is not bound to a variable.

news_path = os.path.join(temp_dir, 'news.tsv')
pd.read_table(news_path,
              header=None,
              names=[
                  'id', 'category', 'subcategory', 'title', 'abstract', 'url',
                  'title_entities', 'abstract_entities'
              ])

Unnamed: 0,id,category,subcategory,title,abstract,url,title_entities,abstract_entities
0,N55528,lifestyle,lifestyleroyals,"The Brands Queen Elizabeth, Prince Charles, an...","Shop the notebooks, jackets, and more that the...",https://assets.msn.com/labs/mind/AAGH0ET.html,"[{""Label"": ""Prince Philip, Duke of Edinburgh"",...",[]
1,N18955,health,medical,Dispose of unwanted prescription drugs during ...,,https://assets.msn.com/labs/mind/AAISxPN.html,"[{""Label"": ""Drug Enforcement Administration"", ...",[]
2,N61837,news,newsworld,The Cost of Trump's Aid Freeze in the Trenches...,Lt. Ivan Molchanets peeked over a parapet of s...,https://assets.msn.com/labs/mind/AAJgNsz.html,[],"[{""Label"": ""Ukraine"", ""Type"": ""G"", ""WikidataId..."
3,N53526,health,voices,I Was An NBA Wife. Here's How It Affected My M...,"I felt like I was a fraud, and being an NBA wi...",https://assets.msn.com/labs/mind/AACk2N6.html,[],"[{""Label"": ""National Basketball Association"", ..."
4,N38324,health,medical,"How to Get Rid of Skin Tags, According to a De...","They seem harmless, but there's a very good re...",https://assets.msn.com/labs/mind/AAAKEkt.html,"[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI...","[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI..."
...,...,...,...,...,...,...,...,...
42411,N63550,lifestyle,lifestyleroyals,Why Kate & Meghan Were on Different Balconies ...,There's no scandal here. It's all about the or...,https://assets.msn.com/labs/mind/BBWyynu.html,"[{""Label"": ""Meghan, Duchess of Sussex"", ""Type""...",[]
42412,N30345,entertainment,entertainment-celebrity,See the stars at the 2019 Baby2Baby gala,Stars like Chrissy Teigen and Kate Hudson supp...,https://assets.msn.com/labs/mind/BBWyz7N.html,[],"[{""Label"": ""Kate Hudson"", ""Type"": ""P"", ""Wikida..."
42413,N30135,news,newsgoodnews,Tennessee judge holds lawyer's baby as he swea...,Tennessee Court of Appeals Judge Richard Dinki...,https://assets.msn.com/labs/mind/BBWyzI8.html,"[{""Label"": ""Tennessee"", ""Type"": ""G"", ""Wikidata...","[{""Label"": ""Tennessee Court of Appeals"", ""Type..."
42414,N44276,autos,autossports,Best Sports Car Deals for October,,https://assets.msn.com/labs/mind/BBy5rVe.html,"[{""Label"": ""Peugeot RCZ"", ""Type"": ""V"", ""Wikida...",[]


In [None]:
# entity_embedding.vec stores the 100-dimensional entity embeddings learned
# from the subgraph with the TransE method. Column 0 is the entity ID and the
# following columns are the components of its embedding vector.

entity_embedding_path = os.path.join(temp_dir, 'entity_embedding.vec')
entity_embedding = (
    pd.read_table(entity_embedding_path, header=None)
    # Collapse columns 1..100 into one list-valued column per row.
    .assign(vector=lambda raw: raw.iloc[:, 1:101].values.tolist())
    [[0, 'vector']]
    .rename(columns={0: "entity"})
)
entity_embedding

Unnamed: 0,entity,vector
0,Q34433,"[0.017808, -0.07325599999999999, 0.102521, -0...."
1,Q41,"[-0.063388, -0.181451, 0.057501, -0.091254, -0..."
2,Q56037,"[0.02155, -0.044888, -0.027872000000000004, -0..."
3,Q1860,"[0.060958000000000005, 0.06993400000000001, 0...."
4,Q39631,"[-0.093106, -0.052002, 0.020556, -0.020801, 0...."
...,...,...
22888,Q278846,"[0.042413, 0.021957, 0.07241399999999999, -0.0..."
22889,Q54621949,"[-0.018299, -0.048378, -0.021644999999999998, ..."
22890,Q42225228,"[-0.051346, -0.028947000000000004, -0.07587, 0..."
22891,Q54862508,"[-0.052323, -0.078029, -0.060925, -0.052536, 0..."


In [None]:
# relation_embedding.vec stores the 100-dimensional relation embeddings learned
# from the subgraph with the TransE method. Column 0 is the relation ID and the
# following columns are the components of its embedding vector.

relation_embedding_path = os.path.join(temp_dir, 'relation_embedding.vec')
relation_embedding = (
    pd.read_table(relation_embedding_path, header=None)
    # Collapse columns 1..100 into one list-valued column per row.
    .assign(vector=lambda raw: raw.iloc[:, 1:101].values.tolist())
    [[0, 'vector']]
    .rename(columns={0: "relation"})
)
relation_embedding

Unnamed: 0,relation,vector
0,P31,"[-0.07346699999999999, -0.132227, 0.034173, -0..."
1,P21,"[-0.078436, 0.108589, -0.049429, -0.131355, 0...."
2,P106,"[-0.052137, 0.052444000000000005, -0.019886, -..."
3,P735,"[-0.051398, 0.056219000000000005, 0.0680289999..."
4,P108,"[0.09123099999999999, 0.022525999999999997, 0...."
...,...,...
1086,P1897,"[-0.019021, 0.001183, -0.009602, -0.040833, -0..."
1087,P3776,"[-0.018365, 0.028526, -0.025934, 0.032296, -0...."
1088,P1194,"[-0.026819, 0.0032310000000000004, -0.011298, ..."
1089,P2502,"[0.003554, -0.041121, -0.010559, -0.037862, -0..."


In [None]:
import matplotlib.pyplot as plt

# **Read data from Google Drive**


In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# project files stored there can be read and written like local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the current directory, switch into the project folder on the mounted
# Drive, then list again to confirm the move.
# NOTE(review): this is an absolute Colab path, and the cells below open
# "data1/..." relative to the directory set here — confirm that folder exists
# at this location before running them.
!ls
import os
os.chdir('/content/drive/My Drive/graduation-project')
!ls

drive  result.png  sample_data
'access data.ipynb'
'collaborative filtering.ipynb'
'data operator'
'DKN : Deep Knowledge-Aware Network for News Recommendation.ipynb'
' LSTUR: Neural News Recommendation with Long- and Short-term User Representations.ipynb'
 MINDsmall_train
'NAML: Neural News Recommendation with Attentive Multi-View Learning.ipynb'
 NCF.ipynb
'NPA: Neural News Recommendation with Personalized Attention.ipynb'
'NRMS: Neural News Recommendation with Multi-Head Self-Attention.ipynb'
 recommenders-main
 SLi-REC.ipynb
 sli_rec-master
 Untitled0.ipynb


# Generate the dataset for the SLi-Rec model

In [None]:
import pandas as pd


In [None]:
# The file has no header line. Without header=None pandas would use the first
# record as column labels and silently drop it from the data, so the columns
# are named explicitly instead.
test_data = pd.read_table(
    "data1/test_data",
    sep='\t',
    header=None,
    names=['label', 'user_id', 'item_id', 'category_id', 'timestamp',
           'history_item_ids', 'history_category_ids', 'history_timestamp'],
    index_col=None)

In [None]:
# Preview the first five rows to check how the file was parsed.
test_data.head()

Unnamed: 0,1,A3R27T4HADWFFJ,B001PR0Y4O,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies","1138752000,1387584000"
0,0,A3R27T4HADWFFJ,B000CPH9XY,Arena Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
1,0,A3R27T4HADWFFJ,6303828035,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
2,0,A3R27T4HADWFFJ,1559409002,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
3,0,A3R27T4HADWFFJ,B001NP8PQM,Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
4,0,A3R27T4HADWFFJ,B000LQN1LI,TV,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000


In [None]:
# Keep only the first 156965 rows.
# NOTE(review): presumably chosen to match the number of training rows — confirm.
test_copy = test_data.iloc[:156965]

In [None]:
# Preview the first five rows of the truncated copy.
test_copy.head()

Unnamed: 0,1,A3R27T4HADWFFJ,B001PR0Y4O,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies","1138752000,1387584000"
0,0,A3R27T4HADWFFJ,B000CPH9XY,Arena Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
1,0,A3R27T4HADWFFJ,6303828035,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
2,0,A3R27T4HADWFFJ,1559409002,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
3,0,A3R27T4HADWFFJ,B001NP8PQM,Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
4,0,A3R27T4HADWFFJ,B000LQN1LI,TV,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000


In [None]:
# Export the truncated copy as a tab-separated file, without a header line
# and without the index column.
test_copy.to_csv("data1/test_copy", sep='\t', header=False, index=False)

In [None]:
# Reload the tab-separated file, this time with explicit column names.
test_data = pd.read_csv(
    "data1/test_data",
    sep='\t',
    names=[
        'label', 'user_id', 'item_id', 'category_id', 'timestamp',
        'history_item_ids', 'history_category_ids', 'history_timestamp',
    ],
)
test_data.head()

Unnamed: 0,label,user_id,item_id,category_id,timestamp,history_item_ids,history_category_ids,history_timestamp
0,1,A3R27T4HADWFFJ,B001PR0Y4O,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
1,0,A3R27T4HADWFFJ,B000CPH9XY,Arena Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
2,0,A3R27T4HADWFFJ,6303828035,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
3,0,A3R27T4HADWFFJ,1559409002,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
4,0,A3R27T4HADWFFJ,B001NP8PQM,Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000


In [None]:
# Keep only the first 156965 rows of the named frame.
# NOTE(review): presumably chosen to match the number of training rows — confirm.
test_copy = test_data.iloc[:156965]

In [None]:
# Write the truncated frame to data1/test_copy using the to_csv defaults
# (comma separator, header line and index column included).
# NOTE(review): this targets the same path as the earlier tab-separated,
# header-less export and overwrites it in a different format — confirm which
# format is wanted.
test_copy.to_csv("data1/test_copy")

In [None]:
# Load the tab-separated training file with explicit column names and preview it.
train_data = pd.read_csv(
    "data1/train_data.csv",
    sep='\t',
    names=[
        'label', 'user_id', 'item_id', 'category_id', 'timestamp',
        'history_item_ids', 'history_category_ids', 'history_timestamp',
    ],
)
train_data.head()

  if self.run_code(code, result):


Unnamed: 0,label,user_id,item_id,category_id,timestamp,history_item_ids,history_category_ids,history_timestamp
0,1,U13740,N55528,lifestyle,1622870079,N55189 N42782 N34694 N45794 N18445 N63302 N104...,lifestyleroyals,1622870079
1,2,U91836,N19639,health,1617223405,N31739 N6072 N63045 N23979 N35656 N43353 N8129...,weightloss,1617223405
2,3,U73700,N61837,news,1628756532,N10732 N25792 N7563 N21087 N41087 N5445 N60384...,newsworld,1628756532
3,4,U34670,N53526,health,1621566684,N45729 N2203 N871 N53880 N41375 N43142 N33013 ...,voices,1621566684
4,5,U8125,N38324,health,1613186155,N10078 N56514 N14904 N33740,medical,1613186155


In [None]:
# Number of non-null values per column. count is a method: without the call
# parentheses the cell only displays the bound method object.
train_data.count()

<bound method DataFrame.count of          label user_id  ... history_category_ids history_timestamp
0            1  U13740  ...      lifestyleroyals        1622870079
1            2  U91836  ...           weightloss        1617223405
2            3  U73700  ...            newsworld        1628756532
3            4  U34670  ...               voices        1621566684
4            5   U8125  ...              medical        1613186155
...        ...     ...  ...                  ...               ...
156960  156961  U21593  ...                  NaN        1632931726
156961  156962  U10123  ...                  NaN        1612985391
156962  156963  U75630  ...                  NaN        1620455229
156963  156964  U44625  ...                  NaN        1613477904
156964  156965  U64800  ...                  NaN        1639272965

[156965 rows x 8 columns]>

In [None]:
import pandas as pd

# Load the header-less, tab-separated template file and keep the first 156965
# rows. header=None plus explicit names stops pandas from consuming the first
# record as column labels.
test_data = pd.read_csv(
    "data1/test_data",
    sep='\t',
    header=None,
    names=['label', 'user_id', 'item_id', 'category_id', 'timestamp',
           'history_item_ids', 'history_category_ids', 'history_timestamp'],
)[:156965]
# Non-null values per column (count must be called to produce them).
test_data.count()

<bound method DataFrame.count of         1  ...                              1138752000,1387584000
0       0  ...                              1138752000,1387584000
1       0  ...                              1138752000,1387584000
2       0  ...                              1138752000,1387584000
3       0  ...                              1138752000,1387584000
4       0  ...                              1138752000,1387584000
...    ..  ...                                                ...
156960  0  ...  1095638400,1216598400,1305244800,1331856000,14...
156961  0  ...  1095638400,1216598400,1305244800,1331856000,14...
156962  0  ...  1095638400,1216598400,1305244800,1331856000,14...
156963  0  ...  1095638400,1216598400,1305244800,1331856000,14...
156964  0  ...  1095638400,1216598400,1305244800,1331856000,14...

[156965 rows x 8 columns]>

In [None]:
# behaviors.tsv has no header line; read it with explicit column names so the
# first impression is kept as data instead of being used as column labels.
behaviors = pd.read_csv(
    "data1/behaviors.tsv",
    sep='\t',
    header=None,
    names=['impression_id', 'user_id', 'time', 'history', 'impressions'],
)
behaviors.head()

Unnamed: 0,1,U13740,11/11/2019 9:05:58 AM,N55189 N42782 N34694 N45794 N18445 N63302 N10414 N19347 N31801,N55689-1 N35729-0
0,2,U91836,11/12/2019 6:11:30 PM,N31739 N6072 N63045 N23979 N35656 N43353 N8129...,N20678-0 N39317-0 N58114-0 N20495-0 N42977-0 N...
1,3,U73700,11/14/2019 7:01:48 AM,N10732 N25792 N7563 N21087 N41087 N5445 N60384...,N50014-0 N23877-0 N35389-0 N49712-0 N16844-0 N...
2,4,U34670,11/11/2019 5:28:05 AM,N45729 N2203 N871 N53880 N41375 N43142 N33013 ...,N35729-0 N33632-0 N49685-1 N27581-0
3,5,U8125,11/12/2019 4:11:21 PM,N10078 N56514 N14904 N33740,N39985-0 N36050-0 N16096-0 N8400-1 N22407-0 N6...
4,6,U19739,11/11/2019 6:52:13 PM,N39074 N14343 N32607 N32320 N22007 N442 N19001...,N21119-1 N53696-0 N33619-1 N25722-0 N2869-0


In [None]:
# news.tsv has no header line; read it with explicit column names so the
# first article is kept as data instead of being used as column labels.
news = pd.read_csv(
    "data1/news.tsv",
    sep='\t',
    header=None,
    names=['id', 'category', 'subcategory', 'title', 'abstract', 'url',
           'title_entities', 'abstract_entities'],
)
news.head()

Unnamed: 0,N55528,lifestyle,lifestyleroyals,"The Brands Queen Elizabeth, Prince Charles, and Prince Philip Swear By","Shop the notebooks, jackets, and more that the royals can't live without.",https://assets.msn.com/labs/mind/AAGH0ET.html,"[{""Label"": ""Prince Philip, Duke of Edinburgh"", ""Type"": ""P"", ""WikidataId"": ""Q80976"", ""Confidence"": 1.0, ""OccurrenceOffsets"": [48], ""SurfaceForms"": [""Prince Philip""]}, {""Label"": ""Charles, Prince of Wales"", ""Type"": ""P"", ""WikidataId"": ""Q43274"", ""Confidence"": 1.0, ""OccurrenceOffsets"": [28], ""SurfaceForms"": [""Prince Charles""]}, {""Label"": ""Elizabeth II"", ""Type"": ""P"", ""WikidataId"": ""Q9682"", ""Confidence"": 0.97, ""OccurrenceOffsets"": [11], ""SurfaceForms"": [""Queen Elizabeth""]}]",[]
0,N19639,health,weightloss,50 Worst Habits For Belly Fat,These seemingly harmless habits are holding yo...,https://assets.msn.com/labs/mind/AAB19MK.html,"[{""Label"": ""Adipose tissue"", ""Type"": ""C"", ""Wik...","[{""Label"": ""Adipose tissue"", ""Type"": ""C"", ""Wik..."
1,N61837,news,newsworld,The Cost of Trump's Aid Freeze in the Trenches...,Lt. Ivan Molchanets peeked over a parapet of s...,https://assets.msn.com/labs/mind/AAJgNsz.html,[],"[{""Label"": ""Ukraine"", ""Type"": ""G"", ""WikidataId..."
2,N53526,health,voices,I Was An NBA Wife. Here's How It Affected My M...,"I felt like I was a fraud, and being an NBA wi...",https://assets.msn.com/labs/mind/AACk2N6.html,[],"[{""Label"": ""National Basketball Association"", ..."
3,N38324,health,medical,"How to Get Rid of Skin Tags, According to a De...","They seem harmless, but there's a very good re...",https://assets.msn.com/labs/mind/AAAKEkt.html,"[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI...","[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI..."
4,N2073,sports,football_nfl,Should NFL be able to fine players for critici...,Several fines came down against NFL players fo...,https://assets.msn.com/labs/mind/AAJ4lap.html,"[{""Label"": ""National Football League"", ""Type"":...","[{""Label"": ""National Football League"", ""Type"":..."


In [None]:
# Overwrite column 1 of test_data (user id) with column 1 of behaviors (user id).
# The assignment is positional by column and modifies test_data in place.
test_data.iloc[:,1] = behaviors.iloc[:,1]

In [None]:
# Overwrite column 2 of test_data (item id) with column 0 of news (news id).
# NOTE(review): rows of test_data with no matching row index in news appear to
# end up NaN — confirm this is acceptable downstream.
test_data.iloc[:,2] = news.iloc[:,0]

In [None]:
# Overwrite column 3 of test_data (category) with column 1 of news (category).
test_data.iloc[:,3] = news.iloc[:,1]

In [None]:
# Overwrite column 5 of test_data (history item ids) with column 3 of
# behaviors (click history).
# NOTE(review): the behaviors history is space-separated while the values it
# replaces are comma-separated — confirm the downstream reader accepts this.
test_data.iloc[:,5] = behaviors.iloc[:,3]

In [None]:
# Overwrite column 6 of test_data (history category ids) with column 2 of
# news (subcategory).
test_data.iloc[:,6] = news.iloc[:,2]

In [None]:
# Inspect the first ten rows after the column replacements above.
test_data.head(10)

Unnamed: 0,1,A3R27T4HADWFFJ,B001PR0Y4O,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies","1138752000,1387584000"
0,0,U91836,N19639,health,1391040000,N31739 N6072 N63045 N23979 N35656 N43353 N8129...,weightloss,11387520001387584000
1,0,U73700,N61837,news,1391040000,N10732 N25792 N7563 N21087 N41087 N5445 N60384...,newsworld,11387520001387584000
2,0,U34670,N53526,health,1391040000,N45729 N2203 N871 N53880 N41375 N43142 N33013 ...,voices,11387520001387584000
3,0,U8125,N38324,health,1391040000,N10078 N56514 N14904 N33740,medical,11387520001387584000
4,0,U19739,N2073,sports,1391040000,N39074 N14343 N32607 N32320 N22007 N442 N19001...,football_nfl,11387520001387584000
5,0,U8355,N49186,weather,1391040000,N8419 N15771 N1431 N5888 N18663 N24123 N22130 ...,weathertopstories,11387520001387584000
6,0,U46596,N59295,news,1391040000,N47438 N20950 N21317 N5469,newsworld,11387520001387584000
7,0,U79199,N24510,entertainment,1391040000,N37083 N459 N29499 N38118 N37378 N24691 N27235...,gaming,11387520001387584000
8,0,U53231,N39237,news,1391040000,N58936 N15919 N11917 N2153 N55312 N13008 N4142...,newsscienceandtechnology,11387520001387584000
9,1,U89744,N9721,health,1402012800,N24422 N25287 N39121 N41777 N58226 N119 N29197...,nutrition,"1361145600,1367625600,1367625600,1392681600,13..."


In [None]:
# Write the assembled frame as a tab-separated file without a header line or
# index column. to_csv returns None when given a path, so the result is not
# bound to a name (binding it to `help` would shadow the builtin help()).
test_data.to_csv("data1/final", sep='\t', header=False, index=False)


# **Validation dataset**

In [None]:
# Mount Google Drive at /content/drive (repeated here so this section can be
# run on its own).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# List the current directory, switch into the data1 folder on the mounted
# Drive, then list again. The cells below open their files by bare name
# relative to this directory.
!ls
import os
os.chdir('/content/drive/My Drive/graduation-project/recommenders-main/data1')
!ls

behaviors.tsv	      news.tsv		      test_data
entity_embedding.vec  relation_embedding.vec
behaviors.tsv	    final	    news.tsv  train_data.csv  valid
category_vocab.pkl  item_vocab.pkl  summary   user_vocab.pkl


In [None]:
import pandas as pd


In [None]:
# behaviors.tsv has no header line; read it with explicit column names so the
# first impression is kept as data instead of being used as column labels.
behaviors1 = pd.read_csv(
    "behaviors.tsv",
    sep='\t',
    header=None,
    names=['impression_id', 'user_id', 'time', 'history', 'impressions'],
)
behaviors1.head()

Unnamed: 0,1,U80234,11/15/2019 12:37:50 PM,N55189 N46039 N51741 N53234 N11276 N264 N40716 N28088 N43955 N6616 N47686 N63573 N38895 N30924 N35671,N28682-0 N48740-0 N31958-1 N34130-0 N6916-0 N5472-0 N50775-0 N24802-0 N19990-0 N33176-0 N62365-0 N5940-0 N6400-0 N58098-0 N42844-0 N49285-0 N51470-0 N53572-0 N11930-0 N21679-0 N55237-0 N29862-0
0,2,U60458,11/15/2019 7:11:50 AM,N58715 N32109 N51180 N33438 N54827 N28488 N611...,N20036-0 N23513-1 N32536-0 N46976-0 N35216-0 N...
1,3,U44190,11/15/2019 9:55:12 AM,N56253 N1150 N55189 N16233 N61704 N51706 N5303...,N36779-0 N62365-0 N58098-0 N5472-0 N13408-0 N5...
2,4,U87380,11/15/2019 3:12:46 PM,N63554 N49153 N28678 N23232 N43369 N58518 N444...,N6950-0 N60215-0 N6074-0 N11930-0 N6916-0 N248...
3,5,U9444,11/15/2019 8:25:46 AM,N51692 N18285 N26015 N22679 N55556,N5940-1 N23513-0 N49285-0 N23355-0 N19990-0 N3...
4,6,U69606,11/15/2019 1:24:44 PM,N879 N19591 N63054 N53033 N54088 N34140 N14952...,N29862-0 N48740-0 N11390-0 N5472-0 N53572-0 N2...


In [None]:
# Number of non-null values per column. count is a method: without the call
# parentheses the cell only displays the bound method object.
behaviors1.count()

<bound method DataFrame.count of            1  ... N28682-0 N48740-0 N31958-1 N34130-0 N6916-0 N5472-0 N50775-0 N24802-0 N19990-0 N33176-0 N62365-0 N5940-0 N6400-0 N58098-0 N42844-0 N49285-0 N51470-0 N53572-0 N11930-0 N21679-0 N55237-0 N29862-0
0          2  ...  N20036-0 N23513-1 N32536-0 N46976-0 N35216-0 N...                                                                                                                                               
1          3  ...  N36779-0 N62365-0 N58098-0 N5472-0 N13408-0 N5...                                                                                                                                               
2          4  ...  N6950-0 N60215-0 N6074-0 N11930-0 N6916-0 N248...                                                                                                                                               
3          5  ...  N5940-1 N23513-0 N49285-0 N23355-0 N19990-0 N3...                                                   

In [None]:
# news.tsv has no header line; read it with explicit column names so the
# first article is kept as data instead of being used as column labels.
news1 = pd.read_csv(
    "news.tsv",
    sep='\t',
    header=None,
    names=['id', 'category', 'subcategory', 'title', 'abstract', 'url',
           'title_entities', 'abstract_entities'],
)
news1.head()

Unnamed: 0,N55528,lifestyle,lifestyleroyals,"The Brands Queen Elizabeth, Prince Charles, and Prince Philip Swear By","Shop the notebooks, jackets, and more that the royals can't live without.",https://assets.msn.com/labs/mind/AAGH0ET.html,"[{""Label"": ""Prince Philip, Duke of Edinburgh"", ""Type"": ""P"", ""WikidataId"": ""Q80976"", ""Confidence"": 1.0, ""OccurrenceOffsets"": [48], ""SurfaceForms"": [""Prince Philip""]}, {""Label"": ""Charles, Prince of Wales"", ""Type"": ""P"", ""WikidataId"": ""Q43274"", ""Confidence"": 1.0, ""OccurrenceOffsets"": [28], ""SurfaceForms"": [""Prince Charles""]}, {""Label"": ""Elizabeth II"", ""Type"": ""P"", ""WikidataId"": ""Q9682"", ""Confidence"": 0.97, ""OccurrenceOffsets"": [11], ""SurfaceForms"": [""Queen Elizabeth""]}]",[]
0,N18955,health,medical,Dispose of unwanted prescription drugs during ...,,https://assets.msn.com/labs/mind/AAISxPN.html,"[{""Label"": ""Drug Enforcement Administration"", ...",[]
1,N61837,news,newsworld,The Cost of Trump's Aid Freeze in the Trenches...,Lt. Ivan Molchanets peeked over a parapet of s...,https://assets.msn.com/labs/mind/AAJgNsz.html,[],"[{""Label"": ""Ukraine"", ""Type"": ""G"", ""WikidataId..."
2,N53526,health,voices,I Was An NBA Wife. Here's How It Affected My M...,"I felt like I was a fraud, and being an NBA wi...",https://assets.msn.com/labs/mind/AACk2N6.html,[],"[{""Label"": ""National Basketball Association"", ..."
3,N38324,health,medical,"How to Get Rid of Skin Tags, According to a De...","They seem harmless, but there's a very good re...",https://assets.msn.com/labs/mind/AAAKEkt.html,"[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI...","[{""Label"": ""Skin tag"", ""Type"": ""C"", ""WikidataI..."
4,N2073,sports,football_nfl,Should NFL be able to fine players for critici...,Several fines came down against NFL players fo...,https://assets.msn.com/labs/mind/AAJ4lap.html,"[{""Label"": ""National Football League"", ""Type"":...","[{""Label"": ""National Football League"", ""Type"":..."


In [None]:
# Non-null values per column of the validation news frame loaded just above.
# The original inspected `news` (the frame from the training section) and
# did not call the method, so it only displayed a bound method object.
news1.count()

<bound method DataFrame.count of        N55528  ...                                                 []
0      N19639  ...  [{"Label": "Adipose tissue", "Type": "C", "Wik...
1      N61837  ...  [{"Label": "Ukraine", "Type": "G", "WikidataId...
2      N53526  ...  [{"Label": "National Basketball Association", ...
3      N38324  ...  [{"Label": "Skin tag", "Type": "C", "WikidataI...
4       N2073  ...  [{"Label": "National Football League", "Type":...
...       ...  ...                                                ...
51276  N16909  ...  [{"Label": "Woolsey Fire", "Type": "N", "Wikid...
51277  N47585  ...                                                 []
51278   N7482  ...                                                 []
51279  N34418  ...                                                 []
51280  N44276  ...                                                 []

[51281 rows x 8 columns]>

In [None]:
# Load the header-less, tab-separated template file. header=None plus explicit
# names stops pandas from consuming the first record as column labels.
test_data = pd.read_table(
    "test_data",
    sep='\t',
    header=None,
    names=['label', 'user_id', 'item_id', 'category_id', 'timestamp',
           'history_item_ids', 'history_category_ids', 'history_timestamp'],
    index_col=None)
test_data.head()

Unnamed: 0,1,A3R27T4HADWFFJ,B001PR0Y4O,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies","1138752000,1387584000"
0,0,A3R27T4HADWFFJ,B000CPH9XY,Arena Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
1,0,A3R27T4HADWFFJ,6303828035,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
2,0,A3R27T4HADWFFJ,1559409002,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
3,0,A3R27T4HADWFFJ,B001NP8PQM,Rock,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000
4,0,A3R27T4HADWFFJ,B000LQN1LI,TV,1391040000,"6302595916,B000I6BJ56","Movies,Movies",11387520001387584000


In [None]:
# Work on a copy so the loaded test_data frame itself is left untouched, then
# overwrite column 1 (user id) with column 1 of behaviors1 (user id).
# NOTE(review): unlike the training section, test_data is not truncated here;
# rows with no matching row index in behaviors1 appear to end up NaN in the
# overwritten columns — confirm whether that is intended.
valid = test_data.copy()
valid.iloc[:,1] = behaviors1.iloc[:,1]

In [None]:
# Overwrite column 2 of valid (item id) with column 0 of news1 (news id).
valid.iloc[:,2] = news1.iloc[:,0]

In [None]:
# Overwrite column 3 of valid (category) with column 1 of news1 (category).
valid.iloc[:,3] = news1.iloc[:,1]

In [None]:
# Overwrite column 5 of valid (history item ids) with column 3 of behaviors1
# (click history).
# NOTE(review): the behaviors history is space-separated while the values it
# replaces are comma-separated — confirm the downstream reader accepts this.
valid.iloc[:,5] = behaviors1.iloc[:,3]

In [None]:
# Overwrite column 6 of valid (history category ids) with column 2 of news1
# (subcategory).
valid.iloc[:,6] = news1.iloc[:,2]

In [None]:
# Inspect the first five rows after the column replacements above.
valid.head()

Unnamed: 0,1,A3R27T4HADWFFJ,B001PR0Y4O,Movies,1391040000,"6302595916,B000I6BJ56","Movies,Movies","1138752000,1387584000"
0,0,U60458,N18955,health,1391040000,N58715 N32109 N51180 N33438 N54827 N28488 N611...,medical,11387520001387584000
1,0,U44190,N61837,news,1391040000,N56253 N1150 N55189 N16233 N61704 N51706 N5303...,newsworld,11387520001387584000
2,0,U87380,N53526,health,1391040000,N63554 N49153 N28678 N23232 N43369 N58518 N444...,voices,11387520001387584000
3,0,U9444,N38324,health,1391040000,N51692 N18285 N26015 N22679 N55556,medical,11387520001387584000
4,0,U69606,N2073,sports,1391040000,N879 N19591 N63054 N53033 N54088 N34140 N14952...,football_nfl,11387520001387584000


In [None]:
# Export the validation frame as a tab-separated file, without a header line
# and without the index column.
valid.to_csv("valid", sep='\t', header=False, index=False)


In [None]:
# Read back the exported file to verify it. The file is written without a
# header line, so header=None and explicit names are required; otherwise the
# first record is consumed as column labels.
valid = pd.read_csv(
    "valid",
    sep='\t',
    header=None,
    names=['label', 'user_id', 'item_id', 'category_id', 'timestamp',
           'history_item_ids', 'history_category_ids', 'history_timestamp'],
)
valid.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,0,U60458,N18955,health,1391040000,N58715 N32109 N51180 N33438 N54827 N28488 N61186 N34775 N33742 N50020 N57061 N30924 N6778,medical,"1138752000,1387584000"
0,0,U44190,N61837,news,1391040000,N56253 N1150 N55189 N16233 N61704 N51706 N5303...,newsworld,11387520001387584000
1,0,U87380,N53526,health,1391040000,N63554 N49153 N28678 N23232 N43369 N58518 N444...,voices,11387520001387584000
2,0,U9444,N38324,health,1391040000,N51692 N18285 N26015 N22679 N55556,medical,11387520001387584000
3,0,U69606,N2073,sports,1391040000,N879 N19591 N63054 N53033 N54088 N34140 N14952...,football_nfl,11387520001387584000
4,0,U70421,N11429,news,1391040000,N38118 N55189 N16233 N37942 N23105 N27526 N965...,newsscienceandtechnology,11387520001387584000


In [None]:
# Display the frame from row 0 to the end (i.e. all rows); the notebook
# truncates the rendering, which shows the head and the tail together.
valid.iloc[0:,]

Unnamed: 0,0,U60458,N18955,health,1391040000,N58715 N32109 N51180 N33438 N54827 N28488 N61186 N34775 N33742 N50020 N57061 N30924 N6778,medical,"1138752000,1387584000"
0,0,U44190,N61837,news,1391040000,N56253 N1150 N55189 N16233 N61704 N51706 N5303...,newsworld,11387520001387584000
1,0,U87380,N53526,health,1391040000,N63554 N49153 N28678 N23232 N43369 N58518 N444...,voices,11387520001387584000
2,0,U9444,N38324,health,1391040000,N51692 N18285 N26015 N22679 N55556,medical,11387520001387584000
3,0,U69606,N2073,sports,1391040000,N879 N19591 N63054 N53033 N54088 N34140 N14952...,football_nfl,11387520001387584000
4,0,U70421,N11429,news,1391040000,N38118 N55189 N16233 N37942 N23105 N27526 N965...,newsscienceandtechnology,11387520001387584000
...,...,...,...,...,...,...,...,...
471743,0,,,,1399507200,,,"1397779200,1397779200,1397779200,1397347200,13..."
471744,0,,,,1399507200,,,"1397779200,1397779200,1397779200,1397347200,13..."
471745,0,,,,1399507200,,,"1397779200,1397779200,1397779200,1397347200,13..."
471746,0,,,,1399507200,,,"1397779200,1397779200,1397779200,1397347200,13..."


# **Calculate the hybrid model**

In [None]:
import numpy as np
# Four scores per model, entered by hand.
# NOTE(review): the notebook does not say which metrics these are or where the
# numbers come from — presumably the same four metrics in the same order for
# both models; confirm.
LSTUR = np.asarray([0.6428, 0.2985, 0.3314, 0.3929], dtype=float)
SLi_Rec = np.asarray([0.5069, 0.6937, 0.6728, 0.7019], dtype=float)

In [None]:
# Element-wise gap between the two models; the blend cells below scale it.
x = np.subtract(LSTUR, SLi_Rec)
print(x)

[ 0.1359 -0.3952 -0.3414 -0.309 ]


In [None]:
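## Linear blend of the two models' scores:
##   y = a * (LSTUR - SLi_Rec) + SLi_Rec  =  a * LSTUR + (1 - a) * SLi_Rec
## a runs from 0 (pure SLi-Rec) to 1 (pure LSTUR).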

In [None]:
# a = 0.0: the blend reduces to the SLi-Rec scores.
a = 0.0
result = SLi_Rec + a * x
print(result)

[0.5069 0.6937 0.6728 0.7019]


In [None]:
# a = 0.1: 0.1 * LSTUR + 0.9 * SLi_Rec.
a = 0.1
result = SLi_Rec + a * x
print(result)

[0.52049 0.65418 0.63866 0.671  ]


In [None]:
# a = 0.2: 0.2 * LSTUR + 0.8 * SLi_Rec.
a = 0.2
result = SLi_Rec + a * x
print(result)

[0.53408 0.61466 0.60452 0.6401 ]


In [None]:
# a = 0.3: 0.3 * LSTUR + 0.7 * SLi_Rec.
a = 0.3
result = SLi_Rec + a * x
print(result)

[0.54767 0.57514 0.57038 0.6092 ]


In [None]:
# a = 0.4: 0.4 * LSTUR + 0.6 * SLi_Rec.
a = 0.4
result = SLi_Rec + a * x
print(result)

[0.56126 0.53562 0.53624 0.5783 ]


In [None]:
# a = 0.5: equal weight on LSTUR and SLi_Rec.
a = 0.5
result = SLi_Rec + a * x
print(result)

[0.57485 0.4961  0.5021  0.5474 ]


In [None]:
# a = 0.6: 0.6 * LSTUR + 0.4 * SLi_Rec.
a = 0.6
result = SLi_Rec + a * x
print(result)

[0.58844 0.45658 0.46796 0.5165 ]


In [None]:
# a = 0.7: 0.7 * LSTUR + 0.3 * SLi_Rec.
a = 0.7
result = SLi_Rec + a * x
print(result)

[0.60203 0.41706 0.43382 0.4856 ]


In [None]:
# a = 0.8: 0.8 * LSTUR + 0.2 * SLi_Rec.
a = 0.8
result = SLi_Rec + a * x
print(result)

[0.61562 0.37754 0.39968 0.4547 ]


In [None]:
# a = 0.9: 0.9 * LSTUR + 0.1 * SLi_Rec.
a = 0.9
result = SLi_Rec + a * x
print(result)

[0.62921 0.33802 0.36554 0.4238 ]


In [None]:
# a = 1.0: the blend reduces to the LSTUR scores.
a = 1.0
result = SLi_Rec + a * x
print(result)

[0.6428 0.2985 0.3314 0.3929]
