In [81]:
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
import os
import operator
import matplotlib.pyplot as plt
%matplotlib inline
import sys
from pprint import pprint
from tensorflow import logging

In [42]:
def get_time_reward(timestramp):
    '''
        Args:
            timestrap: 直播的时间戳
        Return:
            weight: 时间的权重
    '''
    fix_timestramp = 1590802264
    total = 24*60*60
    delta = (fix_timestramp-int(timestramp))/total
    # 进行拉普拉斯修正
    return round(1/(1+delta), 3)

In [142]:
def get_anchor_style(input_file):
    '''
        Args:
            input_file: 主播直播信息
        Return:
            一个字典: key:anchor_id, val:[(style1, score1), (style2, score2)]
    '''
    topk = 2
    if not os.path.exists (input_file):
        return {}
    logging.info("Loading the {0} file !!!".format(input_file))
    anchor_record = {}
    # 主播列表
    anchors = []
    with open(input_file, encoding='utf-8') as f:
        lines = f.readlines()
        line_num = 0
        for line in lines:
            # 第一行是我们的标题信息
            if line_num == 0:
                line_num += 1
                continue
            items = line.strip().split(',')
#             print(items)
            anchor_id, anchor_nickname, item_id, item_cate, anchor_scores, timestramp \
            = items[0], items[1], items[2], items[3], items[4:-1], items[-1]
            anchors.append(anchor_nickname)
            logging.info("===============================================================")
            reward, like, barrage, watch = int(anchor_scores[0]), int(anchor_scores[1]), int(anchor_scores[3]), int(anchor_scores[3])
            # 得到时间的衰减函数
            time_weight = get_time_reward(timestramp)
            weight = np.array([0.4, 0.3, 0.2, 0.1])
            # 直播信息
            anchor_info = np.array([reward, like, barrage, watch])
            # 计算出主播直播某个风格的得分
            score = np.dot(anchor_info, weight)*time_weight
#             logging.info("===============================================================")
            print("{0} 主播直播{1}的得分为{2}".format(anchor_nickname, item_cate, score))
            if anchor_nickname not in anchor_record:
                anchor_record[anchor_nickname] = []
            # 得到主播的擅长直播的操作
            anchor_record[anchor_nickname].append({item_cate: score})
        logging.info("===================主播直播类型得分===================================")
        pprint(anchor_record)
        # 按分数对风格进行排序
        anchor_recode_sort = {}
        for anchor_name in anchors:
            if anchor_name not in anchor_recode_sort:
                anchor_recode_sort[anchor_name] = []
            for style_score in anchor_record[anchor_name]:
                anchor_recode_sort[anchor_name].append(sorted(style_score.items(), key=operator.itemgetter(1), reverse=True))
        return anchor_recode_sort            

In [143]:
input_file = '../主播.txt'
# 得到排序的表格啦
anchor_record = get_anchor_style(input_file)
# print(anchor_record)

INFO:tensorflow:Loading the ../主播.txt file !!!
大小花 主播直播女装的得分为268.94
大小花 主播直播多肉的得分为72.34920000000001
大小花 主播直播珠宝的得分为79.15320000000001
杰克jk 主播直播男装的得分为255.46920000000006
杰克jk 主播直播篮球的得分为314.874
杰克jk 主播直播美食的得分为130.9151
简匠文玩 主播直播珠宝的得分为318.8996
简匠文玩 主播直播篮球的得分为19.8702
{'大小花': [{'女装': 268.94}, {'多肉': 72.34920000000001}, {'珠宝': 79.15320000000001}],
 '杰克jk': [{'男装': 255.46920000000006}, {'篮球': 314.874}, {'美食': 130.9151}],
 '简匠文玩': [{'珠宝': 318.8996}, {'篮球': 19.8702}]}


In [152]:
items_info = '../商品信息.txt'
def read_items(input_file):
    '''
        Args:
            input_file: 商品信息文件
        Return:
            a dict: [key:商品id, value: 商品类别]
    '''
    items_cate = ["", "女装", "男装", "珠宝", "美食", "篮球", "多肉"]
    if not os.path.exists(input_file):
        return {}
    item_recode = {}
    logging.info("正在加载商品文件,请骚等…………")
    with open(input_file, encoding='utf-8') as f:
        lines = f.readlines()
        line_num = 0
        for line in lines:
            if line_num == 0:
                line_num += 1
                continue
            items = line.strip().split(',')
            if items_cate[int(items[1])] not in item_recode:
                item_recode[items_cate[int(items[1])]] = []
            item_recode[items_cate[int(items[1])]].append(items[0])
    logging.info("================全部商品信息======================")
    pprint(item_recode)
    return item_recode
# 得到商品的信息
item_recode = read_items(items_info)

INFO:tensorflow:正在加载商品文件,请骚等…………
{'多肉': ['6666', '7777'],
 '女装': ['1111'],
 '珠宝': ['3333'],
 '男装': ['2222', '8888'],
 '篮球': ['5555'],
 '美食': ['4444']}


主播刻画(风格： 权重)：   
     {'大小花': [{'女装': 268.94}, {'多肉': 72.34920000000001}, {'珠宝': 79.15320000000001}],  
      '杰克jk': [{'男装': 255.46920000000006}, {'篮球': 314.874}, {'美食': 130.9151}],  
      '简匠文玩': [{'珠宝': 318.8996}, {'篮球': 19.8702}]}  
 商品刻画(类别： 商品id)：  
     {'多肉': ['6666', '7777'],  
      '女装': ['1111'],  
      '珠宝': ['3333'],  
      '男装': ['2222', '8888'],  
      '篮球': ['5555'],  
      '美食': ['4444']}

In [162]:
def recommand(anchor_name, item_recode, anchor_record, topk = 2):
    '''
        Args:
            anchor_name: 通过主播的名字来斤进行推荐
            item_recoder: 商品的信息
            anchor_recoder: 主播擅长直播的信息表
            topk: 表示推荐几个商品
        Return:
         a dict: 返回{'主播名称': [推荐商品id1， 推荐商品id2， ……]}
    '''
    # 记录主播擅长的风格
    good_style = [] 
    # 默认取两个最擅长的类型
    for anchor_dict in anchor_record[anchor_name][:2]:
        good_style.append(anchor_dict[0][0])
    logging.info("================{0}最擅长直播的类型:=======================".format(anchor_name))
    pprint(good_style)
    logging.info("推荐系统正在推荐，请骚等…………")
    recommand_items = {}
    if anchor_name not in recommand_items:
        recommand_items[anchor_name] = []
    for cate in good_style:
        recommand_items[anchor_name].append(item_recode[cate])
    logging.info("==============={0}推荐商品结果为：=========================".format(anchor_name))
    pprint(recommand_items)
    
# 对我们主播进行推荐商品
recommand("大小花", item_recode, anchor_record)

['女装', '多肉']
INFO:tensorflow:推荐系统正在推荐，请骚等…………
{'大小花': [['1111'], ['6666', '7777']]}
