In [1]:
# data from http://grouplens.org/datasets/movielens/

In [1]:
import os
data_folder = os.path.join( "ml-100k")
ratings_filename = os.path.join(data_folder, "u.data")

In [2]:
import pandas as pd

In [3]:
#加载数据集时，把分隔符设置为制表符
#告诉pandas不要把第一行作为表头（header=None ），设置好各列的名称。
all_ratings = pd.read_csv(ratings_filename, delimiter="\t", header=None, names = ["UserID", "MovieID", "Rating", "Datetime"])
#解析时间数据
all_ratings["Datetime"] = pd.to_datetime(all_ratings['Datetime'],unit='s')
#稀疏矩阵（sparse matrix)
all_ratings[:10]

Unnamed: 0,UserID,MovieID,Rating,Datetime
0,196,242,3,1997-12-04 15:55:49
1,186,302,3,1998-04-04 19:22:22
2,22,377,1,1997-11-07 07:18:36
3,244,51,2,1997-11-27 05:02:03
4,166,346,1,1998-02-02 05:33:16
5,298,474,4,1998-01-07 14:20:06
6,115,265,2,1997-12-03 17:51:28
7,253,465,5,1998-04-03 18:34:27
8,305,451,3,1998-02-01 09:20:17
9,6,86,3,1997-12-31 21:16:53


In [4]:
# As you can see, there are no review for most movies, such as #213
all_ratings[all_ratings["UserID"] == 675].sort_values("MovieID")  

Unnamed: 0,UserID,MovieID,Rating,Datetime
81098,675,86,4,1998-03-10 00:26:14
90696,675,223,1,1998-03-10 00:35:51
92650,675,235,1,1998-03-10 00:35:51
95459,675,242,4,1998-03-10 00:08:42
82845,675,244,3,1998-03-10 00:29:35
53293,675,258,3,1998-03-10 00:11:19
97286,675,269,5,1998-03-10 00:08:07
93720,675,272,3,1998-03-10 00:07:11
73389,675,286,4,1998-03-10 00:07:11
77524,675,303,5,1998-03-10 00:08:42


In [5]:
#本次目标生成的规则：
#如果用户喜欢某些电影，他们也会喜欢这部电影

#首先确定用户是否喜欢某一部电影
all_ratings["Favorable"] = all_ratings["Rating"] > 3
all_ratings[0:10]

Unnamed: 0,UserID,MovieID,Rating,Datetime,Favorable
0,196,242,3,1997-12-04 15:55:49,False
1,186,302,3,1998-04-04 19:22:22,False
2,22,377,1,1997-11-07 07:18:36,False
3,244,51,2,1997-11-27 05:02:03,False
4,166,346,1,1998-02-02 05:33:16,False
5,298,474,4,1998-01-07 14:20:06,True
6,115,265,2,1997-12-03 17:51:28,False
7,253,465,5,1998-04-03 18:34:27,True
8,305,451,3,1998-02-01 09:20:17,False
9,6,86,3,1997-12-31 21:16:53,False


In [6]:
all_ratings[all_ratings["UserID"] == 1][:10]

Unnamed: 0,UserID,MovieID,Rating,Datetime,Favorable
202,1,61,4,1997-11-03 07:33:40,True
305,1,189,3,1998-03-01 06:15:28,False
333,1,33,4,1997-11-03 07:38:19,True
334,1,160,4,1997-09-24 03:42:27,True
478,1,20,4,1998-02-14 04:51:23,True
639,1,202,5,1997-09-24 03:40:42,True
687,1,171,5,1998-03-13 01:15:11,True
820,1,265,4,1997-11-03 07:34:01,True
933,1,155,2,1997-11-03 07:30:01,False
972,1,117,3,1997-09-22 22:02:19,False


In [7]:
# Sample the dataset. You can try increasing the size of the sample, but the run time will be considerably longer
ratings = all_ratings[all_ratings['UserID'].isin(range(200))]  # & ratings["UserID"].isin(range(100))]

In [8]:
# 新建一个只包含用户喜欢某部电影的数据行
favorable_ratings = ratings[ratings["Favorable"]]
favorable_ratings[:10]

Unnamed: 0,UserID,MovieID,Rating,Datetime,Favorable
16,122,387,5,1997-11-11 17:47:39,True
20,119,392,4,1998-01-30 16:13:34,True
21,167,486,4,1998-04-16 14:54:12,True
26,38,95,5,1998-04-13 01:14:54,True
28,63,277,4,1997-10-01 23:10:01,True
29,160,234,5,1997-10-14 20:33:05,True
35,157,274,4,1998-02-07 22:33:55,True
39,7,32,4,1998-03-31 13:28:52,True
40,10,16,4,1997-10-26 18:01:17,True
48,99,4,5,1998-02-03 15:18:17,True


In [9]:
# 只留下为超过一部电影打过喜欢分的用户
favorable_reviews_by_users = dict((k, frozenset(v.values)) for k, v in favorable_ratings.groupby("UserID")["MovieID"])
len(favorable_reviews_by_users)

199

In [10]:
# 计算每部电影的喜欢数
num_favorable_by_movie = ratings[["MovieID", "Favorable"]].groupby("MovieID").sum()
num_favorable_by_movie.sort_values("Favorable", ascending=False)[:10]

Unnamed: 0_level_0,Favorable
MovieID,Unnamed: 1_level_1
50,100.0
100,89.0
258,83.0
181,79.0
174,74.0
98,70.0
127,70.0
56,67.0
7,67.0
1,66.0


In [11]:
# Apriori算法是亲和性分析的一部分，专门用于查找数据集中的频繁项集。
# 基本流程是从前一步找到的频繁项集中找到新的备选集合，接着检测备选集合的频繁程度是否够高，然后算法像下面这样进行迭代。
# (1) 把各项目放到只包含自己的项集中，生成最初的频繁项集。只使用达到最小支持度的项目。
# (2) 查找现有频繁项集的超集，发现新的频繁项集，并用其生成新的备选项集。
# (3) 测试新生成的备选项集的频繁程度，如果不够频繁，则舍弃。如果没有新的频繁项集，就跳到最后一步。
# (4) 存储新发现的频繁项集，跳到步骤(2)。
# (5) 返回发现的所有频繁项集。
from collections import defaultdict

def find_frequent_itemsets(favorable_reviews_by_users, k_1_itemsets, min_support):
    counts = defaultdict(int)
    for user, reviews in favorable_reviews_by_users.items():
        for itemset in k_1_itemsets:
            if itemset.issubset(reviews):
                for other_reviewed_movie in reviews - itemset:
                    current_superset = itemset | frozenset((other_reviewed_movie,))
                    counts[current_superset] += 1
    return dict([(itemset, frequency) for itemset, frequency in counts.items() if frequency >= min_support])

In [12]:
import sys
# Apriori算法第一次迭代时，新发现的项集长度为2，它们是步骤(1)中创建的项集的超集。第二次迭代（经过步骤(4)）中，新发现的项集长度为3。
# 我们把发现的频繁项集保存到以项集长度为键的字典中，便于根据长度查找，这样就可以找到最新发现的频繁项集。下面的代码初始化一个字典。

frequent_itemsets = {}  # itemsets are sorted by length
# 最小支持度
min_support = 50

# Apriori算法的第一步，为每一部电影生成只包含它自己的项集，检测它是否够频繁。
# 长度为1的不进入选择
frequent_itemsets[1] = dict((frozenset((movie_id,)), row["Favorable"])
                                for movie_id, row in num_favorable_by_movie.iterrows()
                                if row["Favorable"] > min_support)

print("有 {} 个电影有超过 {} 喜爱数".format(len(frequent_itemsets[1]), min_support))
sys.stdout.flush()
for k in range(2, 20):
    # Generate candidates of length k, using the frequent itemsets of length k-1
    # Only store the frequent itemsets
    cur_frequent_itemsets = find_frequent_itemsets(favorable_reviews_by_users, frequent_itemsets[k-1],
                                                   min_support)
    if len(cur_frequent_itemsets) == 0:
        print("没发现频繁项集 {}".format(k))
        sys.stdout.flush()
        break
    else:
        print("发现 {} 个频繁项集长度为 {}".format(len(cur_frequent_itemsets), k))
        #print(cur_frequent_itemsets)
        sys.stdout.flush()
        frequent_itemsets[k] = cur_frequent_itemsets
# 删除长度为1的项集
del frequent_itemsets[1]

有 16 个电影有超过 50 喜爱数
发现 93 个频繁项集长度为 2
发现 295 个频繁项集长度为 3
发现 593 个频繁项集长度为 4
发现 785 个频繁项集长度为 5
发现 677 个频繁项集长度为 6
发现 373 个频繁项集长度为 7
发现 126 个频繁项集长度为 8
发现 24 个频繁项集长度为 9
发现 2 个频繁项集长度为 10
没发现频繁项集 11


In [13]:
print("总共发现 {0} 个频繁项集".format(sum(len(itemsets) for itemsets in frequent_itemsets.values())))

总共发现 2968 个频繁项集


In [14]:
# 下面的代码通过遍历不同长度的频繁项集，为每个项集生成规则。
candidate_rules = []
for itemset_length, itemset_counts in frequent_itemsets.items():
    for itemset in itemset_counts.keys():
        # 遍历项集中的每一部电影，把它作为结论。项集中的其他电影作为前提，用前提和结论组成备选规则。
        for conclusion in itemset:
            premise = itemset - set((conclusion,))
            candidate_rules.append((premise, conclusion))
print("共有 {} 候选规则".format(len(candidate_rules)))

共有 15285 候选规则


In [15]:
# 遍历项集中的每一部电影，把它作为结论。项集中的其他电影作为前提，用前提和结论组成备选规则。
print(candidate_rules[0:3])

[(frozenset({7}), 1), (frozenset({1}), 7), (frozenset({50}), 1)]


In [16]:
# 我们需要先创建两个字典，用来存储规则应验（正例 ）和规则不适用（反例 ）的次数。
correct_counts = defaultdict(int)
incorrect_counts = defaultdict(int)
# 遍历所有用户及其喜欢的电影数据，在这个过程中遍历每条关联规则。
for user, reviews in favorable_reviews_by_users.items():
    for candidate_rule in candidate_rules:
        premise, conclusion = candidate_rule
        # 用户是否喜欢前提中的所有电影
        if premise.issubset(reviews):
            # 如果前提符合，看一下用户是否喜欢结论中的电影
            if conclusion in reviews:
                correct_counts[candidate_rule] += 1
            else:
                incorrect_counts[candidate_rule] += 1
# 用规则应验的次数除以前提条件出现的总次数，计算每条规则的置信度。
rule_confidence = {candidate_rule: correct_counts[candidate_rule] / float(correct_counts[candidate_rule] + incorrect_counts[candidate_rule])
              for candidate_rule in candidate_rules}

In [17]:
# 最小置信度
min_confidence = 0.95

In [18]:
# 筛选出置信度>0.95的
rule_confidence = {rule: confidence for rule, confidence in rule_confidence.items() if confidence > min_confidence}
print(len(rule_confidence))

2715


In [19]:
from operator import itemgetter
sorted_confidence = sorted(rule_confidence.items(), key=itemgetter(1), reverse=True)

In [20]:
for index in range(5):
    print("Rule #{0}".format(index + 1))
    # 对字典排序，输出前5
    (premise, conclusion) = sorted_confidence[index][0]
    print(sorted_confidence[index])
    print("Rule: If a person recommends {0} they will also recommend {1}".format(premise, conclusion))
    print(" - Confidence: {0:.3f}".format(rule_confidence[(premise, conclusion)]))
    print("")

Rule #1
((frozenset({98, 181}), 50), 1.0)
Rule: If a person recommends frozenset({98, 181}) they will also recommend 50
 - Confidence: 1.000

Rule #2
((frozenset({172, 79}), 174), 1.0)
Rule: If a person recommends frozenset({172, 79}) they will also recommend 174
 - Confidence: 1.000

Rule #3
((frozenset({258, 172}), 174), 1.0)
Rule: If a person recommends frozenset({258, 172}) they will also recommend 174
 - Confidence: 1.000

Rule #4
((frozenset({1, 181, 7}), 50), 1.0)
Rule: If a person recommends frozenset({1, 181, 7}) they will also recommend 50
 - Confidence: 1.000

Rule #5
((frozenset({1, 172, 7}), 174), 1.0)
Rule: If a person recommends frozenset({1, 172, 7}) they will also recommend 174
 - Confidence: 1.000



In [21]:
# 从电影名字表中获取电影名字
movie_name_filename = os.path.join(data_folder, "u.item")
movie_name_data = pd.read_csv(movie_name_filename, delimiter="|", header=None, encoding = "mac-roman")
movie_name_data.columns = ["MovieID", "Title", "Release Date", "Video Release", "IMDB", "<UNK>", "Action", "Adventure",
                           "Animation", "Children's", "Comedy", "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir",
                           "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"]

In [22]:
def get_movie_name(movie_id):
    title_object = movie_name_data[movie_name_data["MovieID"] == movie_id]["Title"]
    title = title_object.values[0]
    return title

In [23]:
get_movie_name(4)

'Get Shorty (1995)'

In [24]:
for index in range(1):
    print("Rule #{0}".format(index + 1))
    (premise, conclusion) = sorted_confidence[index][0]
    premise_names = ", ".join(get_movie_name(idx) for idx in premise)
    conclusion_name = get_movie_name(conclusion)
    print("Rule: If a person recommends {0} they will also recommend {1}".format(premise_names, conclusion_name))
    print(" - Confidence: {0:.3f}".format(rule_confidence[(premise, conclusion)]))
    print("")

Rule #1
Rule: If a person recommends Silence of the Lambs, The (1991), Return of the Jedi (1983) they will also recommend Star Wars (1977)
 - Confidence: 1.000



In [25]:
# 测试集
test_dataset = all_ratings[~all_ratings['UserID'].isin(range(300))]
test_favorable = test_dataset[test_dataset["Favorable"]]
test_favorable_by_users = dict((k, frozenset(v.values)) for k, v in test_favorable.groupby("UserID")["MovieID"])

#test_not_favourable = test_dataset[~test_dataset["Favourable"]]
#test_not_favourable_by_users = dict((k, frozenset(v.values)) for k, v in test_not_favourable.groupby("UserID")["MovieID"])
#test_users = test_dataset["UserID"].unique()

In [26]:
test_dataset[:5]

Unnamed: 0,UserID,MovieID,Rating,Datetime,Favorable
8,305,451,3,1998-02-01 09:20:17,False
15,303,785,3,1997-11-14 05:28:38,False
24,308,1,4,1998-02-17 17:28:52,True
31,301,98,4,1997-12-14 05:03:47,True
147,305,427,5,1998-02-01 08:51:30,True


In [27]:
correct_counts = defaultdict(int)
incorrect_counts = defaultdict(int)
for user, reviews in test_favorable_by_users.items():
    for candidate_rule in candidate_rules:
        premise, conclusion = candidate_rule
        if premise.issubset(reviews):
            if conclusion in reviews:
                correct_counts[candidate_rule] += 1
            else:
                incorrect_counts[candidate_rule] += 1

In [28]:
test_confidence = {candidate_rule: correct_counts[candidate_rule] / float(correct_counts[candidate_rule] + incorrect_counts[candidate_rule])
                   for candidate_rule in rule_confidence}
print(len(test_confidence))

2715


In [29]:
sorted_test_confidence = sorted(test_confidence.items(), key=itemgetter(1), reverse=True)
print(sorted_test_confidence[:5])

[((frozenset({64, 1, 172, 79}), 174), 1.0), ((frozenset({64, 1, 7, 172, 56}), 174), 1.0), ((frozenset({64, 1, 7, 181, 56}), 174), 1.0), ((frozenset({64, 1, 98, 7, 79}), 174), 1.0), ((frozenset({64, 1, 7, 172, 79}), 174), 1.0)]


In [30]:
json = []
for index in range(52116):
    (premise, conclusion) = sorted_confidence[index][0]
    premise_names = ", ".join(get_movie_name(idx) for idx in premise)
    conclusion_name = get_movie_name(conclusion)
    if test_confidence.get((premise, conclusion), -1) > 0.96:
        print("Rule #{0}".format(index + 1))
        dict = []
        for idx in premise:
            dict.append(get_movie_name(idx))
        item = {}
        item["favor"] = dict
        item["recomondation"] = get_movie_name(conclusion)
        json.append(item)
        # print("{0}".format(conclusion_name))
        print("规则，如果喜欢 {0} 会推荐 {1}".format(premise_names, conclusion_name))
        print(" - Train Confidence: {0:.3f}".format(rule_confidence.get((premise, conclusion), -1)))
        print(" - Test Confidence: {0:.3f}".format(test_confidence.get((premise, conclusion), -1)))
        print("")



Rule #7
规则，如果喜欢 Pulp Fiction (1994), Toy Story (1995), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.964

Rule #9
规则，如果喜欢 Toy Story (1995), Empire Strikes Back, The (1980), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.966

Rule #16
规则，如果喜欢 Pulp Fiction (1994), Return of the Jedi (1983), Twelve Monkeys (1995) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.963

Rule #38
规则，如果喜欢 Pulp Fiction (1994), Return of the Jedi (1983), Fugitive, The (1993) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.973

Rule #40
规则，如果喜欢 Pulp Fiction (1994), Fargo (1996), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.984

Rule #42
规则，如果喜欢 Pulp Fiction (1994), Empire Strikes Back, The (1980), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.978

Rule #43
规则，如果喜欢 Pulp Fictio

Rule #347
规则，如果喜欢 Pulp Fiction (1994), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.969

Rule #351
规则，如果喜欢 Pulp Fiction (1994), Silence of the Lambs, The (1991), Contact (1997), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.973

Rule #354
规则，如果喜欢 Pulp Fiction (1994), Fargo (1996), Return of the Jedi (1983), Godfather, The (1972) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.977

Rule #356
规则，如果喜欢 Pulp Fiction (1994), Empire Strikes Back, The (1980), Fargo (1996), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.980

Rule #357
规则，如果喜欢 Pulp Fiction (1994), Fargo (1996), Return of the Jedi (1983), Raiders of the Lost Ark (1981) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.980

Rule #360
规则，如果喜欢 Pulp Fiction (1994), Contact (1997)

Rule #590
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Fugitive, The (1993) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #591
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #592
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Fargo (1996), Twelve Monkeys (1995) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #593
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Empire Strikes Back, The (1980) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 0.968

Rule #594
规则，如果喜欢 Shawshank Redemption, The (1994)

Rule #738
规则，如果喜欢 Silence of the Lambs, The (1991), Twelve Monkeys (1995), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.969

Rule #752
规则，如果喜欢 Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.969

Rule #754
规则，如果喜欢 Twelve Monkeys (1995), Raiders of the Lost Ark (1981), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.970

Rule #762
规则，如果喜欢 Silence of the Lambs, The (1991), Fargo (1996), Twelve Monkeys (1995), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.966

Rule #781
规则，如果喜欢 Fargo (1996), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Return of the Jedi (1983), Pulp Fiction (1994) 

Rule #1083
规则，如果喜欢 Shawshank Redemption, The (1994), Fargo (1996), Return of the Jedi (1983), Pulp Fiction (1994), Godfather, The (1972) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.971

Rule #1085
规则，如果喜欢 Shawshank Redemption, The (1994), Fargo (1996), Empire Strikes Back, The (1980), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.971

Rule #1086
规则，如果喜欢 Shawshank Redemption, The (1994), Fargo (1996), Raiders of the Lost Ark (1981), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.971

Rule #1089
规则，如果喜欢 Shawshank Redemption, The (1994), Empire Strikes Back, The (1980), Return of the Jedi (1983), Pulp Fiction (1994), Godfather, The (1972) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.977

Rule #1090
规则，如果喜欢 Shawshank Redemption, The (1994), Raiders of the Lost Ark (1981), Return of the Jedi (1983), Pu

Rule #1235
规则，如果喜欢 Silence of the Lambs, The (1991), Contact (1997), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.974

Rule #1281
规则，如果喜欢 Shawshank Redemption, The (1994), Contact (1997), Raiders of the Lost Ark (1981), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Empire Strikes Back, The (1980)
 - Train Confidence: 1.000
 - Test Confidence: 0.963

Rule #1328
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Twelve Monkeys (1995), Fugitive, The (1993), Star Wars (1977), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #1336
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Star Wars (1977), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 0.964

Rule #1339
规则，如果喜欢 Shawshank 

Rule #1440
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #1443
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Fugitive, The (1993), Return of the Jedi (1983) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #1444
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #1445
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Return of the Jedi (1983) 会推荐 Raiders o

Rule #1548
规则，如果喜欢 Shawshank Redemption, The (1994), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 0.966

Rule #1549
规则，如果喜欢 Shawshank Redemption, The (1994), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.962

Rule #1551
规则，如果喜欢 Shawshank Redemption, The (1994), Twelve Monkeys (1995), Raiders of the Lost Ark (1981), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.962

Rule #1553
规则，如果喜欢 Shawshank Redemption, The (1994), Twelve Monkeys (1995), Fugitive, The (1993), Star Wars (1977), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Conf

Rule #1830
规则，如果喜欢 Silence of the Lambs, The (1991), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 0.966

Rule #1861
规则，如果喜欢 Shawshank Redemption, The (1994), Silence of the Lambs, The (1991), Fargo (1996), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 0.962

Rule #1865
规则，如果喜欢 Shawshank Redemption, The (1994), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 0.966

Rule #1867
规则，如果喜欢 Shawshank Redemption, The (1994), Fargo (1996), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983) 会推荐 Raiders of 

规则，如果喜欢 Shawshank Redemption, The (1994), Contact (1997), Silence of the Lambs, The (1991), Raiders of the Lost Ark (1981), Star Wars (1977), Return of the Jedi (1983) 会推荐 Empire Strikes Back, The (1980)
 - Train Confidence: 1.000
 - Test Confidence: 0.967

Rule #2030
规则，如果喜欢 Silence of the Lambs, The (1991), Contact (1997), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Fugitive, The (1993), Return of the Jedi (1983) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.967

Rule #2071
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Fugitive, The (1993), Star Wars (1977), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #2077
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977), Pulp Fiction (1994) 会推荐 Raiders of the Lo

Rule #2167
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Fargo (1996), Star Wars (1977), Pulp Fiction (1994), Godfather, The (1972) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #2168
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.968

Rule #2170
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Star Wars (1977), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #2171
规则，如果喜欢 Toy Story (1995), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Fugitive, T

Rule #2458
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #2460
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Raiders of the Lost Ark (1981)
 - Train Confidence: 1.000
 - Test Confidence: 1.000

Rule #2461
规则，如果喜欢 Shawshank Redemption, The (1994), Toy Story (1995), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Fugitive, The (1993), Return of the Jedi (1983), Pulp Fiction (1994) 会推荐 Star Wars (1977)
 - Train Confidence: 1.000
 - Test Confidence: 0.962

Rule #2491
规则，如果喜欢 Shawshank Redemption, The (1994), 

IndexError: list index out of range

In [31]:
print(json)

[{'favor': ['Pulp Fiction (1994)', 'Toy Story (1995)', 'Return of the Jedi (1983)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Toy Story (1995)', 'Empire Strikes Back, The (1980)', 'Return of the Jedi (1983)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Pulp Fiction (1994)', 'Return of the Jedi (1983)', 'Twelve Monkeys (1995)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Pulp Fiction (1994)', 'Return of the Jedi (1983)', 'Fugitive, The (1993)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Pulp Fiction (1994)', 'Fargo (1996)', 'Return of the Jedi (1983)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Pulp Fiction (1994)', 'Empire Strikes Back, The (1980)', 'Return of the Jedi (1983)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Pulp Fiction (1994)', 'Return of the Jedi (1983)', 'Raiders of the Lost Ark (1981)'], 'recomondation': 'Star Wars (1977)'}, {'favor': ['Shawshank Redemption, The (1994)', 'Empire Strikes Back, The (1980)', 'Return of the Je