In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, monotonically_increasing_id
from pyspark.sql.types import IntegerType, StringType, StructType, StructField, FloatType
from pyspark.sql.functions import count,min,mean,col,sum,when,max,min,avg,explode,split,row_number,year,month , to_date,trim
from pyspark.sql.window import Window


In [2]:
def create_spark_session():
    """
    Create a Spark session.
    """
    spark = SparkSession.builder \
        .appName("Movies") \
        .master("local[*]") \
        .config('spark.sql.shuffle.partitions', 2) \
        .config('spark.sql.warehouse.dir', 'hdfs://node1:8020/user/hive/warehouse') \
        .config('hive.metastore.uris', 'thrift://node1:9083') \
        .config("spark.jars", "/usr/share/java/mysql-connector-java.jar") \
        .enableHiveSupport() \
        .getOrCreate()

    return spark

In [3]:
def saveTable(result,tableName):
    print(f'Saving {tableName}')
    result.write.mode("overwrite"). \
        format("jdbc"). \
        option("url", "jdbc:mysql://node1:3306/movies"). \
        option("dbtable", f"{tableName}"). \
        option("user", "root"). \
        option("password", "sakura0113"). \
        option("driver", "com.mysql.cj.jdbc.Driver"). \
        option("encoding", "UTF-8"). \
        save()

    result.write.mode("overwrite"). \
        saveAsTable(f"{tableName}", format="parquet", )
    spark.sql(f"select * from {tableName}").show()

In [4]:
#创建session
spark = create_spark_session()

#读取电影文件
catMovieData = spark.read.table('movieData')


25/04/20 20:19:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/04/20 20:19:21 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [5]:
explode_df = catMovieData.withColumn('movie_tag',explode(split(col('movie_tag'),'-')))
result1 = explode_df.groupby('movie_tag').agg(count('movie_tag').alias('tag_count'))
saveTable(result1,'movieTagCount')

Saving movieTagCount


25/04/20 11:39:10 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.


+---------+---------+
|movie_tag|tag_count|
+---------+---------+
|     音乐|       66|
|     剧情|      808|
|     动画|      382|
|     神话|        4|
|     动作|      523|
|     爱情|      271|
|     奇幻|      247|
|     犯罪|      194|
|     恐怖|      101|
|     戏曲|        7|
|     古装|       21|
|     儿童|       10|
|     热血|        2|
| 黑色电影|        1|
|     农村|        4|
|     西部|        3|
|     军事|        1|
|     搞笑|        2|
| 舞台艺术|        2|
|     纪录|        1|
+---------+---------+
only showing top 20 rows



In [6]:
"""
Top10票房统计
"""
# 如果列中有非数字（如 "Unknown" 或空值），可以先过滤：
df_clean = catMovieData.filter(
    col("movie_all_ticket").rlike("^\\d+$"),
)
df_numeric = df_clean.withColumn(
    "movie_all_ticket_num",
    col("movie_all_ticket").cast("integer")
)

sort_df = df_numeric.orderBy(col("movie_all_ticket_num").desc())
sort_df.show()
result2 = sort_df.limit(10)
saveTable(result2,"ticketTopMovie")


                                                                                

+-------+---------------------+-------------------+-------------+------------+-------------+----------+-------------------------------------+--------------+---------------------------------+----------+---------------------+----------------+--------------------+
|movieId|           movie_name|          movie_tag|movie_country|movie_length|movie_release|movie_rate|                        movie_summary|movie_director|                      movie_actor|movie_rank|movie_firtweek_ticket|movie_all_ticket|movie_all_ticket_num|
+-------+---------------------+-------------------+-------------+------------+-------------+----------+-------------------------------------+--------------+---------------------------------+----------+---------------------+----------------+--------------------+
|     25|       哪吒之魔童闹海|     喜剧-剧情-动画|    中国大陆 |         144|   2025-01-29|        98|天劫之后，哪吒、敖丙的灵魂虽保住了...|          饺子|        吕艳婷-瀚墨-囧森瑟夫-绿绮|         1|               313135|         1427246|             1427246|

25/04/20 11:40:34 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+-------+---------------------+-------------------+-------------+------------+-------------+----------+-------------------------------------+--------------+---------------------------------+----------+---------------------+----------------+--------------------+
|movieId|           movie_name|          movie_tag|movie_country|movie_length|movie_release|movie_rate|                        movie_summary|movie_director|                      movie_actor|movie_rank|movie_firtweek_ticket|movie_all_ticket|movie_all_ticket_num|
+-------+---------------------+-------------------+-------------+------------+-------------+----------+-------------------------------------+--------------+---------------------------------+----------+---------------------+----------------+--------------------+
|     25|       哪吒之魔童闹海|     喜剧-剧情-动画|    中国大陆 |         144|   2025-01-29|        98|天劫之后，哪吒、敖丙的灵魂虽保住了...|          饺子|        吕艳婷-瀚墨-囧森瑟夫-绿绮|         1|               313135|         1427246|             1427246|

In [7]:
df_clean = catMovieData.filter(
    col("movie_all_ticket").rlike("^\\d+$"),
).filter(
    col("movie_firtweek_ticket").rlike("^\\d+$"),
)

df_numeric = df_clean.withColumn(
    "movie_all_ticket_num",
    col("movie_all_ticket").cast("integer")
).withColumn(
    "movie_firtweek_ticket_num",
    col("movie_firtweek_ticket").cast("integer")
)

explode_df = df_numeric.withColumn('movie_tag',explode(split(col('movie_tag'),'-')))
result3 = explode_df.groupby(col('movie_tag')).agg(
    max('movie_firtweek_ticket_num').alias('max_movie_firtweek_ticket'),
    max('movie_all_ticket_num').alias('max_movie_all_ticket'),)
saveTable(result3,"typeTopMovie")

# """
# 种类票房统计
# """

Saving typeTopMovie


                                                                                

+---------+-------------------------+--------------------+
|movie_tag|max_movie_firtweek_ticket|max_movie_all_ticket|
+---------+-------------------------+--------------------+
|     音乐|                     8310|               45936|
|     剧情|                   313135|             1427246|
|     动画|                   313135|             1427246|
|     神话|                    89616|              263649|
|     动作|                   223284|              569454|
|     爱情|                   181992|              339120|
|     犯罪|                   136959|              384984|
|     奇幻|                   223284|              425015|
|     恐怖|                    19952|               31117|
|     古装|                    42864|               62899|
|     热血|                    38150|               66351|
|     儿童|                     5536|               13897|
| 舞台艺术|                       51|                 224|
|     农村|                      157|               10973|
|   合家欢|                   

In [8]:
"""
国家统计
"""
explode_df = catMovieData.withColumn('country', explode(split(col('movie_country'), ',')))
result4 = explode_df.groupby('country').agg(
    count('country').alias('country_count'),
)
saveTable(result4,"countryCount")


Saving countryCount
+---------+-------------+
|  country|country_count|
+---------+-------------+
|    印度 |           12|
|  新西兰 |            8|
| 中国香港|           25|
|  意大利 |            9|
| 中国台湾|            8|
|     英国|           39|
|   西班牙|            3|
|    法国 |           34|
|中国台湾 |           44|
|    泰国 |            4|
|     法国|           23|
|  加拿大 |           22|
|   新西兰|            2|
|  奥地利 |            2|
|  比利时 |            5|
|   加拿大|            7|
|  西班牙 |           10|
|   爱尔兰|            3|
| 澳大利亚|            4|
|   意大利|            5|
+---------+-------------+
only showing top 20 rows



In [9]:
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
"""
评分分布
"""
cat_rate_df = catMovieData.withColumn(
    'rateCategory',
    when((col('movie_rate') >= 10) & (col('movie_rate') < 20), '0.5星')
    .when((col('movie_rate') >= 20) & (col('movie_rate') < 30), '1星')
    .when((col('movie_rate') >= 30) & (col('movie_rate') < 40), '1.5星')
    .when((col('movie_rate') >= 40) & (col('movie_rate') < 50), '2星')
    .when((col('movie_rate') >= 50) & (col('movie_rate') < 60), '2.5星')
    .when((col('movie_rate') >= 60) & (col('movie_rate') < 70), '3星')
    .when((col('movie_rate') >= 70) & (col('movie_rate') < 80), '3.5星')
    .when((col('movie_rate') >= 80) & (col('movie_rate') < 90), '4星')
    .when((col('movie_rate') >= 90) & (col('movie_rate') <= 100), '4.5星')
    .otherwise('未知')  # 处理不在上述范围的情况
)
cat_rate_df = cat_rate_df.filter(col('movie_rate') != 0)
result5 = cat_rate_df.groupby(col('rateCategory')).count()

saveTable(result5,"rateCount")


Saving rateCount
+------------+-----+
|rateCategory|count|
+------------+-----+
|         4星|  655|
|         2星|    9|
|       1.5星|    6|
|       4.5星|  379|
|       3.5星|  264|
|       2.5星|   36|
|         3星|   89|
|         1星|    1|
+------------+-----+



# 类型分析

In [10]:
"""
种类票房Top10
"""
# 如果列中有非数字（如 "Unknown" 或空值），可以先过滤：
df_clean = catMovieData.filter(
    col("movie_all_ticket").rlike("^\\d+$"),
)
df_numeric = df_clean.withColumn(
    "movie_all_ticket_num",
    col("movie_all_ticket").cast("integer")
)
explode_df = df_numeric.withColumn('type',explode(split(col('movie_tag'),'-')))
group_df = explode_df.groupby('type','movie_name').agg({'movie_all_ticket':'sum'}).withColumnRenamed('sum(movie_all_ticket)','movie_all_ticket')

window = Window.partitionBy('type').orderBy(col('movie_all_ticket').desc())
rank = group_df.withColumn('rank',row_number().over(window))
result6 = rank.filter(col('rank') <= 10).drop(col('rank'))
saveTable(result6,"perTypeTickets")

Saving perTypeTickets


                                                                                

+------+--------------------------+----------------+
|  type|                movie_name|movie_all_ticket|
+------+--------------------------+----------------+
|主旋律|                青年邓颖超|           164.0|
|  亲情|                  世间有她|          5631.0|
|  亲情|              美人鱼的夏天|           113.0|
|  传记|                  中国机长|        291317.0|
|  传记|              摔跤吧！爸爸|        129932.0|
|  传记|             叶问4：完结篇|        118170.0|
|  传记|                    绿皮书|         47872.0|
|  传记|                血战钢锯岭|         42582.0|
|  传记|                      归来|         29136.0|
|  传记|                    梅艳芳|         11353.0|
|  传记|                  马戏之王|         10125.0|
|  传记|            波西米亚狂想曲|          9907.0|
|  传记|                      叶问|          8491.0|
|  儿童|喜羊羊与灰太狼之兔年顶呱呱|         13897.0|
|  儿童|  喜羊羊与灰太狼之虎虎生威|         12432.0|
|  儿童|     猪猪侠大电影·恐龙日记|          7932.0|
|  儿童|    汪汪队立大功之超能救援|          7911.0|
|  儿童|                踢出个未来|           148.0|
|  儿童|                  校园神探|   

In [11]:
"""
种类评分
"""
filter_df = explode_df.filter(col('movie_rate') != 0)
filter_df.groupby('type').agg(mean('movie_rate').alias('mean_typeRate'))
result7 = filter_df.groupby('type').agg(mean('movie_rate').alias('mean_typeRate'))
saveTable(result7,"meanTypeRate")


Saving meanTypeRate


                                                                                

+------+-----------------+
|  type|    mean_typeRate|
+------+-----------------+
|  音乐|90.28571428571429|
|  剧情|86.18673218673219|
|  动画|87.90222222222222|
|  神话|             85.0|
|  动作|84.72413793103448|
|  爱情| 83.3313953488372|
|  犯罪|84.99115044247787|
|  奇幻|84.72727272727273|
|  恐怖|69.58333333333333|
|  古装|77.07692307692308|
|  热血|             91.5|
|  儿童|             79.0|
|  西部|             86.5|
|合家欢|             80.0|
|  农村|             83.5|
|  戏曲|             91.0|
|纪录片|             92.0|
|  青春|          85.4375|
|  家庭|87.61764705882354|
|  冒险|86.43686006825939|
+------+-----------------+
only showing top 20 rows



In [12]:
"""
种类时长
"""
result8 = explode_df.groupby('type').agg(mean('movie_length').alias('typeAvgLen'))
saveTable(result8,"avgTypeLen")


Saving avgTypeLen
+--------+------------------+
|    type|        typeAvgLen|
+--------+------------------+
|    音乐| 114.9047619047619|
|    剧情|116.50401606425703|
|    动画|            98.644|
|    神话|             146.0|
|    动作|  121.797783933518|
|    爱情|112.86153846153846|
|    犯罪|113.70588235294117|
|    奇幻|114.33707865168539|
|    恐怖| 94.94736842105263|
|    古装|          111.5625|
|    热血|             110.0|
|    儿童| 93.57142857142857|
|    西部|             148.5|
|舞台艺术|             105.5|
|    农村|115.66666666666667|
|  合家欢|              84.8|
|    革命|             101.0|
|    公路|              97.0|
|    戏曲|             107.4|
|    竞技|             210.0|
+--------+------------------+
only showing top 20 rows



In [32]:
filter_df = explode_df.filter(col('movie_rate') != 0)
result9 = filter_df.groupby('type','movie_rate').agg(count('movie_rate').alias('count_typeRate'))
saveTable(result9,'typeCountRate')

Saving typeCountRate
+------+----------+--------------+
|  type|movie_rate|count_typeRate|
+------+----------+--------------+
|  音乐|        95|             2|
|  家庭|        92|            10|
|  剧情|        92|            27|
|  动画|        91|            18|
|  冒险|        91|            29|
|  战争|        79|             1|
|  动作|        79|            10|
|  悬疑|        89|             7|
|  剧情|        89|            26|
|  喜剧|        94|             6|
|  冒险|        89|            27|
|  喜剧|        95|             8|
|  动作|        95|             5|
|  动作|        87|            14|
|  惊悚|        87|             4|
|  动画|        92|            20|
|  喜剧|        91|            22|
|纪录片|        94|             1|
|  战争|        97|             2|
|  奇幻|        87|            11|
+------+----------+--------------+
only showing top 20 rows



# 国家分析

In [87]:
country_explode_df = catMovieData.withColumn(
    'country', 
    explode(split(trim(col('movie_country')), ','))
)
result11 = country_explode_df.groupby('country').agg(mean('movie_rate').alias('countryMeanRate'))
saveTable(result11,"countryMeanRate")

Saving countryMeanRate
+--------+-----------------+
| country|  countryMeanRate|
+--------+-----------------+
|中国香港|81.26515151515152|
|    印度|             82.0|
|    英国|81.84042553191489|
|  新西兰|           87.625|
|  意大利|86.16666666666667|
|中国台湾| 80.3076923076923|
|  西班牙|             80.0|
|    法国| 82.6046511627907|
|    韩国|72.92592592592592|
|  加拿大|             83.0|
|  比利时|             76.0|
|澳大利亚|79.13333333333334|
|马来西亚|             83.5|
|  爱尔兰|             83.0|
|    巴西|             77.0|
|  卢森堡|             74.0|
|    丹麦|             68.0|
|罗马尼亚|             45.0|
|    捷克|85.33333333333333|
|  俄罗斯|81.66666666666667|
+--------+-----------------+
only showing top 20 rows



In [88]:
result12 = country_explode_df.groupby('country').agg(mean('movie_length').alias('countryMeanLength'))
saveTable(result12,"countryMeanLength")

Saving countryMeanLength
+--------+------------------+
| country| countryMeanLength|
+--------+------------------+
|中国香港|111.12337662337663|
|    印度|122.64285714285714|
|    英国|118.80952380952381|
|  新西兰|             150.8|
|  意大利|             143.0|
|中国台湾|113.07692307692308|
|  西班牙|113.76923076923077|
|    法国| 118.0701754385965|
|    韩国|119.97674418604652|
|  加拿大|120.48275862068965|
|  比利时|             105.0|
|澳大利亚|           110.625|
|马来西亚|            109.25|
|  爱尔兰|             107.5|
|    巴西|             123.0|
|  卢森堡|             114.0|
|    丹麦|             103.0|
|罗马尼亚|              98.0|
|    捷克|             132.0|
|  俄罗斯|116.66666666666667|
+--------+------------------+
only showing top 20 rows



In [5]:
df_clean = catMovieData.filter(
    col("movie_all_ticket").rlike("^\\d+$"),
)
df_numeric = df_clean.withColumn(
    "movie_all_ticket_num",
    col("movie_all_ticket").cast("integer")
)
country_explode_df = df_numeric.withColumn(
    'country', 
    explode(split(trim(col('movie_country')), ','))
)
result13 = country_explode_df.groupby('country').agg(sum('movie_all_ticket').alias('countryTotalTicket'))
saveTable(result13,"countryTotalTicket")

Saving countryTotalTicket


25/04/20 19:49:54 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.


+--------+------------------+
| country|countryTotalTicket|
+--------+------------------+
|中国香港|         2508710.0|
|    印度|          439969.0|
|    英国|          802591.0|
|  新西兰|          186561.0|
|  意大利|           23435.0|
|中国台湾|          517874.0|
|  西班牙|           28788.0|
|    法国|          141701.0|
|  加拿大|          411151.0|
|马来西亚|            4007.0|
|澳大利亚|          259366.0|
|    韩国|             100.0|
|  爱尔兰|            1721.0|
|    捷克|           76482.0|
|  俄罗斯|           73216.0|
|    越南|              64.0|
|中国大陆|       3.0991851E7|
|    日本|         1089692.0|
|    美国|       1.2896414E7|
|    德国|          213170.0|
+--------+------------------+
only showing top 20 rows



In [10]:
"""
国家票房Top10
"""

group_df = country_explode_df.groupby('country','movie_name').agg({'movie_all_ticket':'sum'}).withColumnRenamed('sum(movie_all_ticket)','movie_all_ticket')

window = Window.partitionBy('country').orderBy(col('movie_all_ticket').desc())
rank = group_df.withColumn('rank',row_number().over(window))
result16=rank.filter(col('rank') <= 10).drop(col('rank'))
saveTable(result16,'countryTopMovie')

Saving countryTopMovie
+--------+-------------------------------------+----------------+
| country|                           movie_name|movie_all_ticket|
+--------+-------------------------------------+----------------+
| Unknown|北京希杰星星国际影城有限公司将台分...|            14.0|
| Unknown|苏州文化艺术中心管理有限公司意大利...|            13.0|
| Unknown|    武汉百丽宫影院有限公司2023芳华...|             9.0|
| Unknown|  英皇电影城（北京）有限公司英皇20...|             8.0|
| Unknown| 广州百丽宫影院有限公司2023德国电影展|             5.0|
|中国台湾|                   比悲伤更悲伤的故事|         95843.0|
|中国台湾|                           周处除三害|         66573.0|
|中国台湾|                     少年派的奇幻漂流|         57288.0|
|中国台湾|                    小时代3：刺金时代|         52098.0|
|中国台湾|                               小时代|         48432.0|
|中国台湾|                               想见你|         40362.0|
|中国台湾|                         我的少女时代|         35893.0|
|中国台湾|                    小时代2：青木时代|         29619.0|
|中国台湾|                         当男人恋爱时|         26421.0|
|中国台湾|                   

# 时间分析

In [92]:
##年度评分##
year_df = catMovieData.filter(col('movie_release').isNotNull())
year_df = catMovieData.withColumn('year',year(to_date('movie_release',"yyyy-MM-dd")))
result10 = year_df.groupby('year').agg(mean('movie_rate').alias('yearMeanRate'))
result10 = result10.filter(col('yearMeanRate').isNotNull())
saveTable(result10,"yearMeanRate")

Saving yearMeanRate
+----+-----------------+
|year|     yearMeanRate|
+----+-----------------+
|2010|77.79411764705883|
|2009|82.65384615384616|
|2004|81.36842105263158|
|2016|83.17948717948718|
|2003|79.52380952380952|
|2020|83.56603773584905|
|1998|             84.0|
|2017|83.64044943820225|
|1999|75.92857142857143|
|1974|74.66666666666667|
|1989|81.33333333333333|
|1993|85.83333333333333|
|1992|79.85714285714286|
|1973|             84.0|
|1977|             89.0|
|1996|             77.0|
|1985|             74.5|
|1980|             85.0|
|1981|             81.5|
|NULL|72.76923076923077|
+----+-----------------+
only showing top 20 rows



In [94]:
result14 = year_df.groupby('year').agg(count('movie_rate').alias('yearMovie'))
result14 = result14.filter(col('yearMovie').isNotNull())
saveTable(result14,"yearMovie")

Saving yearMovie
+----+---------+
|year|yearMovie|
+----+---------+
|2010|       34|
|2009|       26|
|2004|       19|
|2016|       78|
|2003|       21|
|2020|       53|
|1998|        6|
|2017|       89|
|1999|       14|
|1974|        3|
|1989|        6|
|1993|        6|
|1976|        0|
|1992|        7|
|1973|        1|
|1977|        1|
|1996|        5|
|1985|        2|
|1980|        2|
|1981|        2|
+----+---------+
only showing top 20 rows



In [5]:
df_clean = catMovieData.filter(
    col("movie_all_ticket").rlike("^\\d+$"),
)
df_numeric = df_clean.withColumn(
    "movie_all_ticket_num",
    col("movie_all_ticket").cast("integer")
)
year_df = df_numeric.filter(col('movie_release').isNotNull())
year_df = df_numeric.withColumn('year',year(to_date('movie_release',"yyyy-MM-dd")))
result15 = year_df.groupby('year').agg(sum('movie_all_ticket').alias('yearTotalTicket'))
result15 = result15.filter(col('yearTotalTicket').isNotNull())
saveTable(result15,"yearTotalTicket")

Saving yearTotalTicket


25/04/20 20:20:50 WARN SessionState: METASTORE_FILTER_HOOK will be ignored, since hive.security.authorization.manager is set to instance of HiveAuthorizerFactory.


+----+---------------+
|year|yearTotalTicket|
+----+---------------+
|2010|       351241.0|
|2009|       250585.0|
|2004|        83613.0|
|2016|      3224473.0|
|2003|        31440.0|
|2020|      1937872.0|
|2017|      4864335.0|
|1998|       144761.0|
|1999|        16221.0|
|1993|            6.0|
|1989|            4.0|
|NULL|         4682.0|
|2024|      4336418.0|
|2025|      2381937.0|
|2014|      1082537.0|
|2021|      3985963.0|
|2019|      5619814.0|
|2002|        80561.0|
|2022|      2801672.0|
|2023|      5197581.0|
+----+---------------+
only showing top 20 rows



In [8]:
group_df = year_df.groupby('year','movie_name').agg({'movie_all_ticket':'sum'}).withColumnRenamed('sum(movie_all_ticket)','movie_all_ticket')

window = Window.partitionBy('year').orderBy(col('movie_all_ticket').desc())
rank = group_df.withColumn('rank',row_number().over(window))
result17 = rank.filter(col('rank') <= 10).drop(col('rank'))
saveTable(result17,'yearTopMovie')

Saving yearTopMovie
+----+-------------------------------------+----------------+
|year|                           movie_name|movie_all_ticket|
+----+-------------------------------------+----------------+
|NULL|                               碟中谍|          4510.0|
|NULL|                             透明侠侣|            69.0|
|NULL|                           她的小梨涡|            36.0|
|NULL|北京希杰星星国际影城有限公司将台分...|            14.0|
|NULL|苏州文化艺术中心管理有限公司意大利...|            13.0|
|NULL|    武汉百丽宫影院有限公司2023芳华...|             9.0|
|NULL|                         根本停不下来|             8.0|
|NULL|                       风起前的蒲公英|             8.0|
|NULL|  英皇电影城（北京）有限公司英皇20...|             8.0|
|NULL| 广州百丽宫影院有限公司2023德国电影展|             5.0|
|1988|                               红高粱|            89.0|
|1989|                             开国大典|             4.0|
|1993|                             霸王别姬|             5.0|
|1993|                         唐伯虎点秋香|             1.0|
|1994|                       背靠背，脸对脸|         