In [2]:
import numpy as np
import pandas as pd
import pyecharts
from pyecharts.charts import Bar
from pyecharts.charts import Line
from pyecharts.charts import Pie
from pyecharts import options as opts
import os

print(pyecharts.__version__)

1.8.0


In [3]:
path = '../data/test1/ccf_offline_stage1_test_revised.csv'
data = pd.read_csv(path)
dataset = data.copy()
#将 Distance 中空值填充为-1
dataset['Distance'].fillna(-1, inplace=True)
#将领券时间转换为时间类型
dataset['Date_received'] = pd.to_datetime(dataset['Date_received'], format='%Y%m%d')
#判断优惠券是否为满减类型
dataset['Ismanjian'] = dataset['Discount_rate'].map(lambda x: 1 if ":" in str(x) else 0)
#将优惠券转换为折扣率
dataset['Discount_rate'] = dataset['Discount_rate'].map(lambda x: round(float(x), 5)
if ':' not in str(x)
else round((float(str(x).split(':')[0]) - float(str(x).split(':')[1])) / float(str(x).split(':')[0]), 5))
#数据打标：优惠券折扣大于等于 8 折的为 1，反之为 0
dataset['Label'] = list(map(lambda x: 1 if x <= 0.8 else 0, dataset['Discount_rate']))
#按照日期转换为领券星期
dataset["Weekday_receive"] = dataset['Date_received'].apply(lambda x: x.isoweekday())
#获取领券月份
dataset['Received_month'] = dataset['Date_received'].apply(lambda x: x.month)
#生成处理后的数据表格
path = 'ccf_offline_stage1_test_revised_output'
if not os.path.exists(path):
    os.makedirs(path)
dataset.to_csv(path + 'ccf_offline_stage1_test_revised_output.csv', index=False)

In [7]:
dataset_1 = dataset[dataset['Date_received'].notna()]  #取出领券日期非零项
group = dataset_1.groupby('Date_received', as_index=False)['Coupon_id'].count()
#按照领券日期进行分组，统计优惠券数量
Bar_1 = (
    Bar(
        init_opts=opts.InitOpts
        (width='1500px', height='600px')
    )
    #添加 x 轴数
    .add_xaxis(list(group['Date_received']))
    #添加 y 轴数据
    .add_yaxis('', list(group['Coupon_id']))
    #全局配置
    .set_global_opts(
        title_opts=opts.TitleOpts(title='每天领券数'),  #柱状图标题
        legend_opts=opts.LegendOpts(is_show=True),  #显示图例

        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=60, horizontal_align='right'),
                                 interval=1),
    )  #显示横坐标数值，逆时针旋转 60 度)
    #系列配置
    .set_series_opts(
        opts.LabelOpts(is_show=True),  #显示值
        markline_opts=opts.MarkLineOpts(
            data=[
                opts.MarkLineItem
                (type_='max', name='最大值')
            ]
        )
    )
)
Bar_1.render(path + '/Bar_1.html')

  super().__init__(init_opts=init_opts)


'E:\\code\\pycharm\\learn\\ccf_offline_stage1_test_revised_output\\Bar_1.html'

In [13]:
import collections

distance = dataset[dataset['Distance'] != -1]['Distance'].values
#-1 为原空数据，筛选所有非空数据
distance = dict(collections.Counter(distance))
#对所有非空数据进行计数后存放到新生成的字典里
x = list(distance.keys())  #距离字典的所有键存到 x 列表
x.sort(reverse=False)  #对距离进行排序
y = list(distance.values())  #距离字典的所有值存到 y 列表
Bar_2 = (
    Bar()
    #添加 x 轴数据
    .add_xaxis(x)
    #添加 y 轴数据
    .add_yaxis('', y)
    #全局配置
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各类距离消费次数')
    )
    #系列配置
    .set_series_opts(
        opts.LabelOpts(is_show=True)
    )
)
Bar_2.render(path + '/Bar_2.html')

  super().__init__(init_opts=init_opts)


'E:\\code\\pycharm\\learn\\ccf_offline_stage1_test_revised_output\\Bar_2.html'

In [14]:
rate = [dataset[dataset['Distance'] == i]['Label'].value_counts()[1] / dataset[dataset['Distance'] == i][
    'Label'].value_counts().sum() for i in range(11)]
Bar_3 = (
    Bar()
    #添加 x 轴数据
    .add_xaxis(list(range(11)))
    #添加 y 轴数据
    .add_yaxis('核销率', list(rate))
    #全局配置
    .set_global_opts(title_opts=opts.TitleOpts(title='消费距离与核销率'))
    #系列配置
    .set_series_opts(
        opts.LabelOpts(is_show=False)
    )
)
Bar_3.render(path + '/Bar_3.html')

  super().__init__(init_opts=init_opts)


'E:\\code\\pycharm\\learn\\ccf_offline_stage1_test_revised_output\\Bar_3.html'

In [15]:
received = dataset[['Discount_rate']]
received['cnt'] = 1
received = received.groupby('Discount_rate').agg('sum').reset_index()
consume_coupon = dataset[dataset['Label'] == 1][['Discount_rate']]
consume_coupon['cnt_2'] = 1
consume_coupon = consume_coupon.groupby('Discount_rate').agg('sum').reset_index()
data = received.merge(consume_coupon, on='Discount_rate', how='left').fillna(0)
Bar_4 = (
    Bar()
    .add_xaxis([float('%.4f' % x) for x in list(data.Discount_rate)])
    #添加 x 轴数据
    .add_yaxis('领取', list(data.cnt))
    #添加 y 轴数据
    .add_yaxis('核销', list(data.cnt_2))
    #全局配置
    .set_global_opts(title_opts={'text': '领取与核销'})
    #系列配置
    .set_series_opts(
        opts.LabelOpts(is_show=True)  # 显示值大小
    )
)
Bar_4.render(path + '/Bar_4.html')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  received['cnt'] = 1
  super().__init__(init_opts=init_opts)


'E:\\code\\pycharm\\learn\\ccf_offline_stage1_test_revised_output\\Bar_4.html'

In [16]:
week_coupon = dataset[dataset['Label'] == 1]['Weekday_receive'].value_counts()
week_received = dataset[dataset['Weekday_receive'].notna()]['Weekday_receive'].value_counts()
week_coupon.sort_index(inplace=True)
week_received.sort_index(inplace=True)
line_1 = (
    Line()
    .add_xaxis([str(x) for x in range(1, 8)])
    .add_yaxis('领取', list(week_received))
    .add_yaxis('核销', list(week_coupon))
    .set_global_opts(title_opts={'text': '每周领券数与核销数折线图'})
)
line_1.render(path + '/line_1.html')
##############各类优惠券数量占比饼图################
v1 = ['折扣', '满减']
v2 = list(dataset[dataset['Date_received'].notna()]['Ismanjian'].value_counts(True))
print(v2)
pie_1 = (
    Pie()
    .add('', [list(v) for v in zip(v1, v2)])
    .set_global_opts(title_opts={'text': '各类优惠券数量占比饼图'})
    .set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c}'))
)
pie_1.render(path + '/pie_1.html')

[0.977419922562478, 0.022580077437522]


  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)


'E:\\code\\pycharm\\learn\\ccf_offline_stage1_test_revised_output\\pie_1.html'

In [17]:
v3 = list(dataset[dataset['Label'] == 1].Ismanjian.value_counts(True))
pie_2 = (
    Pie()
    .add('', [list(v) for v in zip(v1, v3)])
    .set_global_opts(title_opts={'text': '核销优惠券数量占比饼图'})
    .set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c}'))
)
pie_2.render(path + '/pie_2.html')
v4 = ['正例', '负例']
v5 = list(dataset['Label'].value_counts(True))
pie_3 = (
    Pie()
    .add('', [list(v) for v in zip(v4, v5)])
    .set_global_opts(title_opts={'text': '正负例饼图'})
    .set_series_opts(label_opts=opts.LabelOpts(formatter='{b}: {c}'))  # 格式化标签输出内容
)
pie_3.render(path + '/pie_3.html')

  super().__init__(init_opts=init_opts)
  super().__init__(init_opts=init_opts)


'E:\\code\\pycharm\\learn\\ccf_offline_stage1_test_revised_output\\pie_3.html'