In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
from bson import ObjectId
from pymongo import MongoClient
from scipy.optimize import minimize

In [14]:
# Column names handed to ProjectReviewData as `diffu_colnames`.
DIFFUSION_COLNAMES = [
    "星期",
    "关注数",
    "支持者数",
    "点赞数",
    "完成百分比",
    "筹集金额",
]

# Keys read from every review record (handed over as `review_colnames`).
# Kept as a list: ProjectReviewData concatenates it with another list.
REVIEW_COLNAMES = [
    "createTime",
    "topicId",
    "nicknameShow",
    "topicContent",
    "likeCount",
    "replys",
]

In [11]:
class ProjectReviewData:
    """Flatten all review (comment) data of one crowdfunding project.

    Wraps one project document (a dict-like mapping) and turns its reviews
    into a pandas DataFrame with one row per review. Each row is annotated
    with the project phase the review was written in and with the funding
    snapshot that was current at that moment.

    Parameters
    ----------
    proj : mapping
        The project document. The keys read are listed in ``__init__``; the
        phase classification additionally reads the ``状态变换时间*`` keys.
    diffu_colnames : list of str
        Stored on the instance; not otherwise used by this class.
    review_colnames : list of str
        Keys to read from every review record. The ``"createTime"`` entry
        becomes the row index, every other entry becomes a column.
    """

    def __init__(self, proj, diffu_colnames, review_colnames):
        self.proj = proj
        self.diffu_colnames = diffu_colnames
        self.review_colnames = review_colnames
        try:
            self.id = self.proj["_id"]
            self.name = self.proj["项目名称"]
            self.state = self.proj["状态"]
            self.duration = self.proj["众筹期限"]
            self.aim_fund = self.proj["目标金额"]
            self.dyn_info = self.proj["项目动态信息"]
            self.final_fund = self.proj["项目动态信息"][-1]["筹集金额"]
            self.company_name = self.proj["公司名称"]
            self.company_phone = self.proj["公司电话"]
            self.link = self.proj["发起人链接"]
            self.category = self.proj["所属类别"]
            self.review = self.proj["评论"]
            self.num_review = self.proj["评论"]["总评论数"]
        except Exception as e:
            # Deliberately best-effort: an incomplete document is reported
            # instead of raised. Attributes listed after the failing lookup
            # stay unset, so methods needing them raise AttributeError later.
            print("ProjectReviewData: incomplete project document: {!r}".format(e))

    def get_dyn_diff_data(self):
        """Return the funding snapshots with consecutive duplicates removed.

        Walks ``self.dyn_info`` in order and keeps a snapshot only when its
        ``更新时间`` differs from the previously kept one. The first snapshot
        is always kept.

        Returns
        -------
        dict of str -> list
            One list per field (``更新时间``, ``支持者数``, ``关注数``,
            ``点赞数``, ``完成百分比``, ``筹集金额``); all lists have equal
            length and are empty when there are no snapshots.
        """
        fields = ("更新时间", "支持者数", "关注数", "点赞数", "完成百分比", "筹集金额")
        records = {field: [] for field in fields}

        # Sentinel that compares unequal to every real timestamp, so the very
        # first snapshot is recorded instead of being compared with itself.
        last_kept_time = object()
        for snapshot in self.dyn_info:
            if snapshot["更新时间"] != last_kept_time:
                for field in fields:
                    records[field].append(snapshot[field])
                last_kept_time = snapshot["更新时间"]

        return records

    def _phase_of(self, when):
        """Return the project-phase label for the moment ``when``.

        The checks run in a fixed order and the first match wins. Transition
        times are looked up lazily on ``self.proj``, so only the keys needed
        to reach a decision are read.
        """
        doc = self.proj
        if when <= doc["状态变换时间1-2"] and when >= doc["状态变换时间0-1"]:
            return "预热-筹备"
        if when > doc["状态变换时间1-2"] and when <= doc["状态变换时间2-3"]:
            return "筹备-众筹成功"
        if when > doc["状态变换时间2-3"] and when <= doc["状态变换时间3-4"]:
            return "众筹成功-众筹结束"
        if when > doc["状态变换时间3-4"]:
            return "众筹结束后"
        if when <= doc["状态变换时间0-1"]:
            return "预热前"
        return None

    @staticmethod
    def _snapshot_at(when, dynamic_data):
        """Return ``[筹集金额, 完成百分比, 更新时间]`` current at ``when``.

        Picks the snapshot just before the first one whose ``更新时间`` is
        later than ``when``; if none is later, the last snapshot. When there
        is no earlier snapshot (or no snapshot at all) the three values are
        ``None`` rather than wrapping around to the final snapshot.
        """
        chosen = None
        for pos, update_time in enumerate(dynamic_data["更新时间"]):
            if when < update_time:
                break
            chosen = pos

        if chosen is None:
            return [None, None, None]
        return [
            dynamic_data["筹集金额"][chosen],
            dynamic_data["完成百分比"][chosen],
            dynamic_data["更新时间"][chosen],
        ]

    def get_review_data(self):
        """Extract the configured fields of every review and tag its phase.

        Returns
        -------
        tuple
            ``(indices, new_indices, values)`` where ``indices`` holds the raw
            ``createTime`` values, ``new_indices`` the same moments as
            ``datetime`` objects, and ``values`` one list per review with the
            remaining ``review_colnames`` fields followed by the phase label.
        """
        # Step 1: pull the configured fields out of every review record.
        raw_review = self.review["评论详细"]
        indices = []  # raw createTime of each review
        values = []   # remaining fields of each review, one list per review
        for topicid in raw_review:
            record = raw_review[topicid]
            row = []
            for keyinfo in self.review_colnames:
                if keyinfo == "createTime":
                    indices.append(record[keyinfo])
                else:
                    row.append(record[keyinfo])
            values.append(row)

        # Step 2: convert the creation times and classify each review's phase.
        new_indices = []
        for row, raw_time in zip(values, indices):
            try:
                created = datetime.datetime.strptime(raw_time, "%Y-%m-%d %H:%M:%S")
            except (TypeError, ValueError):
                # Not a formatted string: treat it as epoch milliseconds.
                # NOTE(review): fromtimestamp yields naive *local* time;
                # confirm the stored transition/update times use the same
                # convention.
                created = datetime.datetime.fromtimestamp(int(raw_time) / 1e3)
            new_indices.append(created)
            # The phase is derived from this instance's own document, not from
            # whatever happens to be bound to a global name in the notebook.
            row.append(self._phase_of(created))

        return indices, new_indices, values

    def merge_data(self):
        """Build the per-review DataFrame enriched with funding snapshots.

        Returns
        -------
        pandas.DataFrame
            Indexed by the raw ``createTime`` values (sorted), with the
            non-``createTime`` review fields, ``category`` (phase label),
            ``筹集金额``, ``完成百分比``, ``更新时间`` and the project-level
            columns ``项目id``, ``所属类别``, ``目标金额``, ``众筹期限``.
        """
        indices, new_indices, values = self.get_review_data()
        dynamic_data = self.get_dyn_diff_data()
        for row, created in zip(values, new_indices):
            row.extend(self._snapshot_at(created, dynamic_data))

        # Mirror get_review_data: createTime is dropped by name, wherever it
        # sits in review_colnames, so labels always line up with the values.
        review_columns = [name for name in self.review_colnames if name != "createTime"]
        columns = review_columns + ["category", "筹集金额", "完成百分比", "更新时间"]

        pdframe = pd.DataFrame(values, index=indices, columns=columns).sort_index()
        pdframe["项目id"] = self.id
        pdframe["所属类别"] = self.category
        pdframe["目标金额"] = self.aim_fund
        pdframe["众筹期限"] = self.duration
        return pdframe

In [34]:
# Connect with a no-argument MongoClient(), i.e. pymongo's default settings.
client = MongoClient()
# Dictionary-style lookups; they name the same database and collection that
# attribute access would.
db = client["moniter_crowdfunding"]
s_project = db["success_projects"]

In [15]:
# Id of the project document analysed in the following cells.
PROJECT_ID = '101993'

proj = s_project.find_one({'_id': PROJECT_ID})
if proj is None:
    # find_one returns None when no document matches; stop here with a clear
    # message instead of failing later with an unrelated attribute error.
    raise LookupError("no project with _id {!r} in success_projects".format(PROJECT_ID))

proj_data = ProjectReviewData(
    proj,
    diffu_colnames=DIFFUSION_COLNAMES,
    review_colnames=REVIEW_COLNAMES,
)

In [16]:
# Build the merged per-review DataFrame for the loaded project; as the cell's
# last expression, the returned frame is what gets displayed.
proj_data.merge_data()

Unnamed: 0,topicId,nicknameShow,topicContent,likeCount,replys,category,筹集金额,完成百分比,更新时间,项目id,所属类别,目标金额,众筹期限
1567933415000,9702458,jd_662dff4fabc4b,2999档位是真的蜜蜡吗？保真吗？白色的垫片是什么材质？,0,1,筹备-众筹成功,899,9.0,2019-09-08 12:29:31.893,101993,文化传承,10000,30
1568008871000,9702975,jd_7ab0d2997fcf7,南红大料、好料，无优化，真是越来越少了。喜欢，支持啦！,0,1,筹备-众筹成功,3197,32.0,2019-09-09 12:30:00.761,101993,文化传承,10000,30
1568009064000,9702977,jd_7ab0d2997fcf7,正红色，好漂亮呀，关注🎈🎈🎈🎉🎉🎉,0,1,筹备-众筹成功,3197,32.0,2019-09-09 12:30:00.761,101993,文化传承,10000,30
1568177711000,9704665,wdaGXfrDwmNFvv,感觉颜色很正的样子，喜欢，支持！！！,1,1,筹备-众筹成功,10594,106.0,2019-09-11 12:27:37.532,101993,文化传承,10000,30
1568356340000,9706256,半亩菜园,您好，请问有散珠吗？我想要2.0的单珠。,0,1,筹备-众筹成功,13196,132.0,2019-09-13 12:27:38.705,101993,文化传承,10000,30
