# In [1]:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Yu Yang

# Zhihu recommend-feed scraper: fetches one page of the feed and saves it to a CSV file.

import requests
from requests.exceptions import RequestException
import time
import csv
import logging
import os

# Configure logging for the script: timestamp, logger name, level and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# NOTE: abspath() is given the literal string '__file__' (not the `__file__`
# variable), so the resulting directory is the current working directory.
CURRENT_FILE_PATH = os.path.dirname(os.path.abspath('__file__'))
# Full path of the CSV output file, located directly under CURRENT_FILE_PATH.
CSV_FILE_PATH = '/'.join((CURRENT_FILE_PATH, 'zhihu_recommend.csv'))

# Append one data row to the CSV file.
def write_data_to_csv(moive_info: list):
    """Append a single row to the CSV file at ``CSV_FILE_PATH``.

    Args:
        moive_info: Cell values for one row, in the same column order as the
            header written by ``config_default_csv``.
    """
    # newline='' hands line-ending control to the csv module (otherwise blank
    # rows appear on Windows); the explicit UTF-8 encoding keeps the Chinese
    # text independent of the platform's default encoding.
    with open(CSV_FILE_PATH, 'a', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(moive_info)


# Write the header row first; mode "w" overwrites any previous file.
def config_default_csv():
    """Create (or truncate) the CSV file and write its header row.

    The header follows the column order of the rows built by
    ``handle_question_result`` / ``handle_article_result``: tag, title,
    answer count, follower count, question comment count, creation time,
    comments on the answer, upvotes on the answer, answer excerpt.
    """
    header = [
        '标签',          # item action text (tag)
        '标题',          # title
        '回复数量',      # question answer_count
        '话题关注人数',  # question follower_count
        '话题评论人数',  # question comment_count
        '创建时间',      # creation time
        '该回答评论数',  # target comment_count
        '该回答赞同数',  # target voteup_count
        '回答',          # target excerpt
    ]
    # Same newline/encoding settings as write_data_to_csv so the whole file is
    # encoded consistently.
    with open(CSV_FILE_PATH, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(header)


# Endpoint queried by get_data_from_zhihu().
url = 'https://www.zhihu.com/api/v3/feed/topstory/recommend'

# NOTE(review): the cookie and session_token below are hard-coded in the
# source; they look like credentials captured from a browser session.
# Consider loading them from outside the source tree instead.
cookie = '_zap=06a20c28-5513-40f3-9d61-7c801cbf9b00; d_c0="AACnOPv2TA-PTpK7WdS6poIAn--DV1oxMcU=|1555643056"; _xsrf=wIEd6tN4RmC6MdK6buaARNa9osBmZhGP; capsion_ticket="2|1:0|10:1567129324|14:capsion_ticket|44:OTJkZWUyN2JhZTA1NGI4Mjk0NzZmYTJlNGY0OTM0Yzk=|b6a49ecdcb4bc80b7c0040ee14ef33c4ae3b4f4a0696cdfc9feb7606234dff1b"; z_c0="2|1:0|10:1567129348|4:z_c0|92:Mi4xRkh4MUFBQUFBQUFBQUtjNC1fWk1EeWNBQUFDRUFsVk5CQXlRWFFEeDRYT3JoUzFsa19VZ1J4R0FtYUlaRGUyRldR|034dbd023e3215ef217d74f0586fa372f59c1b59887c78ebd0a9d69a3c96a1a8"; tshl=; q_c1=19f51643b9714cdb9ccee998f8f14de4|1567130281000|1567130281000; tst=r; tgw_l7_route=7bacb9af7224ed68945ce419f4dea76d'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'

# Query-string parameters sent with the request.
params = dict(
    session_token='d2f3cb059b404a5a01b7b97059a95d24',
    desktop='true',
    page_number=2,
    limit=6,
    action='down',
    after_id=5,
)

# HTTP headers sent with the request.
headers = {
    'cookie': cookie,
    'user-agent': user_agent,
}

# Fetch data from Zhihu.
def get_data_from_zhihu(timeout=10):
    """Request the feed and return the ``data`` entry of the JSON response.

    Sends a GET request to the module-level ``url`` using the module-level
    ``headers`` and ``params``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The value stored under ``'data'`` in the JSON response, or ``None``
        when the request fails, the server answers with an error status, the
        body is not valid JSON, or ``'data'`` is missing/empty.
    """
    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        # Treat 4xx/5xx answers as failures; HTTPError is a RequestException.
        response.raise_for_status()
    except RequestException as err:
        logger.error('request to %s failed: %s', url, err)
        return None

    try:
        payload = response.json()
    except ValueError as err:
        # Raised when the body is not valid JSON (e.g. an HTML error page).
        logger.error('response from %s is not valid JSON: %s', url, err)
        return None

    data = payload.get('data') if isinstance(payload, dict) else None
    if not data:
        logger.debug('no data')
        return None
    return data

# Build a row for a feed item whose target is an answer to a question.
def handle_question_result(item):
    """Flatten an answer-type feed item into one CSV row.

    Args:
        item: Feed item dict. Its ``'target'`` dict holds the answer, and
            ``target['question']`` holds the question it belongs to.

    Returns:
        A list of nine values: item action text (tag), question title,
        question answer_count, question follower_count, question
        comment_count, question creation time formatted as
        ``YYYY-MM-DD HH:MM:SS`` in local time, target comment_count,
        target voteup_count and target excerpt_new. Missing values are
        ``None``; a missing creation time is the empty string.
    """
    # Tolerate a missing target/question instead of raising AttributeError.
    target = item.get('target') or {}
    question = target.get('question') or {}

    created_time = question.get('created')
    if created_time is None:
        # time.localtime(None) means "now"; do not invent a timestamp.
        local_time = ''
    else:
        local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(created_time))

    return [
        item.get('action_text'),         # tag
        question.get('title'),           # title
        question.get('answer_count'),
        question.get('follower_count'),
        question.get('comment_count'),
        local_time,
        target.get('comment_count'),
        target.get('voteup_count'),
        target.get('excerpt_new'),       # answer excerpt (not the full content)
    ]

# Build a row for a feed item whose target is an article.
def handle_article_result(item):
    """Flatten an article-type feed item into one CSV row.

    Args:
        item: Feed item dict whose ``'target'`` dict holds the article.

    Returns:
        A list of nine values: item action text (tag), article title, three
        empty strings (the question-only columns), creation time formatted
        as ``YYYY-MM-DD HH:MM:SS`` in local time, target comment_count,
        target voteup_count and target excerpt_new. Missing values are
        ``None``; a missing creation time is the empty string.
    """
    target = item.get('target') or {}

    # The timestamp is read from the feed item itself ('created_time'), not
    # from the target.
    # NOTE(review): confirm this is the intended field for articles.
    created_time = item.get('created_time')
    if created_time is None:
        # time.localtime(None) means "now"; do not invent a timestamp.
        local_time = ''
    else:
        local_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(created_time))

    return [
        item.get('action_text'),     # tag
        target.get('title'),         # title
        '',                          # question-only columns stay empty
        '',
        '',
        local_time,
        target.get('comment_count'),
        target.get('voteup_count'),
        target.get('excerpt_new'),   # article excerpt (not the full content)
    ]


# Process the list of feed items taken from the JSON response.
def handle_result_with(data):
    """Convert feed items into CSV rows.

    Args:
        data: Iterable of feed item dicts, or a falsy value.

    Returns:
        ``None`` when ``data`` is falsy; otherwise a list with one row per
        item whose target type is ``'article'`` or ``'answer'``. Items
        without a target and items of any other type are skipped.
    """
    if not data:
        return None

    rows = []
    for entry in data:
        payload = entry.get('target')
        if not payload:
            # No target: possibly an advertisement, so leave it out.
            logger.info('no target 可能是广告\n')
            continue

        kind = payload.get('type')
        if kind == 'answer':
            build_row = handle_question_result
        elif kind == 'article':
            build_row = handle_article_result
        else:
            # Other target types are not exported.
            continue
        rows.append(build_row(entry))

    return rows


def run():
    """Reset the CSV file, fetch the feed and append one row per item.

    The header is written first, so the file is truncated even when the
    fetch yields nothing.
    """
    config_default_csv()
    rows = handle_result_with(get_data_from_zhihu())
    if rows:
        for row in rows:
            write_data_to_csv(row)


# Run the scraper only when executed as a script (or in a notebook cell), so
# importing this module does not trigger network and file I/O.
if __name__ == '__main__':
    run()

