Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
wanglu committed Jun 10, 2021
1 parent 19d6c93 commit d973b7d
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 61 deletions.
Binary file modified dist/douyincrawler.exe
Binary file not shown.
127 changes: 66 additions & 61 deletions douyincrawler.py
Expand Up @@ -52,70 +52,75 @@ def get_video(url, title):
return f'{title.split("/")[-1][:-4]} ===> 下载成功。'
except Exception as e:
total_num -= 1
with open(f'{Path}/失败链接.txt', 'a', encoding='utf-8') as f:
f.write(url)
f.write('\n')
with open(f'{Path}/失败链接.txt', 'a', encoding='utf-8') as t:
t.write(url)
t.write('\n')
return f'{title.split("/")[-1][:-4]} ===> 下载失败。'


awemeurl = 'https://www.iesdouyin.com/web/api/v2/aweme/post/?'
douyinurl = 'https://www.iesdouyin.com/web/api/v2/user/info/?'
headers = {
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 ' + \
'(KHTML, like Gecko) Chrome/89.0.4389.114 Mobile Safari/537.36'
}

input_str = input('粘贴你要爬取的抖音号的链接:')
issue_start = input('输入你要从哪个时间开始爬取(2018年1月:输入2018.01)回车默认此时间:')
if issue_start == '':
issue_start = '2018.01'
if issue_start.split('.')[-1][0] == '0':
month = int(issue_start.split('.')[-1][-1])
else:
month = int(issue_start.split('.')[-1])
year = int(issue_start.split('.')[0])
current = datetime.date(year, month, 1)
today = datetime.date.today()
timepool = []
while current <= today:
if __name__ == '__main__':
awemeurl = 'https://www.iesdouyin.com/web/api/v2/aweme/post/?'
douyinurl = 'https://www.iesdouyin.com/web/api/v2/user/info/?'
headers = {
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/' + \
'537.36(KHTML, like Gecko) Chrome/89.0.4389.114 Mobile Safari/537.36'
}

input_str = input('粘贴你要爬取的抖音号的链接:')
issue_start = input('输入你要从哪个时间开始爬取(2018年1月:输入2018.01)回车默认此时间:')

if issue_start == '':
issue_start = '2018.01'
if issue_start.split('.')[-1][0] == '0':
month = int(issue_start.split('.')[-1][-1])
else:
month = int(issue_start.split('.')[-1])
year = int(issue_start.split('.')[0])
current = datetime.date(year, month, 1)
today = datetime.date.today()
timepool = []
while current <= today:
timepool.append(current.strftime('%Y-%m-%d 00:00:00'))
current += relativedelta(months=1)
timepool.append(current.strftime('%Y-%m-%d 00:00:00'))
current += relativedelta(months=1)
timepool.append(current.strftime('%Y-%m-%d 00:00:00'))

shroturl = re.findall('[a-z]+://[\S]+', input_str, re.I | re.M)[0]
startpage = requests.get(url=shroturl, headers=headers, allow_redirects=False)
location = startpage.headers['location']
sec_uid = re.findall('(?<=sec_uid=)[a-z,A-Z,0-9, _, -]+', location, re.M | re.I)[0]
getname = requests.get(url=f'{douyinurl}sec_uid={sec_uid}', headers=headers).text
userinfo = json.loads(getname)
name = userinfo['user_info']['nickname']
Path = name

print('\n获取视频链接中,请稍候。。。。。。\n')
json_data = {}
total_num = 0
k = len(timepool)

task_list = []
with ThreadPoolExecutor() as executor:
for i in range(k):
if i < k - 1:
task = executor.submit(get_data, timepool[i], timepool[i + 1])

shroturl = re.findall('[a-z]+://[\S]+', input_str, re.I | re.M)[0]
startpage = requests.get(url=shroturl, headers=headers, allow_redirects=False)
location = startpage.headers['location']
sec_uid = re.findall('(?<=sec_uid=)[a-z,A-Z,0-9, _, -]+', location, re.M | re.I)[0]
getname = requests.get(url=f'{douyinurl}sec_uid={sec_uid}', headers=headers).text
userinfo = json.loads(getname)
name = userinfo['user_info']['nickname']
Path = name

os.makedirs(f'{Path}', exist_ok=True)
with open(f'{Path}/分享链接.txt', 'a', encoding='utf-8') as f:
f.write(shroturl)
print('\n获取视频链接中,请稍候。。。。。。\n')
json_data = {}
total_num = 0
k = len(timepool)

task_list = []
with ThreadPoolExecutor() as executor:
for i in range(k):
if i < k - 1:
task = executor.submit(get_data, timepool[i], timepool[i + 1])
task_list.append(task)
for res in as_completed(task_list):
res.result()

task_list = []
with ThreadPoolExecutor() as executor:
for key, value in json_data.items():
task = executor.submit(get_video, key, value)
task_list.append(task)
for res in as_completed(task_list):
res.result()

task_list = []
with ThreadPoolExecutor() as executor:
for key, value in json_data.items():
task = executor.submit(get_video, key, value)
task_list.append(task)
for res in as_completed(task_list):
print(res.result())

print(f'\n共下载 {total_num} 个抖音视频,在当前目录下查看。')
print('Enjoy it')
print('Powered by wanglu58\n')
key = input('按回车键退出\n')
while key != '':
for res in as_completed(task_list):
print(res.result())

print(f'\n共下载 {total_num} 个抖音视频,请在当前目录下查看。')
print('Enjoy it')
print('Powered by wanglu58\n')
key = input('按回车键退出\n')
while key != '':
key = input('按回车键退出\n')

0 comments on commit d973b7d

Please sign in to comment.