Skip to content

Commit 2c5f6a9

Browse files
author
yincongxian
committed
Add the solution to 0008
1 parent fbbbeba commit 2c5f6a9

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

renzongxian/0008/0008.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Source:https://github.com/Show-Me-the-Code/show-me-the-code
2+
# Author:renzongxian
3+
# Date:2014-12-20
4+
# Python 3.4
5+
6+
"""
7+
8+
Problem 0008: given an HTML file, find its body text.
9+
10+
"""
11+
12+
import urllib.request
13+
import re
14+
15+
16+
def get_body(url, encoding='GBK'):
    """Fetch an HTML page and return the text of its ``<p>`` paragraphs.

    Args:
        url: Address of the page to download, passed unchanged to
            ``urllib.request.urlopen``.
        encoding: Character encoding used to decode the response bytes.
            Defaults to ``'GBK'``, the value that used to be hard-coded.

    Returns:
        A list of strings, one per ``<p>...</p>`` match, in document order.
        At most one tag directly after ``<p>`` and one directly before
        ``</p>`` are dropped from each match; any other inline markup is
        returned as-is.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        UnicodeDecodeError: If the response bytes are not valid in
            ``encoding``.
    """
    # '.' does not match newlines here, so only paragraphs that open and
    # close on the same line are captured.
    paragraph = re.compile(r'<p>(?:<.[^>]*>)?(.*?)(?:<.[^>]*>)?</p>')
    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(url) as response:
        html_content = response.read()
    return paragraph.findall(html_content.decode(encoding))
21+
22+
23+
if __name__ == '__main__':
    # Download the sample article and save each extracted paragraph on its
    # own line in result.txt.
    body = get_body('http://tech.163.com/14/1219/01/ADPT7MTE000915BF.html')
    # `with` guarantees the file is closed even if a write fails, and the
    # explicit encoding keeps the Chinese text writable under any locale.
    with open('result.txt', 'w', encoding='utf-8') as file_object:
        file_object.writelines(paragraph + '\n' for paragraph in body)
29+

0 commit comments

Comments
 (0)