We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fbbbeba commit 2c5f6a9Copy full SHA for 2c5f6a9
renzongxian/0008/0008.py
@@ -0,0 +1,29 @@
1
+# Source:https://github.com/Show-Me-the-Code/show-me-the-code
2
+# Author:renzongxian
3
+# Date:2014-12-20
4
+# Python 3.4
5
+
6
+"""
7
8
+第 0008 题:一个HTML文件,找出里面的正文。
+"""
9
10
11
12
+import urllib.request
13
+import re
14
15
16
def get_body(url, encoding='GBK'):
    """Fetch an HTML page and return the text of its simple paragraphs.

    The page is downloaded, decoded, and scanned with a regular expression
    for ``<p>...</p>`` elements.  The pattern is deliberately simple:

    * only a bare ``<p>`` opening tag (no attributes) is recognised;
    * ``.`` does not match newlines, so a paragraph must sit on one line;
    * at most one tag directly after ``<p>`` and one tag directly before
      ``</p>`` are stripped from the captured text.

    Args:
        url: Address of the page; anything ``urllib.request.urlopen``
            accepts.
        encoding: Character encoding used to decode the downloaded bytes.
            Defaults to ``'GBK'``.

    Returns:
        A list of strings, one per matched paragraph, in document order.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        UnicodeDecodeError: If the payload is not valid in ``encoding``.
    """
    # Use a context manager so the connection is closed deterministically
    # rather than whenever the response object is garbage-collected.
    with urllib.request.urlopen(url) as response:
        html_content = response.read()

    paragraph_pattern = re.compile(r'<p>(?:<.[^>]*>)?(.*?)(?:<.[^>]*>)?</p>')
    return paragraph_pattern.findall(html_content.decode(encoding))
21
22
23
if __name__ == '__main__':
    # Download the sample article and dump each extracted paragraph on its
    # own line in result.txt.
    body = get_body('http://tech.163.com/14/1219/01/ADPT7MTE000915BF.html')
    # The context manager closes the file even if a write fails; the explicit
    # encoding keeps non-ASCII text from depending on the platform's locale.
    with open('result.txt', 'w', encoding='utf-8') as file_object:
        file_object.writelines(line + '\n' for line in body)
29
0 commit comments