31
31
< link rel ="stylesheet " href ="/css/f25.css ">
32
32
< link rel ="stylesheet " href ="/css/highlight.css ">
33
33
34
- < link rel ="stylesheet " href ="/css/gitalk.css ">
35
-
36
34
37
35
<!-- Global site tag (gtag.js) - Google Analytics -->
38
36
< script async src ="https://www.googletagmanager.com/gtag/js?id=UA-147288599-1 "> </ script >
@@ -111,10 +109,51 @@ <h2 id="0-摘要"><a href="#0-摘要" class="headerlink" title="0.摘要"></a>0.
111
109
< li > 编写python代码并运行</ li >
112
110
< li > 展示词云结果</ li >
113
111
</ ol >
114
- < h2 id ="1-安装wordcloud "> < a href ="#1-安装wordcloud " class ="headerlink " title ="1.安装wordcloud "> </ a > 1.安装wordcloud</ h2 > < p > 可以在cmd窗口输入< figure class ="highlight plain "> < figcaption > < span > install wordcloud matplotlib```</ span > </ figcaption > < table > < tr > < td class ="gutter "> < pre > < span class ="line "> 1</ span > < br > < span class ="line "> 2</ span > < br > < span class ="line "> 3</ span > < br > < span class ="line "> 4</ span > < br > < span class ="line "> 5</ span > < br > < span class ="line "> 6</ span > < br > < span class ="line "> 7</ span > < br > < span class ="line "> 8</ span > < br > < span class ="line "> 9</ span > < br > < span class ="line "> 10</ span > < br > < span class ="line "> 11</ span > < br > < span class ="line "> 12</ span > < br > < span class ="line "> 13</ span > < br > < span class ="line "> 14</ span > < br > < span class ="line "> 15</ span > < br > < span class ="line "> 16</ span > < br > < span class ="line "> 17</ span > < br > < span class ="line "> 18</ span > < br > < span class ="line "> 19</ span > < br > < span class ="line "> 20</ span > < br > < span class ="line "> 21</ span > < br > < span class ="line "> 22</ span > < br > < span class ="line "> 23</ span > < br > < span class ="line "> 24</ span > < br > < span class ="line "> 25</ span > < br > < span class ="line "> 26</ span > < br > < span class ="line "> 27</ span > < br > < span class ="line "> 28</ span > < br > < span class ="line "> 29</ span > < br > < span class ="line "> 30</ span > < br > </ pre > </ td > < td class ="code "> < pre > < span class ="line "> </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> ## 2.准备红楼梦文本</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> 文本可以用下面链接下载</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> `https://github.com/flypythoncom/flypython/blob/master/wordcloud_hlm_seg.txt`</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> 或者可以自己写代码,对文本进行清洗,分词。</ span > < br > < span class ="line "> 这里需要安装jieba分词,`pip install jieba`</ span > < br > < span class ="line "> ``` python</ span > < br > < span class ="line "> import jieba</ span > < br > < span class ="line "> import re</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> special_character_removal = re.compile(r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^-——=&#@¥『』]', re.IGNORECASE)</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> fw=open("hlm_seg.txt","w",encoding="utf-8")</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> with open('hlm.txt',encoding="utf-8") as fp:</ span > < br > < span class ="line "> for line in fp:</ span > < br > < span class ="line "> l = special_character_removal.sub('', line.strip())</ span > < br > < span class ="line "> words=jieba.cut(l)</ span > < br > < span class ="line "> t=" ".join(words)</ span > < br > < span class ="line "> fw.write(t)</ span > < br > < span class ="line "> fw.write("\n")</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> fw.close()</ span > < br > </ pre > </ td > </ tr > </ table > </ figure > </ p >
115
- < h2 id ="3-编写词云python代码并运行 "> < a href ="#3-编写词云python代码并运行 " class ="headerlink " title ="3. 编写词云python代码并运行 "> </ a > 3. 编写词云python代码并运行</ h2 > < figure class ="highlight python "> < table > < tr > < td class ="gutter "> < pre > < span class ="line "> 1</ span > < br > < span class ="line "> 2</ span > < br > < span class ="line "> 3</ span > < br > < span class ="line "> 4</ span > < br > < span class ="line "> 5</ span > < br > < span class ="line "> 6</ span > < br > < span class ="line "> 7</ span > < br > < span class ="line "> 8</ span > < br > < span class ="line "> 9</ span > < br > < span class ="line "> 10</ span > < br > < span class ="line "> 11</ span > < br > < span class ="line "> 12</ span > < br > < span class ="line "> 13</ span > < br > < span class ="line "> 14</ span > < br > < span class ="line "> 15</ span > < br > < span class ="line "> 16</ span > < br > < span class ="line "> 17</ span > < br > < span class ="line "> 18</ span > < br > < span class ="line "> 19</ span > < br > < span class ="line "> 20</ span > < br > < span class ="line "> 21</ span > < br > < span class ="line "> 22</ span > < br > < span class ="line "> 23</ span > < br > < span class ="line "> 24</ span > < br > < span class ="line "> 25</ span > < br > </ pre > </ td > < td class ="code "> < pre > < span class ="line "> < span class ="keyword "> from</ span > os < span class ="keyword "> import</ span > path </ span > < br > < span class ="line "> < span class ="keyword "> from</ span > wordcloud < span class ="keyword "> import</ span > WordCloud</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> d = path.dirname(__file__) </ span > < br > < span class ="line "> < span class ="comment "> # Read the whole text. </ span > </ span > < br > < span class ="line "> text = open(path.join(d, < span class ="string "> 'hlm_seg.txt'</ span > ),encoding=< span class ="string "> "utf-8"</ span > ).read() </ span > < br > < span class ="line "> < span class ="comment "> # Generate a word cloud image </ span > </ span > < br > < span class ="line "> < span class ="comment "> # font=path.join(d, "simkai.ttf") </ span > </ span > < br > < span class ="line "> font=< span class ="string "> 'C:/Windows/Fonts/simkai.ttf'</ span > </ span > < br > < span class ="line "> wordcloud = WordCloud(font_path=font,< span class ="comment "> #设置中文字体,不指定就会出现中文不显示 </ span > </ span > < br > < span class ="line "> width=< span class ="number "> 1024</ span > ,< span class ="comment "> #宽 </ span > </ span > < br > < span class ="line "> height=< span class ="number "> 840</ span > ,< span class ="comment "> #高 </ span > </ span > < br > < span class ="line "> background_color=< span class ="string "> 'white'</ span > ,< span class ="comment "> #设置背景色 </ span > </ span > < br > < span class ="line "> < span class ="comment "> # max_words=100,#最大词汇数 </ span > </ span > < br > < span class ="line "> < span class ="comment "> # max_font_size=100#最大号字体 </ span > </ span > < br > < span class ="line "> ).generate(text) </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> < span class ="comment "> # Display the generated image: </ span > </ span > < br > < span class ="line "> < span class ="comment "> # the matplotlib way: </ span > </ span > < br > < span class ="line "> < span class ="keyword "> import</ span > matplotlib.pyplot < span class ="keyword "> as</ span > plt </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> plt.figure() </ span > < br > < span class ="line "> plt.imshow(wordcloud) </ span > < br > < span class ="line "> plt.axis(< span class ="string "> "off"</ span > ) </ span > < br > < span class ="line "> plt.show()</ span > < br > </ pre > </ td > </ tr > </ table > </ figure >
112
+ < h2 id ="1-安装wordcloud "> < a href ="#1-安装wordcloud " class ="headerlink " title ="1.安装wordcloud "> </ a > 1.安装wordcloud</ h2 > < p > 可以在cmd窗口输入</ p >
113
+ < p > < code > pip install wordcloud matplotlib</ code > </ p >
114
+ < p > < img src ="http://jcjview.github.io/img/wordcloud001.png " alt > </ p >
115
+ < h2 id ="2-准备红楼梦文本 "> < a href ="#2-准备红楼梦文本 " class ="headerlink " title ="2.准备红楼梦文本 "> </ a > 2.准备红楼梦文本</ h2 > < p > 文本可以用下面链接下载</ p >
116
+ < p > < code > https://github.com/flypythoncom/flypython/blob/master/wordcloud_hlm_seg.txt</ code > </ p >
117
+ < p > 或者可以自己写代码,对文本进行清洗,分词。< br > 这里需要安装jieba分词,< code > pip install jieba</ code > </ p >
118
+ < pre > < code > import jieba
119
+ import re
120
+
121
+ special_character_removal = re.compile(r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^-——=&#@¥『』]', re.IGNORECASE)
122
+
123
+ fw=open("hlm_seg.txt","w",encoding="utf-8")
124
+
125
+ with open('hlm.txt',encoding="utf-8") as fp:
126
+ for line in fp:
127
+ l = special_character_removal.sub('', line.strip())
128
+ words=jieba.cut(l)
129
+ t=" ".join(words)
130
+ fw.write(t)
131
+ fw.write("\n")
132
+ fw.close()</ code > </ pre > < h2 id ="3-编写词云python代码并运行 "> < a href ="#3-编写词云python代码并运行 " class ="headerlink " title ="3. 编写词云python代码并运行 "> </ a > 3. 编写词云python代码并运行</ h2 > < pre > < code > from os import path
133
+ from wordcloud import WordCloud
134
+
135
+ d = path.dirname(__file__)
136
+ # Read the whole text.
137
+ text = open(path.join(d, 'hlm_seg.txt'),encoding="utf-8").read()
138
+ # Generate a word cloud image
139
+ # font=path.join(d, "simkai.ttf")
140
+ font='C:/Windows/Fonts/simkai.ttf'
141
+ wordcloud = WordCloud(font_path=font,#设置中文字体,不指定就会出现中文不显示
142
+ width=1024,#宽
143
+ height=840,#高
144
+ background_color='white',#设置背景色
145
+ # max_words=100,#最大词汇数
146
+ # max_font_size=100#最大号字体
147
+ ).generate(text)
148
+
149
+ # Display the generated image:
150
+ # the matplotlib way:
151
+ import matplotlib.pyplot as plt
116
152
117
- < p > 结果:</ p >
153
+ plt.figure()
154
+ plt.imshow(wordcloud)
155
+ plt.axis("off")
156
+ plt.show()</ code > </ pre > < p > 结果:</ p >
118
157
< p > < img src ="http://jcjview.github.io/img/Figure_1.png " alt ="词云运行结果 "> </ p >
119
158
< p > 后台回复“词云”获得完整运行代码</ p >
120
159
< p > < em > 人生苦短,我用python早下班。如果觉得不错,对你工作中有帮助,请加我微信公众号flypython,我们一起探讨python相关问题</ em > </ p >
@@ -136,22 +175,5 @@ <h2 id="3-编写词云python代码并运行"><a href="#3-编写词云python代
136
175
</ section >
137
176
< script src ="/js/f25.js "> </ script >
138
177
139
- < script src ="/js/gitalk.min.js "> </ script >
140
-
141
- < script >
142
- var gitalkAdmin = 'xxg1413' . split ( ',' ) ;
143
- var gitalk = new Gitalk ( {
144
- clientID : 'd0e566bfc45c0b852c6c' ,
145
- clientSecret : '6b69b3a841c85a6223e5a904c47f5e2d84322980' ,
146
- repo : 'gitalk' ,
147
- owner : 'flypythoncom' ,
148
- admin : gitalkAdmin ,
149
- id : location . pathname . length > 50 ? location . pathname . substr ( 0 , 50 ) : location . pathname , // Ensure uniqueness and length less than 50
150
- distractionFreeMode : false // Facebook-like distraction free mode
151
- } ) ;
152
- gitalk . render ( 'gitalk-container' ) ;
153
- </ script >
154
-
155
-
156
178
</ body >
157
179
</ html >
0 commit comments