@@ -115,45 +115,11 @@ <h2 id="1-安装wordcloud"><a href="#1-安装wordcloud" class="headerlink" title
115
115
<h2 id="2-准备红楼梦文本"><a href="#2-准备红楼梦文本" class="headerlink" title="2.准备红楼梦文本"></a>2.准备红楼梦文本</h2><p>文本可以用下面链接下载</p>
116
116
<p><code>https://github.com/flypythoncom/flypython/blob/master/wordcloud_hlm_seg.txt</code></p>
117
117
<p>或者可以自己写代码,对文本进行清洗、分词。<br>这里需要安装jieba分词,<code>pip install jieba</code></p>
118
- < pre > < code > import jieba
119
- import re
118
+ < figure class ="highlight python "> < table > < tr > < td class ="gutter "> < pre > < span class ="line "> 1</ span > < br > < span class ="line "> 2</ span > < br > < span class ="line "> 3</ span > < br > < span class ="line "> 4</ span > < br > < span class ="line "> 5</ span > < br > < span class ="line "> 6</ span > < br > < span class ="line "> 7</ span > < br > < span class ="line "> 8</ span > < br > < span class ="line "> 9</ span > < br > < span class ="line "> 10</ span > < br > < span class ="line "> 11</ span > < br > < span class ="line "> 12</ span > < br > < span class ="line "> 13</ span > < br > < span class ="line "> 14</ span > < br > < span class ="line "> 15</ span > < br > < span class ="line "> 16</ span > < br > < span class ="line "> 17</ span > < br > </ pre > </ td > < td class ="code "> < pre > < span class ="line "> </ span > < br > < span class ="line "> < span class ="keyword "> import</ span > jieba</ span > < br > < span class ="line "> < span class ="keyword "> import</ span > re</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> special_character_removal = re.compile(< span class ="string "> r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^-——=&#@¥『』]'</ span > , re.IGNORECASE)</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> fw=open(< span class ="string "> "hlm_seg.txt"</ span > ,< span class ="string "> "w"</ span > ,encoding=< span class ="string "> "utf-8"</ span > )</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> < span class ="keyword "> with</ span > open(< span class ="string "> 'hlm.txt'</ span > ,encoding=< span class ="string "> "utf-8"</ span > ) < span class ="keyword "> as</ span > fp:</ span > < br > < span class ="line "> < span class ="keyword "> for</ span > line < span class ="keyword "> in</ span > fp:</ span > < br > < span class ="line "> l = special_character_removal.sub(< span class ="string "> ''</ span > , line.strip())</ span > < 
br > < span class ="line "> words=jieba.cut(l)</ span > < br > < span class ="line "> t=< span class ="string "> " "</ span > .join(words)</ span > < br > < span class ="line "> fw.write(t)</ span > < br > < span class ="line "> fw.write(< span class ="string "> "\n"</ span > )</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> fw.close()</ span > < br > </ pre > </ td > </ tr > </ table > </ figure >
120
119
121
- special_character_removal = re.compile(r'[,。、【 】“”:;()《》‘’{}?!⑦%>℃.^-——=&#@¥『』]', re.IGNORECASE)
120
+ < h2 id ="3-编写词云python代码并运行 "> < a href ="#3-编写词云python代码并运行 " class ="headerlink " title ="3. 编写词云python代码并运行 "> </ a > 3. 编写词云python代码并运行</ h2 > < figure class ="highlight python "> < table > < tr > < td class ="gutter "> < pre > < span class ="line "> 1</ span > < br > < span class ="line "> 2</ span > < br > < span class ="line "> 3</ span > < br > < span class ="line "> 4</ span > < br > < span class ="line "> 5</ span > < br > < span class ="line "> 6</ span > < br > < span class ="line "> 7</ span > < br > < span class ="line "> 8</ span > < br > < span class ="line "> 9</ span > < br > < span class ="line "> 10</ span > < br > < span class ="line "> 11</ span > < br > < span class ="line "> 12</ span > < br > < span class ="line "> 13</ span > < br > < span class ="line "> 14</ span > < br > < span class ="line "> 15</ span > < br > < span class ="line "> 16</ span > < br > < span class ="line "> 17</ span > < br > < span class ="line "> 18</ span > < br > < span class ="line "> 19</ span > < br > < span class ="line "> 20</ span > < br > < span class ="line "> 21</ span > < br > < span class ="line "> 22</ span > < br > < span class ="line "> 23</ span > < br > < span class ="line "> 24</ span > < br > < span class ="line "> 25</ span > < br > < span class ="line "> 26</ span > < br > </ pre > </ td > < td class ="code "> < pre > < span class ="line "> </ span > < br > < span class ="line "> < span class ="keyword "> from</ span > os < span class ="keyword "> import</ span > path </ span > < br > < span class ="line "> < span class ="keyword "> from</ span > wordcloud < span class ="keyword "> import</ span > WordCloud</ span > < br > < span class ="line "> </ span > < br > < span class ="line "> d = path.dirname(__file__) </ span > < br > < span class ="line "> < span class ="comment "> # Read the whole text. 
</ span > </ span > < br > < span class ="line "> text = open(path.join(d, < span class ="string "> 'hlm_seg.txt'</ span > ),encoding=< span class ="string "> "utf-8"</ span > ).read() </ span > < br > < span class ="line "> < span class ="comment "> # Generate a word cloud image </ span > </ span > < br > < span class ="line "> < span class ="comment "> # font=path.join(d, "simkai.ttf") </ span > </ span > < br > < span class ="line "> font=< span class ="string "> 'C:/Windows/Fonts/simkai.ttf'</ span > </ span > < br > < span class ="line "> wordcloud = WordCloud(font_path=font,< span class ="comment "> #设置中文字体,不指定就会出现中文不显示 </ span > </ span > < br > < span class ="line "> width=< span class ="number "> 1024</ span > ,< span class ="comment "> #宽 </ span > </ span > < br > < span class ="line "> height=< span class ="number "> 840</ span > ,< span class ="comment "> #高 </ span > </ span > < br > < span class ="line "> background_color=< span class ="string "> 'white'</ span > ,< span class ="comment "> #设置背景色 </ span > </ span > < br > < span class ="line "> < span class ="comment "> # max_words=100,#最大词汇数 </ span > </ span > < br > < span class ="line "> < span class ="comment "> # max_font_size=100#最大号字体 </ span > </ span > < br > < span class ="line "> ).generate(text) </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> < span class ="comment "> # Display the generated image: </ span > </ span > < br > < span class ="line "> < span class ="comment "> # the matplotlib way: </ span > </ span > < br > < span class ="line "> < span class ="keyword "> import</ span > matplotlib.pyplot < span class ="keyword "> as</ span > plt </ span > < br > < span class ="line "> </ span > < br > < span class ="line "> plt.figure() </ span > < br > < span class ="line "> plt.imshow(wordcloud) </ span > < br > < span class ="line "> plt.axis(< span class ="string "> "off"</ span > ) </ span > < br > < span class ="line "> plt.show()</ span > < br > </ 
pre > </ td > </ tr > </ table > </ figure >
122
121
123
- fw=open("hlm_seg.txt","w",encoding="utf-8")
124
-
125
- with open('hlm.txt',encoding="utf-8") as fp:
126
- for line in fp:
127
- l = special_character_removal.sub('', line.strip())
128
- words=jieba.cut(l)
129
- t=" ".join(words)
130
- fw.write(t)
131
- fw.write("\n")
132
- fw.close()</ code > </ pre > < h2 id ="3-编写词云python代码并运行 "> < a href ="#3-编写词云python代码并运行 " class ="headerlink " title ="3. 编写词云python代码并运行 "> </ a > 3. 编写词云python代码并运行</ h2 > < pre > < code > from os import path
133
- from wordcloud import WordCloud
134
-
135
- d = path.dirname(__file__)
136
- # Read the whole text.
137
- text = open(path.join(d, 'hlm_seg.txt'),encoding="utf-8").read()
138
- # Generate a word cloud image
139
- # font=path.join(d, "simkai.ttf")
140
- font='C:/Windows/Fonts/simkai.ttf'
141
- wordcloud = WordCloud(font_path=font,#设置中文字体,不指定就会出现中文不显示
142
- width=1024,#宽
143
- height=840,#高
144
- background_color='white',#设置背景色
145
- # max_words=100,#最大词汇数
146
- # max_font_size=100#最大号字体
147
- ).generate(text)
148
-
149
- # Display the generated image:
150
- # the matplotlib way:
151
- import matplotlib.pyplot as plt
152
-
153
- plt.figure()
154
- plt.imshow(wordcloud)
155
- plt.axis("off")
156
- plt.show()</ code > </ pre > < p > 结果:</ p >
122
+ <p>结果:</p>
157
123
<p><img src="http://jcjview.github.io/img/Figure_1.png" alt="词云运行结果"></p>
158
124
<p>后台回复“词云”获得完整运行代码。</p>
159
125
<p><em>人生苦短,我用Python早下班。如果觉得不错,对你的工作有帮助,请加我微信公众号flypython,我们一起探讨Python相关问题。</em></p>
0 commit comments