码迷,mamicode.com
首页 > 其他好文 > 详细

晴天前100页评论标签云分析显示

时间:2017-10-12 14:02:05      阅读:192      评论:0      收藏:0      [点我收藏+]

标签:state   获取   tor   python   cut   could   generate   blog   open   

#coding:utf-8
from os import path
from scipy.misc import imread
import matplotlib.pyplot as plt
import jieba
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

stopwords = {}
def importStopword(filename=‘‘):
    global stopwords
    f = open(filename, ‘r‘, encoding=‘utf-8‘)
    line = f.readline().rstrip()

    while line:
        stopwords.setdefault(line, 0)
        stopwords[line] = 1
        line = f.readline().rstrip()

    f.close()

def processChinese(text):
    seg_generator = jieba.cut(text)  # 使用结巴分词,也可以不使用


    seg_list = [i for i in seg_generator if i not in stopwords]

    seg_list = [i for i in seg_list if i != u‘ ‘]

    seg_list = r‘ ‘.join(seg_list)

    return seg_list


importStopword(filename=‘./stopwords.txt‘)


# 获取当前文件路径
# __file__ 为当前文件, 在ide中运行此行会报错,可改为
# d = path.dirname(‘.‘)
d = path.dirname(__file__)

text = open(path.join(d, ‘loves.txt‘),encoding =‘utf-8‘).read()
print(text)

#如果是中文
text = processChinese(text)#中文不好分词,使用Jieba分词进行
# read the mask / color image
# taken from http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010
# 设置背景图片
back_coloring = imread(path.join(d, "./image/love.jpg"))

wc = WordCloud( font_path=‘simhei.ttf‘,#设置字体
                background_color="white", #背景颜色
                max_words=2000,# 词云显示的最大词数
                mask=back_coloring,#设置背景图片
                #max_font_size=100, #字体最大值
                random_state=42,
                )
# 生成词云, 可以用generate输入全部文本(中文不好分词),也可以我们计算好词频后使用generate_from_frequencies函数
wc.generate(text)
# wc.generate_from_frequencies(txt_freq)
# txt_freq例子为[(‘词a‘, 100),(‘词b‘, 90),(‘词c‘, 80)]
# 从背景图片生成颜色值
image_colors = ImageColorGenerator(back_coloring)

plt.figure()
# 以下代码显示图片
plt.imshow(wc)
plt.axis("off")
plt.show()
# 绘制词云
‘‘‘plt.figure()


# recolor wordcloud and show
# we could also give color_func=image_colors directly in the constructor
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")


# 绘制背景图片为颜色的图片
plt.figure()
plt.imshow(alice_coloring, cmap=plt.cm.gray)
plt.axis("off")
plt.show()
‘‘‘
# 保存图片
wc.to_file(path.join(d, "名称.png"))

  

晴天前100页评论标签云分析显示

标签:state   获取   tor   python   cut   could   generate   blog   open   

原文地址:http://www.cnblogs.com/c-x-a/p/7655626.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!