标签:position weight out log 权重 and lse 删除 ever
数据没有经过处理,那么他就只是一堆数据。
如果可能够将数据进行可视化操作,那数据它就可以很轻松的说明问题啦。
绘图工具:
基于Python
pyecharts,这里主要使用pyecharts去一个简单的介绍。
matplotib,底层,学习需要一定成本
seaborn,对matplotib的一个封装。
pyecharts官方文档:http://gallery.pyecharts.org/#/
pip install
pyecharts
主要使用pandas模块,
清理空值
去除重复项
将数据处理一致等,
以下两篇文章是我在CSDN写的博文,对于简单的数据清洗,不妨一看。
遇到“脏乱差”的Excel数据怎么办??利用Python规范Excel表格数据(数据清洗)
【数据分析】Python分析淘宝4200款Bra,发现最好卖的款式居然是。。。
导入模块:
import pandas as pd
# 打开文档
df = pd.read_excel(‘taobao_goods.xlsx‘)
删除重复的行:
# 删除行完全一样的值
df.drop_duplicates(inplace=True)
# 删除列重复的值
df.drop_duplicates(subset=[‘列名‘,‘列名‘])
对地理位置进行处理:
location_list = []
for location in df[‘location‘]:
    location = location.split(‘ ‘)[0]
    location_list.append(location)
df[‘location‘] = location_list
对销售量进行处理:
sales_list = []
for sale in df[‘sales‘]:
    sale = sale[:-3].replace(‘+‘, ‘‘)
    if ‘万‘ in sale:
        sale = int(float(sale.replace(‘万‘, ‘‘)) * 10000)
    sales_list.append(sale)
df[‘sales‘] = sales_list
保存为新的表格:
df.to_excel(‘new_taobao_goods.xlsx‘,index=None)
导入模块
import jieba
import pandas as pd
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.charts import Pie, Bar, Map, WordCloud, Page
两种方法:
pyecharts自带的生成词云wordcloud 模块生成词云(推荐方法一:
stop_words_txt = ‘stop_words.txt‘
# 载入停用词,即过滤词
jieba.analyse.set_stop_words(stop_words_txt)
# TextRank 关键词抽取,只获取固定词性
# topK为返回权重最大的关键词,默认值为20
# withWeight为返回权重值,默认为False
keywords_count_list = jieba.analyse.textrank(‘ ‘.join(df1.comment), topK=100, withWeight=True)
print(keywords_count_list)
word_cloud = (
    WordCloud()
        .add("", keywords_count_list, word_size_range=[5, 50], 
             shape=SymbolType.TRIANGLE,
            )
        .set_global_opts(title_opts=opts.TitleOpts(title="这里输入标题"))
)
# 这句话是渲染成一个html文件到当前文件夹下面
#     word_cloud.render(‘WordCloud.html‘)
方法二:(推荐,可自定义
pip install
wordcloud
import jieba
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud
# 打开文本
# text = open(‘1.txt‘,encoding=‘utf-8‘).read()
 
# 中文分词
text = ‘ ‘.join(jieba.cut(text))
 
# 生成对象
mask = np.array(Image.open("input_picture"))
wc = WordCloud(mask=mask,font_path=‘C:\Windows\Fonts\SimHei.ttf‘,mode=‘RGBA‘).generate(text)
 
# 显示词云
# plt.imshow(wc, interpolation=‘bilinear‘)
# plt.axis("off")
# plt.show()
 
# 保存到文件
wc.to_file(‘output_picture‘)
一般柱状图:
bar = (
    Bar()
    .add_xaxis(Faker.days_attrs)
    .add_yaxis("商家A", Faker.days_values)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Bar-DataZoom(slider+inside)"),
    )
#     .render("bar_datazoom_both.html")
)
横向柱状图:
.reversal_axis()
.set_series_opts(label_opts=opts.LabelOpts(position="right"))
滑块柱状图:
datazoom_opts=[opts.DataZoomOpts()]
数据来自:standard_goods_comments.xlsx
这里用cup做展示
[(‘B‘, 1909), (‘C‘, 810), (‘A‘, 696), (‘D‘, 259)]
多图显示cup:
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.commons.utils import JsCode
fn = """
    function(params) {
        if(params.name == ‘other‘)
            return ‘\\n\\n\\n‘ + params.name + ‘ : ‘ + params.value + ‘%‘;
        return params.name + ‘ : ‘ + params.value + ‘%‘;
    }
    """
def new_label_opts():
    return opts.LabelOpts(formatter=JsCode(fn), position="center")
pie = (
    Pie()
    .add(
        "",
        [[‘A_cup‘, round(696/total_cup, 2)*100],[‘other‘,round(1 - 696/total_cup, 2)*100]],
        center=["20%", "30%"],
        radius=[60, 80],
        label_opts=new_label_opts(),
    )
    .add(
        "",
        [[‘B_cup‘, round(1909/total_cup, 2)*100],[‘other‘,round(1 - 1909/total_cup, 2)*100]],
        center=["55%", "30%"],
        radius=[60, 80],
        label_opts=new_label_opts(),
    )
    .add(
        "",
        [[‘C_cup‘, round(810/total_cup, 2)*100],[‘other‘,round(1 - 810/total_cup, 2)*100]],
        center=["20%", "70%"],
        radius=[60, 80],
        label_opts=new_label_opts(),
    )
    .add(
        "",
        [[‘D_cup‘, round(259/total_cup * 100, 1)],[‘other‘,round(1 - 259/total_cup, 2)*100]],
        center=["55%", "70%"],
        radius=[60, 80],
        label_opts=new_label_opts(),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Cup-多饼图"),
        legend_opts=opts.LegendOpts(
            type_="scroll", pos_top="20%", pos_left="80%", orient="vertical"
        ),
    )
#     .render("mutiple_pie.html")
)
疫情展示:
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker
v = Faker.choose()
pie = (
    Pie()
    .add(
        "",
        [list(z) for z in zip(v, list(range(10,80,10)))],
        radius=["30%", "75%"],
        center=["25%", "50%"],
        rosetype="radius",
        label_opts=opts.LabelOpts(is_show=False),
    )
    .add(
        "",
        [list(z) for z in zip(v,list(range(10,80,10))[::-1])],
        radius=["30%", "75%"],
        center=["75%", "50%"],
        rosetype="area",
    )
    .set_global_opts(title_opts=opts.TitleOpts(title="Pie-玫瑰图示例"))
)
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.faker import Faker
map = (
    Map()
    .add("店铺数量",[[‘广东‘,100],[‘广西‘,100],[‘湖南‘,19,]], "china")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="商家店铺地址分布图"),
        visualmap_opts=opts.VisualMapOpts(max_=200),
    )
)
天气:
from pyecharts import options as opts
from pyecharts.charts import Liquid
liquid = (
    Liquid()
    .add("lq", [0.45,0.5])	
 	# 第一个值为显示的值,第二个值为水的分量
    .set_global_opts(title_opts=opts.TitleOpts(title="今日湿度"))
#     .render("liquid_base.html")
)
Page.save_resize_html(‘page_draggable_layout.html‘,cfg_file= ‘chart_config.json‘)
标签:position weight out log 权重 and lse 删除 ever
原文地址:https://www.cnblogs.com/codehao/p/14781465.html