码迷,mamicode.com
首页 > 其他好文 > 详细

爬取数据来着

时间:2019-12-06 23:25:15      阅读:95      评论:0      收藏:0      [点我收藏+]

标签:date   top   str   desktop   ems   spec   mime   lan   name   

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-1\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-2\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-3\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-4\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-5\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-6\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-7\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-8\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-9\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-10\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-11\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-12\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-13\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-14\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-15\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-16\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-17\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-18\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-19\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-20\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-21\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-22\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-23\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-24\n",
"http://bang.dangdang.com/books/fivestars/01.41.00.00.00.00-recent30-0-0-1-25\n"
]
}
],
"source": [
"# Scrape the Dangdang five-star book ranking and write one CSV row per book.\n",
"import csv\n",
"import time  # not used in this cell; kept so later cells can rely on it\n",
"\n",
"import requests\n",
"from lxml import etree\n",
"\n",
"START_URL = 'http://bang.dangdang.com/books/fivestars'\n",
"# NOTE(review): '.cvs' is probably a typo for '.csv'; the path is left\n",
"# unchanged so the output still lands in the same place.\n",
"OUTPUT_PATH = '/Users/wanruo/Desktop/Dang.cvs'\n",
"LAST_PAGE = 25  # pages 1..25 are requested\n",
"# Leading characters of the first ranking link that are kept before the\n",
"# page number is appended.\n",
"# NOTE(review): breaks if the site changes the link length - confirm.\n",
"PAGE_URL_PREFIX_LENGTH = 74\n",
"REQUEST_TIMEOUT = 30  # seconds; requests.get() has no timeout by default\n",
"\n",
"\n",
"def fetch_tree(url):\n",
"    \"\"\"Download *url* and return it parsed as an lxml HTML tree.\n",
"\n",
"    Raises requests.HTTPError on a 4xx/5xx response so that an error\n",
"    page is not parsed as if it were a ranking page.\n",
"    \"\"\"\n",
"    response = requests.get(url, timeout=REQUEST_TIMEOUT)\n",
"    response.raise_for_status()\n",
"    return etree.HTML(response.text)\n",
"\n",
"\n",
"def first_or_na(matches):\n",
"    \"\"\"Return the first XPath match, or 'NA' when nothing matched.\"\"\"\n",
"    return matches[0] if matches else 'NA'\n",
"\n",
"\n",
"def book_row(book):\n",
"    \"\"\"Build one output row from a ranking <li> element.\n",
"\n",
"    Returns [title, href, star-link text, biaosheng text, publisher_info\n",
"    text]; any field that is missing from the element becomes 'NA'.\n",
"    \"\"\"\n",
"    title = first_or_na(book.xpath('./div[@class=\"name\"]/a/@title'))\n",
"    book_href = first_or_na(book.xpath('./div[@class=\"name\"]/a/@href'))\n",
"    # str.strip() removes the listed characters from both ends, leaving\n",
"    # only the text in between (presumably the bare number - confirm).\n",
"    pinglun = first_or_na(\n",
"        book.xpath('./div[@class=\"star\"]/a/text()')\n",
"    ).strip('条评论')\n",
"    wuxing = first_or_na(\n",
"        book.xpath('./div[@class=\"biaosheng\"]/span/text()')\n",
"    ).strip('次')\n",
"    date = first_or_na(\n",
"        book.xpath('./div[@class=\"publisher_info\"]/span/text()')\n",
"    )\n",
"    return [title, book_href, pinglun, wuxing, date]\n",
"\n",
"\n",
"def main():\n",
"    \"\"\"Fetch every ranking page and write its books to OUTPUT_PATH.\"\"\"\n",
"    links = fetch_tree(START_URL).xpath('//*[@id=\"sortRanking\"]/div/a/@href')\n",
"    if not links:\n",
"        raise RuntimeError('no ranking links found on ' + START_URL)\n",
"    page_prefix = links[0][:PAGE_URL_PREFIX_LENGTH]\n",
"\n",
"    with open(OUTPUT_PATH, 'w', encoding='utf-8', newline='') as f:\n",
"        # csv.writer quotes fields containing commas or quotes, so a\n",
"        # title with a comma no longer shifts the remaining columns.\n",
"        writer = csv.writer(f, lineterminator='\\n')\n",
"        for page in range(1, LAST_PAGE + 1):\n",
"            page_url = page_prefix + str(page)\n",
"            print(page_url)\n",
"            # Fetch page_url itself so every iteration reads a new page.\n",
"            tree = fetch_tree(page_url)\n",
"            books = tree.xpath(\n",
"                '//ul[@class=\"bang_list clearfix bang_list_mode\"]/li'\n",
"            )\n",
"            # Exactly one row per book on the page.\n",
"            writer.writerows(book_row(book) for book in books)\n",
"\n",
"\n",
"main()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

爬取数据来着

标签:date   top   str   desktop   ems   spec   mime   lan   name   

原文地址:https://www.cnblogs.com/linQingxuan/p/12000023.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!