# Scrape a news listing page: for every listed article, fetch the article
# page and print its title, description, metadata, link and body text.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Listing page; every <li> that contains a .news-list-title is one article.
url = "http://news.gzcc.cn/html/xiaoyuanxinwen/"
# Seconds to wait for the server; requests has no timeout unless one is given.
REQUEST_TIMEOUT = 10
SEPARATOR = '------------------------------------------------------------------------------'
# Number of whitespace-separated metadata fields read from .show-info:
# date, time of day, author, reviewer, source.
INFO_FIELD_COUNT = 5


def first_text(node, selector):
    """Return the text of the first element under *node* matching *selector*.

    Returns an empty string when nothing matches, so a missing element does
    not abort the whole run with an IndexError.
    """
    matches = node.select(selector)
    return matches[0].text if matches else ''


res = requests.get(url, timeout=REQUEST_TIMEOUT)
res.raise_for_status()
# Decode the body as UTF-8 instead of relying on the encoding requests guesses.
res.encoding = 'utf-8'
# Build the element tree with BeautifulSoup's built-in HTML parser.
soup = BeautifulSoup(res.text, 'html.parser')

for news in soup.select('li'):
    # Only <li> elements carrying a news title are article entries.
    title_nodes = news.select('.news-list-title')
    if not title_nodes:
        continue

    # Article title as shown on the listing page.
    title = title_nodes[0].text
    # Article description as shown on the listing page.
    description = first_text(news, '.news-list-description')

    # Link to the article page.
    links = news.select('a')
    href = links[0].get('href') if links else None
    if not href:
        continue
    # Resolve against the listing URL; an absolute href is returned unchanged.
    article_url = urljoin(url, href)

    # Use names distinct from the listing page's `res`/`soup` so the objects
    # being iterated are not clobbered inside the loop.
    try:
        article_res = requests.get(article_url, timeout=REQUEST_TIMEOUT)
        article_res.raise_for_status()
    except requests.RequestException as exc:
        print('skipping ' + article_url + ': ' + str(exc))
        continue
    article_res.encoding = 'utf-8'
    article_soup = BeautifulSoup(article_res.text, 'html.parser')

    # Per-article metadata line.
    newinfo = first_text(article_soup, '.show-info')
    # Article body text.
    content = first_text(article_soup, '#content')

    # Split the metadata on whitespace and pad with '' so that a short line
    # still unpacks into all five fields.
    fields = newinfo.split()[:INFO_FIELD_COUNT]
    fields += [''] * (INFO_FIELD_COUNT - len(fields))
    date, time_of_day, author, checker, source = fields

    print(SEPARATOR)
    print("文章标题:" + title)
    print("\n文章描述:" + description)
    print("\n文章信息:\n" + date + ' ' + time_of_day + '\n' + author + '\n' + checker + '\n' + source)
    print("\n文章链接:" + href)
    print(content)
    print(SEPARATOR)
