标签:index eric lte list sha http val header UI
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import json
import time

import requests
# Endpoint that every request in this script is POSTed to.
url = "https://app.ganji.com/datashare/"

# Headers sent with each request. The "interface" entry is overridden
# elsewhere in this file when a different remote call is made, so it
# presumably selects the remote API method -- confirm against the service.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "userid": "C1ED10776D9B6108D8FEFEE4EA53058A",
    "model": "Generic/iphone",
    "customerid": "705",
    "clientagent": "iPhone 6S Plus#414*736#11.0.3",
    "versionid": "8.3.0",
    "os": "ios",
    "net": "wifi",
    "dv": "iPhone 6S Plus",
    "interface": "SearchPostsByJson3",
    "accept-language": "zh-cn",
}
def req(url, headers, data):
    """POST ``data`` to ``url`` and return the decoded JSON response.

    Args:
        url: Address the POST request is sent to.
        headers: Mapping of HTTP headers to send with the request.
        data: Request body, handed to ``requests.post`` unchanged.

    Returns:
        Whatever ``Response.json()`` decodes from the body, or ``None``
        when the request fails or the body is not valid JSON.
    """
    try:
        r = requests.post(url, headers=headers, data=data, timeout=5)
        return r.json()
    except (requests.RequestException, ValueError) as e:
        # Only transport failures and undecodable bodies are treated as
        # "no data"; ValueError covers JSON decode errors on requests
        # versions where that error is not a RequestException. Anything
        # else is a programming error and should surface.
        print("requests error: ", e, "requests url: ", url)
        return None
def get_ganji_list_data():
    """Fetch the first page of the post listing.

    Sends a fixed form-encoded query (page size 20, page index 1, sorted
    by ``post_at`` descending, filtered on a URL-encoded title keyword)
    using the module-level ``url`` and ``headers``.

    Returns:
        The decoded JSON response from ``req``, or ``None`` if the
        request failed.
    """
    # The payload is sent verbatim; it is split across adjacent literals
    # only for readability and concatenates to the original single string.
    data = (
        't=-576747455&&showType=0&showtype=0&jsonArgs='
        '{"pageSize":20,"cityScriptIndex":2300,"majorCategoryScriptIndex":7,'
        '"queryFilters":[],"categoryId":7,'
        '"andKeywords":[{"name":"title","value":"%E5%95%86%E9%93%BA%E5%87%BA%E5%94%AE"}],'
        '"customerId":"705","sortKeywords":[{"field":"post_at","sort":"desc"}],'
        '"pageIndex":1}'
    )
    # req already returns None on failure, so its result is passed through.
    return req(url, headers, data)
def get_article_data():
    """Fetch the listing, then fetch and collect details for each post.

    For every entry under ``"posts"`` in the listing response, a detail
    request is issued with the post's ``puid`` in the headers and its
    ``d_sign`` in the body. Detail responses whose ``"status"`` is ``0``
    are reduced to a flat dict of selected fields.

    Returns:
        A list of dicts with the keys ``price``, ``price_unit``,
        ``title``, ``city``, ``description``, ``district_name``,
        ``street_name``, ``latlng`` and ``id``. The list is empty when
        the listing request fails.
    """
    ganji_data = get_ganji_list_data()
    if ganji_data is None:
        return []

    data_list = ganji_data["posts"]
    print("count: ", ganji_data["total"])

    articles = []
    for post in data_list:
        title, d_sign, puid = post["title"], post["d_sign"], post["puid"]
        print(title, d_sign)
        data_article = "d_sign={0}&cityId=176&post_type_for_maidian=5&categoryId=7&spfy=0".format(d_sign)

        # Details are looked up by puid, which must be sent in the headers.
        # A copy is used so the shared module-level headers keep their
        # original "interface" value for later listing requests.
        detail_headers = dict(headers, interface="GetPostByPuid", puid=puid)
        content_data = req(url, detail_headers, data_article)

        # req returns None on failure; skip those instead of crashing.
        if content_data is not None and content_data.get("status") == 0:
            data = content_data["data"]
            articles.append({
                "price": data["price"]["v"],
                "price_unit": data["price"]["u"],
                "title": data["title"],
                "city": data["city"],
                "description": data["description"],
                "district_name": data["district_name"],
                "street_name": data["street_name"],
                "latlng": data["latlng"],
                "id": data["id"],
            })

        # Pause after each detail request (presumably to throttle calls
        # to the remote service).
        time.sleep(2)

    return articles
header 里的东西真多,最终测试只需要这几种,累死宝宝了。
标签:index eric lte list sha http val header UI
原文地址:http://www.cnblogs.com/dockers/p/7811514.html