标签:
抓取结果(运行下方脚本后的输出示例):
Year: 15Fall
Degree: MS
Offer/Rej: Rej
Major: CS
University: Rutgers
T:
GRE:
GPA: ()
Detailed Major:
BackGround: 本科其他
Abroad_BackGround:
源代码如下(Python 3):
# -*- coding: utf-8 -*-
import urllib.parse
import urllib.request
# Forum listing page whose first record is scraped.
PAGE_URL = (
    'http://www.1point3acres.com/bbs/forum.php'
    '?mod=forumdisplay&fid=82&sortid=164&sortid=164&page=2'
)
# Codec used to turn the downloaded bytes into text before searching.
PAGE_ENCODING = 'gbk'
# File the extracted record is written to, and the separator between values.
OUTPUT_PATH = 'day01.txt'
FIELD_SEPARATOR = '==='

# One entry per field, in the order the markers are searched for in the page:
#   (field name, opening marker, offset from marker start to value,
#    closing marker)
# For 'year' and 'gre' the offset is one larger than the marker length, so
# one extra character after the marker is skipped (presumably a '[' and a
# ':' in the page markup -- confirm against the live HTML).
_FIELD_MARKERS = (
    ('year', '#666">', 7, '</font>'),
    ('degree', 'blue">', 6, '</font>'),
    ('offer', '"black"><b>', 11, '</b>]</font>'),
    ('major', '"#F60"><b>', 10, '</b></font>'),
    ('school', '"#00B2E8">', 10, '</font>'),
    ('toefl', 'T</b>:', 6, '</font>'),
    ('gre', '<b>G</b>', 9, '</font>'),
    ('major2', '<font color="green">', 20, '</font>'),
    ('gpa', '<font color="darkcyan">', 23, '</font>'),
    ('homebj', '<font color="purple">', 21, '</font>'),
    ('abroadbj', '<font color="hotpink">', 22, '</font>'),
)


def fetch_page(url=PAGE_URL, encoding=PAGE_ENCODING):
    """Download *url* and return its body decoded with *encoding*.

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the body is not valid in *encoding*.
    """
    req = urllib.request.Request(url)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode(encoding)


def extract_fields(page):
    """Return a dict of the fields of the first record found in *page*.

    The markers in ``_FIELD_MARKERS`` are located one after another; each
    search starts where the previous field's closing marker was found, so
    the fields are expected to appear in that order in the page.

    A field whose opening or closing marker cannot be found is returned as
    an empty string and the scan position is left unchanged.  (The original
    code sliced with the -1 returned by ``str.find`` and produced garbage.)
    Note that if the page holds several records, a field missing from the
    first one may then be picked up from a later record.
    """
    fields = {}
    cursor = 0
    for name, opening, offset, closing in _FIELD_MARKERS:
        start = page.find(opening, cursor)
        # The closing marker is searched from the start of the opening one.
        end = page.find(closing, start) if start != -1 else -1
        if end == -1:
            fields[name] = ''
            continue
        fields[name] = page[start + offset:end]
        cursor = end
    return fields


def format_record(fields):
    """Join the field values with '===' in the order they are written out."""
    return FIELD_SEPARATOR.join(fields[name] for name, *_ in _FIELD_MARKERS)


def print_record(fields):
    """Print the record to stdout, one labelled field per line."""
    print("=======++========")
    print("Year: " + fields['year'])
    print("Degree: " + fields['degree'])
    print("Offer/Rej: " + fields['offer'])
    print("Major: " + fields['major'])
    print("University: " + fields['school'])
    print("T: " + fields['toefl'])
    print("GRE: " + fields['gre'])
    print("GPA: " + fields['gpa'])
    print("Detailed Major: " + fields['major2'])
    print("BackGround: " + fields['homebj'])
    print("Abroad_BackGround: " + fields['abroadbj'])


def main():
    """Fetch the page, print the first record and save it to OUTPUT_PATH."""
    fields = extract_fields(fetch_page())
    print_record(fields)
    # An explicit encoding keeps non-ASCII values (e.g. Chinese text) from
    # failing on platforms whose default codec cannot represent them.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
        out.write(format_record(fields))


if __name__ == '__main__':
    main()
标签:
原文地址:http://www.cnblogs.com/lovemypa/p/4485028.html