import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import requests
import csv
from BeautifulSoup import BeautifulSoup
def get_content(page):
    """Download one page of 51job search results and parse it.

    Args:
        page: Result page number; it is passed through ``str`` and spliced
            into the search URL just before ``.html``.

    Returns:
        The ``BeautifulSoup`` tree built from the raw response body.
    """
    # The query string must carry ``&degreefrom=99``. An earlier copy of this
    # URL had ``&deg`` rendered as a degree sign, which fused the parameter
    # into the ``cotype`` value and dropped ``degreefrom`` altogether.
    url = (
        'http://search.51job.com/list/200200,000000,0000,32,9,99,python,2,'
        + str(page)
        + '.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99'
        '&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1'
        '&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line='
        '&specialarea=00&from=&welfare='
    )
    response = requests.get(url)
    return BeautifulSoup(response.content)
def _node_text(node):
    """Return the stripped text of a parsed tag, or '' when the tag is absent."""
    if node is None:
        return ''
    return node.text.strip()


def get(soup):
    """Extract the job postings from a parsed page and append them to imdb.csv.

    Args:
        soup: Parsed result page. It must offer ``find`` returning an object
            with ``findAll`` (the script passes the tree produced by
            ``get_content``).

    Returns:
        None. One ``(title, company, money)`` row is appended to ``imdb.csv``
        for every ``<a class="e e2 eck">`` inside the result container.
    """
    tag1 = soup.find('div', attrs={'class': 'jblist res'})
    if tag1 is None:
        # No result container on this page (empty or unexpected markup):
        # nothing to record, so leave the CSV untouched instead of crashing.
        return

    inf_list = []
    for label in tag1.findAll('a', attrs={'class': 'e e2 eck'}):
        # A posting missing one of its parts still yields a row, with an
        # empty cell for the missing field.
        title = _node_text(label.find('h3'))
        company = _node_text(label.find('aside'))
        money = _node_text(label.find('em'))
        inf_list.append((title, company, money))

    # Binary append mode: the Python 2 csv module emits its own line
    # terminators, and the header row is written with "wb", so text mode
    # here would produce doubled carriage returns on Windows.
    # NOTE(review): non-ASCII cell text relies on the utf-8 default encoding
    # set at the top of the file -- confirm before removing that setup.
    with open("imdb.csv", "ab") as f:
        fw = csv.writer(f)
        fw.writerows(inf_list)
with open("imdb.csv","wb") as f:
fw = csv.writer(f)
fw.writerow(['职位','公司','薪资'])
for j in range(1, 10):
print "-----正在爬第"+str(j)+"页内容---------"
html = get_content(j)
get(html)
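# 手机扫一扫 (Scan with your phone)
# 移动阅读更方便 (Mobile reading is more convenient)
# 你可能感兴趣的文章 (Articles you may be interested in)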