代码如下:
import json
import urllib2

from BeautifulSoup import BeautifulSoup
from creepy import Crawler
class MyCrawler(Crawler):
    """Crawler that prints the title and price of each product page it fetches."""

    # Prefix of the price-lookup URL; the id taken from the page URL is appended.
    # NOTE(review): the scheme/host part appears to be missing from this copy of
    # the script -- urlopen() cannot fetch a URL starting with '?'. Restore the
    # full endpoint before running.
    PRICE_URL_PREFIX = '?skuid=J_'

    def process_document(self, doc):
        """Handle one fetched document.

        Only documents with ``doc.status == 200`` are processed. For those, the
        status and URL are printed, then the text of the ``<h1>`` inside the
        element with id ``product-intro``, then the ``'p'`` field of the first
        entry of the JSON returned by the price lookup.

        Documents without the ``product-intro`` heading are skipped silently
        instead of raising ``AttributeError``.

        Errors from the price request itself (network, JSON decoding, missing
        keys) propagate to the caller.
        """
        if doc.status != 200:
            return

        print('[%d]%s' % (doc.status, doc.url))

        try:
            # Re-encode the page from gb18030 to utf-8 before parsing.
            soup = BeautifulSoup(doc.text.decode('gb18030').encode('utf-8'))
        except Exception as e:
            # Deliberate best-effort fallback: report the problem and parse the
            # text exactly as received.
            print(e)
            soup = BeautifulSoup(doc.text)

        try:
            title = soup.find(id="product-intro").div.h1.text
        except AttributeError:
            # find() returned None or the div/h1 chain is absent, so there is
            # nothing to report for this page.
            return
        print(title)

        # The id is the last path segment of the URL up to its first dot,
        # e.g. ".../12345.html" -> "12345".
        url_id = urllib2.unquote(doc.url).decode('utf8').split('/')[-1].split('.')[0]

        f = urllib2.urlopen(self.PRICE_URL_PREFIX + url_id, timeout=5)
        try:
            price = json.loads(f.read())
        finally:
            # Always release the connection, even if reading or decoding fails.
            f.close()
        print(price[0]['p'])
# Script entry: build the crawler, configure it, and start crawling.
crawler = MyCrawler()
crawler.set_follow_mode(Crawler.F_SAME_HOST)
crawler.set_concurrency_level(16)
# Raw string keeps the backslash literal for the regex engine. The pattern
# matches URLs ending in a static-asset extension (presumably so they are
# excluded from the crawl -- confirm against the creepy documentation).
crawler.add_url_filter(r'\.(jpg|jpeg|gif|png|js|css|swf)$')
# NOTE(review): the start URL is empty in this copy of the script; supply the
# page the crawl should begin from.
crawler.crawl('')
转载请注明:seo-网站优化-网站建设 » python 抓取京东价格分析京东商品价格走势
文章地址:https://www.tianxianmao.com/article/online/13193.html