代码如下:
# -*- coding: UTF-8 -*-
'''
Created on 2021-12-5
@author: good-temper
'''
import time
import urllib2

import bs4
def getPage(urlStr):
    '''
    Fetch a page and return its raw content.

    Args:
        urlStr: Address of the page to download.

    Returns:
        The complete response body as produced by the response's ``read()``.
    '''
    response = urllib2.urlopen(urlStr)
    try:
        return response.read()
    finally:
        # Release the connection even if reading fails part-way; previously
        # the handle stayed open until it was garbage collected.
        response.close()
def getNextPageUrl(currPageNum):
    '''
    Build the URL of the page after ``currPageNum`` and probe that page.

    Args:
        currPageNum: Number of the page handled so far (0 before the first).

    Returns:
        The URL of page ``currPageNum + 1`` when that page contains no
        ``<span class="next-disabled">`` element, otherwise ``''``.
    '''
    # URL layout: <page number>-1-1-72-4137-33.html
    # NOTE(review): the leading literal is empty, so no scheme/host is
    # prepended here -- confirm the intended base address.
    url = u'' + str(currPageNum + 1) + '-1-1-72-4137-33.html'

    # Is there a next page? The candidate page is downloaded to find out.
    # NOTE(review): a page that carries the marker yields '' and is thus
    # never handed back to the caller -- confirm that skipping it is intended.
    soup = bs4.BeautifulSoup(getPage(url))
    disabledMarkers = soup.findAll('span', {'class': 'next-disabled'})
    if not disabledMarkers:
        return url
    return ''
def analyzeList():
    '''
    Walk the listing pages and collect the link of every listed entry.

    Pages are requested through ``getNextPageUrl`` until it returns ``''``.
    On each page, every ``<div class="p-name">`` contributes the ``href`` of
    its first ``<a>`` element. The running page count is printed after each
    page.

    Returns:
        A list of the collected ``href`` values, in the order encountered.
    '''
    pageNum = 0
    links = []
    url = getNextPageUrl(pageNum)
    while url:
        soup = bs4.BeautifulSoup(getPage(url))
        for elem in soup.findAll('div', {'class': 'p-name'}):
            # Search the tag directly; serialising it with str() and parsing
            # the result again only to look inside it is redundant work.
            anchor = elem.find('a')
            href = anchor.get('href') if anchor is not None else None
            # A block without a link is skipped instead of aborting the
            # whole crawl with a TypeError/KeyError.
            if href is not None:
                links.append(href)
        pageNum = pageNum + 1
        # Progress output: number of pages processed so far.
        print(pageNum)
        url = getNextPageUrl(pageNum)
    return links
def analyzeContent(url):
    '''
    Placeholder for analysing a single page; not implemented yet.

    Args:
        url: Currently unused.

    Returns:
        Always the empty string.
    '''
    return ''
def writeToFile(list, path):
    '''
    Append every entry of ``list`` to the file at ``path``, one per line.

    Args:
        list: Iterable of strings to write. The parameter name is kept for
            callers that pass it by keyword, although it hides the builtin
            inside this function.
        path: Destination file. It is opened in append mode, so content
            already present is preserved.
    '''
    # The context manager closes the file even when a write raises.
    with open(path, 'a') as f:
        f.writelines(elem + '\n' for elem in list)
if __name__ == '__main__':
    # Script entry point: collect the links, report how many were found,
    # then append them to the output file.
    urls = analyzeList()
    print('共抓取' + str(len(urls)) + '条\n')
    writeToFile(urls, u'E:\\jd_phone_list.dat')
转载请注明:seo-网站优化-网站建设 » python抓取京东商城手机列表url实例代码
文章地址:https://www.tianxianmao.com/article/online/12705.html