爬取分页文章中的图片


#coding=utf-8

import re
import urllib
import urllib.parse
import urllib.request

def getHtml(url: str) -> bytes:
    """Fetch *url* and return the raw, undecoded response body.

    Args:
        url: Address handed to ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes``. Decoding is left to the
        caller, which knows the page's character set.

    Raises:
        urllib.error.URLError: Propagated unchanged from ``urlopen`` when
            the request cannot be completed.
    """
    # The context manager closes the underlying connection even when
    # read() raises, so repeated calls do not leak sockets.
    with urllib.request.urlopen(url) as page:
        return page.read()

def getImg(html: str) -> list:
    """Return the ``src`` attribute values in *html* that end in ``.jpg``.

    Args:
        html: Decoded page markup. The pattern is a ``str`` pattern, so
            passing ``bytes`` raises ``TypeError``.

    Returns:
        A list of the matched ``src`` values, in document order. The list is
        empty when nothing matches.
    """
    # [^"]+ keeps each match inside a single quoted attribute value, so a
    # preceding non-jpg src on the same line is not swallowed into the capture.
    # \. makes the dot literal, so a value such as "/notajpg" is not mistaken
    # for a .jpg file.
    return re.findall(r'src="([^"]+\.jpg)"', html)

# Walk pages 1..11 of the article, download every .jpg found on each page and
# save the files under sequential numbers.
# NOTE(review): the indentation of this script was lost in the source. The
# progress print and the counter increment are assumed to belong to the inner
# (per-image) loop so that each image gets its own file name -- confirm.
PAGE_URL = "http://www.sxdaily.com.cn/n/2013/1121/c46-5275000-%s.html"
SAVE_PATH = r"C:\Users\Administrator\Desktop\xian\%s.jpg"

i = 1
for n in range(1, 12):
    page_url = PAGE_URL % n
    # errors='replace' keeps a single byte outside GB2312 from aborting the
    # whole run; pages that decode cleanly produce the same text as before.
    html = getHtml(page_url).decode('GB2312', errors='replace')
    for x in getImg(html):
        # urljoin yields the same URL as plain concatenation for
        # root-relative paths ("/dir/a.jpg") and also resolves absolute and
        # page-relative src values, which concatenation would corrupt.
        img_url = urllib.parse.urljoin(page_url, x)
        urllib.request.urlretrieve(img_url, SAVE_PATH % i)
        print('%s/11' % i)
        i += 1
print('finish!')



Leave a Reply

Your email address will not be published. Required fields are marked *