爬取图片网站图片源码
# Image scraper for the pic.netbian.com "4kmeinv" gallery.
#
# Walks the numbered listing pages, collects the `src` of every <img> tag on
# each page and downloads the referenced file into `file_path`.
import os
import re  # no longer used by the scraper itself; kept from the original import list
import posixpath
import urllib.request
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Site root; relative image paths found in the pages are resolved against it.
WebUrl = "https://pic.netbian.com/"

# Browser-like User-Agent sent with every listing-page request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    )
}

# Directory the images are written to.
# NOTE(review): the path separators were missing in the published listing and
# have been reconstructed here -- confirm the directory before running.
file_path = r"C:\Users\北海\Desktop\PythonStudy\study\picture\picture"

# Seconds to wait for a listing page before giving up on it.
REQUEST_TIMEOUT = 10


def _find_image_urls(html):
    """Return the absolute URL of every ``<img src=...>`` found in *html*.

    Only <img> tags are considered, so ``src`` attributes of scripts or
    frames are not mistaken for pictures.  ``urljoin`` resolves relative
    paths against ``WebUrl`` (without producing a double slash) and leaves
    already-absolute URLs untouched.
    """
    soup = BeautifulSoup(html, "lxml")
    return [urljoin(WebUrl, tag["src"]) for tag in soup.find_all("img", src=True)]


def _save_image(image_url, directory):
    """Download *image_url* into *directory*, named after the URL's last path segment.

    Raises:
        ValueError: if the URL path has no file name component.
        OSError: if the download or the file write fails.
    """
    # Take the name from the URL *path* only, so a query string never ends up
    # in the file name.
    file_suffix = posixpath.basename(urlsplit(image_url).path)
    if not file_suffix:
        raise ValueError("no file name in URL: %s" % image_url)
    print(file_suffix)

    # os.path.join inserts the separator that plain concatenation omitted;
    # without it the file is written next to the directory, not inside it.
    filename = os.path.join(directory, file_suffix)
    print(filename)

    urllib.request.urlretrieve(image_url, filename=filename)


def main():
    """Download the images from listing pages 2 through 99."""
    # Create the target directory once up front instead of re-checking it for
    # every image; if it cannot be created no download could succeed anyway.
    os.makedirs(file_path, exist_ok=True)

    # range() excludes its upper bound, so this visits index_2 .. index_99.
    for n in range(2, 100):
        url = f"{WebUrl}4kmeinv/index_{n}.html"
        try:
            r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
        except requests.RequestException as e:
            # A single unreachable page should not abort the whole run.
            print(0, e)
            continue

        for image_url in _find_image_urls(r.text):
            print(image_url)
            try:
                _save_image(image_url, file_path)
                print("成功")
            except IOError as e:
                # Network or file-system failure for this image; keep going.
                print(1, e)
            except Exception as e:
                # Anything else (e.g. an unusable URL); report and keep going.
                print(2, e)


if __name__ == "__main__":
    main()
上一篇:
通过多线程提高代码的执行效率例子
下一篇:
Java高级特性 - 集合框架(1)