# 爬取图片网站图片源码 (source code for scraping images from a picture website)
import urllib.request
from bs4 import BeautifulSoup
import requests
import re
import urllib.request
import os
from urllib.parse import urljoin, urlsplit

# Site root; every ``src`` value scraped from a listing page is resolved
# against it.
WebUrl = "https://pic.netbian.com/"

# Browser-like User-Agent sent with each listing-page request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    ),
}

# Directory the downloaded files are written to.
# NOTE(review): the path separators were missing from this literal and have
# been reconstructed -- confirm the directory layout before running.
file_path = r"C:\Users\北海\Desktop\PythonStudy\study\picture\picture"

# Captures the value of every src="..." attribute in a page. Compiled once
# here instead of once per page.
patterns = re.compile(r'src="(.*?)"')


def extract_image_urls(html, base_url=WebUrl):
    """Return an absolute URL for each non-empty ``src="..."`` value in *html*.

    Values are resolved against *base_url* with ``urljoin`` so that
    root-relative paths do not produce a doubled slash and already-absolute
    URLs are left unchanged. Empty ``src`` values are dropped because they
    carry no file to download.
    """
    return [urljoin(base_url, src) for src in patterns.findall(html) if src]


def image_filename(image_url):
    """Return the last path segment of *image_url*, without query or fragment.

    The result is an empty string when the URL path ends with ``/``.
    """
    return urlsplit(image_url).path.rsplit("/", 1)[-1]


def download_images(image_urls, directory=file_path):
    """Download every URL in *image_urls* into *directory*.

    Failures are reported on stdout and do not stop the remaining downloads:
    ``1`` prefixes an I/O or network error, ``2`` any other error.
    """
    for image_url in image_urls:
        print(image_url)
        file_suffix = image_filename(image_url)
        if not file_suffix:
            # Nothing to name the local file after; skip instead of trying
            # to write onto the directory itself.
            continue
        print(file_suffix)
        filename = os.path.join(directory, file_suffix)
        print(filename)
        try:
            urllib.request.urlretrieve(image_url, filename=filename)
        except OSError as e:
            print(1, e)
        except Exception as e:
            # Deliberately broad: one bad URL must not abort the whole run.
            print(2, e)
        else:
            print("成功")


def main():
    """Walk listing pages 2 through 99 and download every ``src`` they reference."""
    # Create the target directory once, rather than checking per image.
    os.makedirs(file_path, exist_ok=True)
    for n in range(2, 100):  # pages 2..99 (the end of range() is exclusive)
        url = "https://pic.netbian.com/4kmeinv/index_%d.html" % n
        try:
            # A timeout keeps one unresponsive page from hanging the script.
            r = requests.get(url, headers=headers, timeout=10)
            r.raise_for_status()
        except requests.RequestException as e:
            print(1, e)
            continue
        download_images(extract_image_urls(r.text))


if __name__ == "__main__":
    main()
# 上一篇:
# 通过多线程提高代码的执行效率例子
# 下一篇:
# Java高级特性 - 集合框架(1)
