快捷搜索: 王者荣耀 脱发

爬取图片网站图片源码

"""Bulk-download gallery images from pic.netbian.com (pages 2-99).

For each listing page, every ``src="..."`` attribute value found in the raw
HTML is treated as a site-relative image path, joined with the site root,
and saved into ``file_path`` under its original file name.
"""
import os
import re
import urllib.request

import requests

# Site root; the src attributes scraped from the page are site-relative.
WebUrl = "https://pic.netbian.com/"

# Minimal browser-like headers so the site does not reject the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    ),
}

# Destination directory for downloaded images.
# NOTE(review): the original absolute Windows path lost its backslashes in
# transcription; reconstructed best-effort — adjust to your own machine.
file_path = r"C:\Users\北海\Desktop\PythonStudy\study\picture\picture"

# Compile once, outside the page loop; raw string so the quotes survive.
# Captures the value of every src="..." attribute in the page HTML.
img_pattern = re.compile(r'src="(.*?)"')

# Create the target directory once, up front (original re-checked per image).
if not os.path.exists(file_path):
    os.makedirs(file_path)

for n in range(2, 100):  # listing pages 2-99
    url = "https://pic.netbian.com/4kmeinv/index_%d.html" % n
    r = requests.get(url, headers=headers)
    for src in img_pattern.findall(r.text):
        # src is site-relative (e.g. "/uploads/...") — strip any leading
        # slash so joining with the root does not produce a double slash.
        image_url = WebUrl + src.lstrip("/")
        print(image_url)
        try:
            # Last path component is the image's own file name.
            file_suffix = os.path.split(image_url)[1]
            print(file_suffix)
            # os.path.join fixes the original bug of gluing directory and
            # file name together with no separator.
            filename = os.path.join(file_path, file_suffix)
            print(filename)
            urllib.request.urlretrieve(image_url, filename=filename)
            print("成功")
        except IOError as e:
            print(1, e)
        except Exception as e:
            print(2, e)
经验分享 程序员 微信小程序 职场和发展