# selenium_获取京东商品价格 (Selenium: scrape JD.com product prices)

import time
import urllib
import urllib.parse

from pymongo import MongoClient
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Chrome launch options shared by the module-level driver below.
options = Options()

# Run Chrome in headless mode (no visible browser window).
options.add_argument("--headless")
# Module-level driver used by get_save_goods().
web = Chrome(options=options)


def _clickable_text(wait, element):
    """Wait until *element* is clickable and return its visible text."""
    return wait.until(EC.element_to_be_clickable(element)).text


def _parse_goods_item(wait, item):
    """Extract name, link, price, shop and comment count from one result ``<li>``.

    Args:
        wait: The ``WebDriverWait`` used to wait for each sub-element.
        item: The ``<li>`` WebElement of a single search result.

    Returns:
        A dict keyed by the Chinese field names that are stored in MongoDB.
    """
    title_xpath = './/div[@class="p-name p-name-type-2"]/a'

    # The title may span several lines; collapse it onto one line.
    name = _clickable_text(
        wait, item.find_element(By.XPATH, title_xpath + "/em")
    ).replace("\n", "")

    link = wait.until(
        EC.element_to_be_clickable(item.find_element(By.XPATH, title_xpath))
    ).get_attribute("href")

    price = _clickable_text(
        wait, item.find_element(By.XPATH, './/div[@class="p-price"]//i')
    )

    # Some products do not show a shop; fall back to a default label.
    try:
        shop = _clickable_text(
            wait, item.find_element(By.XPATH, './/div[@class="p-shop"]//a')
        )
    except (NoSuchElementException, TimeoutException):
        shop = "厂家配送"

    comment = _clickable_text(
        wait, item.find_element(By.CSS_SELECTOR, ".p-commit a")
    )

    return {
        "商品名称": name,
        "商品链接": link,
        "价格": price,
        "商店名称": shop,
        "评论": comment,
    }


def get_save_goods(max_pages=5):
    """Scrape JD search results for a product and store them in MongoDB.

    Prompts on stdin for a product name, opens the matching search page on
    search.jd.com with the module-level ``web`` driver, and walks the result
    pages. Every product is printed and inserted into the MongoDB database
    ``jd`` (localhost:27017), in a collection named after the search keyword.

    Args:
        max_pages: Stop after the page with this number has been scraped.
            Defaults to 5.
    """
    # Ask for the product name to search for.
    good_name = input("请输入你要爬取的商品名称:")
    # Percent-encode the keyword so it can be embedded in the URL.
    good_name_unicode = urllib.parse.quote(good_name)
    url = f"https://search.jd.com/Search?keyword={good_name_unicode}"
    web.get(url)
    # Explicit wait, up to 10 seconds per condition.
    wait = WebDriverWait(web, 10)

    # Open a single client for the whole run and close it on exit.
    with MongoClient(host="localhost", port=27017) as client:
        # Database "jd", one collection per search keyword.
        collection = client["jd"][good_name]

        while True:
            # Give a freshly loaded result page a moment to render.
            time.sleep(3)

            # Number of the page currently shown in the pager.
            page = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//span/a[@class="curr"]'))
            ).text
            print(f"开始爬取第{page}页")

            # One <li> per product in the result list.
            items = web.find_elements(By.XPATH, '//div[@id="J_goodsList"]/ul/li')
            print(len(items))

            for item in items:
                data = _parse_goods_item(wait, item)
                print(data)
                collection.insert_one(data)

            # Stop once the requested number of pages has been scraped;
            # ">=" also ends the loop if the pager is already past the limit.
            if int(page) >= max_pages:
                break

            # Go to the next page; stop if there is no clickable "next" link.
            try:
                next_button = wait.until(
                    EC.element_to_be_clickable((By.XPATH, '//a[@class="pn-next"]'))
                )
            except (NoSuchElementException, TimeoutException):
                break
            next_button.click()

            # Wait for the next page to finish loading.
            time.sleep(2)
# 经验分享 程序员 微信小程序 职场和发展