Selenium自动化测试工具——以爬取京东商品信息为例
需要安装的包
import csv
import re
import time

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
完整代码
# Single Chrome session shared by the scraping functions in this script.
browser = webdriver.Chrome()
# Explicit-wait helper bound to that session; each wait gives up after 10 seconds.
wait = WebDriverWait(browser, timeout=10)
def search():
    """Open the JD home page, submit the keyword search and return the pager text.

    Uses the module-level ``browser`` and ``wait`` objects and reads the search
    term from the module-level ``keywords`` variable.

    Whenever one of the explicit waits times out, the whole sequence (page
    load, typing, click) is started again from scratch.

    Returns:
        str: text of the ``#J_bottomPage > span.p-skip > em:nth-child(1)``
        element of the results page. ``main`` extracts the first run of
        digits from it and uses that as the number of result pages.
    """
    # A loop instead of self-recursion: a long run of timeouts can no longer
    # grow the call stack. NOTE(review): retries are still unbounded.
    while True:
        print("正在搜索")
        try:
            browser.get("https://www.jd.com/")
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#key"))
            )
            search_box.send_keys(keywords)
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#search > div > div.form > button")
                )
            )
            # Fixed pause before clicking; presumably lets the page settle
            # after typing -- TODO confirm it is still needed.
            time.sleep(3)
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#J_bottomPage > span.p-skip > em:nth-child(1)")
                )
            )
            return total.text
        except TimeoutException:
            # Some element did not show up within the wait timeout: retry.
            continue
def next_page(page_number):
    """Jump to result page ``page_number`` and scrape its products.

    Types the page number into the pager's "skip to" box, clicks the jump
    link, waits until the highlighted pager entry shows the requested number
    and then calls ``get_products()``.

    If any explicit wait times out (including the one inside
    ``get_products``), the whole jump is attempted again.

    Args:
        page_number: 1-based number of the result page to open.
    """
    # A loop instead of self-recursion: a long run of timeouts can no longer
    # grow the call stack. NOTE(review): retries are still unbounded.
    while True:
        print(f"正在翻第{page_number}页")
        try:
            print("定位到跳转页数")
            page_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "#J_bottomPage > span.p-skip > input")
                )
            )
            print("定位到跳转按钮,确保可点击")
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#J_bottomPage > span.p-skip > a")
                )
            )
            page_box.clear()
            page_box.send_keys(str(page_number))
            submit.click()
            # The jump is only considered done once the current-page marker
            # in the pager contains the requested number.
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, "#J_bottomPage > span.p-num > a.curr"),
                    str(page_number),
                )
            )
            get_products()
            return
        except TimeoutException:
            continue
def get_products():
    """Scrape every product on the current result page into the CSV.

    Waits for the product list to be present, parses the browser's current
    page source with PyQuery and, for each ``.gl-item`` element, prints and
    writes one row ``[image, price, title, shop, comment]`` through the
    module-level ``writer`` (set up by ``main``).

    Raises:
        TimeoutException: if the product list does not appear within the
            wait timeout (``next_page`` catches this and retries).
    """
    wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#J_goodsList > ul"))
    )
    doc = pq(browser.page_source, parser="html")
    for item in doc("#J_goodsList .gl-item").items():
        src = item(".gl-i-wrap .p-img a img").attr("src")
        # A missing src attribute used to produce the literal "http:None";
        # write an empty cell instead.
        image = "http:" + str(src) if src else ""
        price = item.find(".p-price").text()
        title = item.find(".p-name").text().strip()
        shop = item.find("div span a").text()
        comment = item.find(".p-commit a").text()
        product = [image, price, title, shop, comment]
        print(product)
        writer.writerow(product)
def main():
    """Prompt for a keyword, run the search and dump result pages to result.csv.

    Sets the module-level names ``keywords`` (read by ``search``), ``f`` and
    ``writer`` (used by ``get_products``). The CSV is written with GBK
    encoding and starts with a header row.
    """
    # ``search`` reads ``keywords`` as a global, so it must be bound at module
    # level here; a plain local assignment would leave it undefined there.
    global keywords, f, writer
    keywords = input("请输入关键字:")
    total = search()
    # The pager text contains the page count; take its first run of digits.
    total = int(re.compile(r"(\d+)").search(total).group(1))
    # The context manager closes the file even if a page fails mid-run.
    with open("result.csv", mode="w", encoding="gbk", newline="") as f:
        writer = csv.writer(f)
        head = ["image", "price", "title", "shop", "comment"]
        writer.writerow(head)
        # NOTE(review): iteration starts at page 2, so the products on the
        # first result page (already loaded by search()) are never written.
        # Confirm whether that is intended.
        for i in range(2, total + 1):
            next_page(i)
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always shut down the Chrome session started at module level so the
        # browser and its driver process are not left running.
        browser.quit()
上一篇:
5款热门的远程控制软件,让你事半功倍
下一篇:
软件测试如何快速入门
