Selenium自动化测试工具——以爬取京东商品信息为例
需要导入的包(selenium、pyquery 需要先用 pip 安装;re、csv、time 为 Python 标准库,无需安装)
import csv
import re
import time

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
完整代码
# Maximum number of attempts for a page interaction before a timeout is
# treated as a real failure (the original retried forever via recursion).
MAX_RETRIES = 3

# Extracts the first run of digits from the pager text returned by search().
_PAGE_COUNT_RE = re.compile(r"(\d+)")

# One Chrome session and one explicit-wait helper (10 second timeout) are
# shared by every function below.
browser = webdriver.Chrome()
wait = WebDriverWait(browser, 10)


def search():
    """Open the JD home page, submit the search, and return the pager text.

    Reads the module-level ``keywords`` string that ``main()`` sets.

    Returns:
        The text of the pager element that contains the total page count.

    Raises:
        TimeoutException: if the page did not respond within the wait
            timeout on ``MAX_RETRIES`` consecutive attempts.
    """
    print("正在搜索")
    for _ in range(MAX_RETRIES):
        try:
            # Reload the home page on every attempt so a retry starts clean.
            browser.get("https://www.jd.com/")
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#key"))
            )
            search_box.send_keys(keywords)
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#search > div > div.form > button")
                )
            )
            time.sleep(3)
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#J_bottomPage > span.p-skip > em:nth-child(1)")
                )
            )
            return total.text
        except TimeoutException:
            continue
    raise TimeoutException(f"search timed out {MAX_RETRIES} times")


def next_page(page_number):
    """Jump to result page ``page_number`` and scrape it.

    Args:
        page_number: 1-based page index typed into the pager's jump box.

    Raises:
        TimeoutException: if the jump (or the product list on the new page)
            timed out on ``MAX_RETRIES`` consecutive attempts.
    """
    print(f"正在翻第{page_number}页")
    for _ in range(MAX_RETRIES):
        try:
            print("定位到跳转页数")
            page_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "#J_bottomPage > span.p-skip > input")
                )
            )
            print("定位到跳转按钮,确保可点击")
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#J_bottomPage > span.p-skip > a")
                )
            )
            page_box.clear()
            page_box.send_keys(str(page_number))
            submit.click()
            # Only scrape once the pager highlights the requested page.
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, "#J_bottomPage > span.p-num > a.curr"),
                    str(page_number),
                )
            )
            # get_products() waits before it writes anything, so a timeout
            # inside it cannot leave a half-written page behind.
            get_products()
            return
        except TimeoutException:
            continue
    raise TimeoutException(
        f"page {page_number} timed out {MAX_RETRIES} times"
    )


def _image_url(item):
    """Return the thumbnail URL for one product node, or "" if absent."""
    img = item(".gl-i-wrap .p-img a img")
    # NOTE(review): the data-lazy-img fallback assumes not-yet-loaded
    # thumbnails keep their URL in that attribute - confirm on the live page.
    src = img.attr("src") or img.attr("data-lazy-img")
    if not src:
        # The original emitted the literal "http:None" in this case.
        return ""
    # Protocol-relative URLs ("//host/path") get an explicit scheme.
    return "http:" + src if src.startswith("//") else src


def get_products():
    """Parse the current result page and write one CSV row per product.

    Each row is ``[image, price, title, shop, comment]`` and is both printed
    and written through the module-level ``writer`` that ``main()`` sets.

    Raises:
        TimeoutException: if the product list does not appear in time.
    """
    wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#J_goodsList > ul"))
    )
    doc = pq(browser.page_source, parser="html")
    for item in doc("#J_goodsList .gl-item").items():
        product = [
            _image_url(item),
            item.find(".p-price").text(),
            item.find(".p-name").text().strip(),
            item.find("div span a").text(),
            item.find(".p-commit a").text(),
        ]
        print(product)
        writer.writerow(product)


def _parse_total_pages(text):
    """Return the first integer found in the pager text.

    Raises:
        ValueError: if ``text`` contains no digits.
    """
    match = _PAGE_COUNT_RE.search(text)
    if match is None:
        raise ValueError(f"no page count found in pager text: {text!r}")
    return int(match.group(1))


def main():
    """Prompt for a keyword, scrape every result page into result.csv."""
    # search() and get_products() read these as module-level names.
    global keywords, writer
    try:
        keywords = input("请输入关键字:")
        total = _parse_total_pages(search())
        # errors="replace" keeps one character that gbk cannot encode from
        # aborting the whole run; the context manager closes the file even
        # when a page fails.
        with open("result.csv", mode="w", encoding="gbk",
                  errors="replace", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["image", "price", "title", "shop", "comment"])
            # Page 1 is already displayed after search(); the original
            # started at page 2 and silently dropped these rows.
            try:
                get_products()
            except TimeoutException:
                print("第1页加载超时,已跳过")
            for i in range(2, total + 1):
                try:
                    next_page(i)
                except TimeoutException:
                    # Skip a page that keeps timing out instead of losing
                    # every page after it.
                    print(f"第{i}页多次超时,已跳过")
    finally:
        # Always release the Chrome/chromedriver processes.
        browser.quit()


if __name__ == "__main__":
    main()
上一篇:
5款热门的远程控制软件,让你事半功倍
下一篇:
软件测试如何快速入门