selenium爬取网页_vivo定位查找手机登录网址

(39) 2024-08-15 10:01:01

selenium模拟登录爬取多页面vivo手机信息,xpath进行数据解析,最终追加保存为csv形式

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from lxml import etree
import pandas as pd
import time
import os

def login(url):
    """Open *url*, run a site search for '手机' and return the result page HTML.

    Uses the module-level ``driver`` and ``wait`` objects; both must exist
    before this function is called.

    Args:
        url: Address of the page that hosts the header search box.

    Returns:
        ``driver.page_source`` as read after the search has been submitted.
    """
    # Locators for the three header-search elements the flow interacts with.
    search_icon = (By.XPATH, '//span[@class="vp-head-search"]')
    search_input = (By.XPATH, '//div[@class="vp-head-search-input"]/input')
    search_confirm = (By.XPATH, '//span[@class="vp-head-search-confirm"]')

    driver.get(url)
    driver.maximize_window()

    # Open the search box.
    wait.until(EC.element_to_be_clickable(search_icon)).click()
    # Focus the input, then look it up again before typing the keyword.
    wait.until(EC.element_to_be_clickable(search_input)).click()
    wait.until(EC.presence_of_element_located(search_input)).send_keys('手机')
    # Submit the search.
    wait.until(EC.element_to_be_clickable(search_confirm)).click()

    # Fixed pause before reading the page (presumably to let results render).
    time.sleep(5)
    return driver.page_source


def get_next_page():
    """Click the "next page" control and return the resulting page HTML.

    Used for every page after the first one. Relies on the module-level
    ``driver`` and ``wait`` objects.

    Returns:
        ``driver.page_source`` as read five seconds after the click.
    """
    # Scroll to the bottom of the page so the pager becomes visible.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")

    # NOTE(review): as a CSS selector, 'pagingR' is a bare type selector (it
    # matches a <pagingR> tag). If the pager is identified by class or id this
    # should probably be '.pagingR' or '#pagingR' - confirm against the page.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'pagingR'))).click()

    # Fixed pause before reading the page (presumably to let it reload).
    time.sleep(5)
    return driver.page_source

def parse_page(html):
    """Extract product name, price and description from a result page.

    Each field is selected with its own XPath query. The extracted list and
    its length are printed for every field so that a mismatch between the
    three lists is visible in the output.

    Args:
        html: Page source of a search-result page.

    Returns:
        A ``pandas.DataFrame`` with the columns '商品名称', '商品价格' and
        '商品概述', one row per product.

    Note:
        The DataFrame is built directly from the three lists, so they must
        have the same length; pandas rejects columns of unequal length.
    """
    dom = etree.HTML(html)
    # Output column -> XPath selecting that column's text nodes.
    field_xpaths = {
        '商品名称': '//div[@class="itemTitle"]/span[1]/text()',
        '商品价格': '//div[@class="itemTitle"]/span[2]/text()',
        '商品概述': '//div[@class="itemText"]/text()',
    }
    columns = {}
    for column, xpath in field_xpaths.items():
        values = dom.xpath(xpath)
        print(values)
        # Report the length of THIS field's list (the name list's length used
        # to be printed for all three fields, hiding mismatches).
        print(len(values))
        columns[column] = values
    return pd.DataFrame(columns)
def save_file(data):
    """Append one page of scraped rows to the output CSV file.

    The header row is written only when the file does not exist yet, so
    repeated calls produce a single header followed by all appended rows.

    Args:
        data: DataFrame containing the columns '商品名称', '商品价格' and
            '商品概述'.
    """
    filename = 'vivo手机与手机配件信息.csv'
    columns = ['商品名称','商品价格','商品概述']
    # Only the very first write (file not yet present) emits the header.
    write_header = not os.path.exists(filename)
    data.to_csv(
        filename,
        mode='a',
        encoding='utf_8_sig',
        columns=columns,
        index=False,
        header=write_header,
    )
    print("保存成功!")

def _crawl_pages(first_html, page_count, label):
    """Parse and save *page_count* consecutive result pages.

    Args:
        first_html: Page source of the first page of the listing.
        page_count: Number of pages to process, including the first one.
        label: Category name used in the progress message.
    """
    html = first_html
    for page_no in range(1, page_count + 1):
        if page_no != 1:  # every page after the first is reached via "next"
            html = get_next_page()
        data = parse_page(html)
        print(data)
        save_file(data)
        print("{}的第{}页爬取完成!".format(label, page_no))


if __name__ == '__main__':
    # ``driver`` and ``wait`` are module-level names used by login() and
    # get_next_page().
    driver = webdriver.Chrome()
    wait = WebDriverWait(driver, 10)
    url = "https://vivo.com.cn/"
    try:
        # Search for phones and scrape the phone result pages.
        html = login(url)
        _crawl_pages(html, 7, "手机")

        # Switch to the accessories tab.
        search_btn = wait.until(EC.element_to_be_clickable((By.ID, 'tab_contentAccessories')))
        search_btn.click()
        # Re-read the page after switching tabs; otherwise the first
        # accessories page would re-parse the last phone page's HTML.
        time.sleep(5)
        html = driver.page_source

        # Scrape the accessory result pages.
        _crawl_pages(html, 9, "配件")
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window), and it runs even if scraping fails part-way.
        driver.quit()

THE END

发表回复