Python基于Selenium实现自动打开百度,京东(Python automatically opens Baidu and jd.com based on Selenium)-python
Python基于Selenium实现自动打开百度,京东(Python automatically opens Baidu and jd.com based on Selenium)
使用 Selenium 调用浏览器需要一个驱动,浏览器的 webdriver 需要独立安装。如果是 Chrome,在浏览器地址栏输入 chrome://version/ 查看相应版本,
然后到 http://npm.taobao.org/mirrors/chromedriver/ 下载相应版本的驱动即可。
百度
# _*_ coding:utf-8 _*_
# Open Baidu in Chrome through Selenium and run a search.
from selenium import webdriver
from selenium.webdriver.common.by import By

# Creating the driver object launches a blank Chrome window.
driver = webdriver.Chrome()
# Navigate to the Baidu home page.
driver.get("https://www.baidu.com")
# The search input box has id="kw". A located element can be stored in a
# variable and then operated on through that variable.
# find_element(By.ID, ...) is used instead of find_element_by_id(), which
# Selenium 4 deprecated and later removed.
ele = driver.find_element(By.ID, "kw")
ele.send_keys("大风吹")  # type the query into the text box
# An element can also be used directly; the search button has id="su".
driver.find_element(By.ID, "su").click()
# Do not assign the result of click() to a variable: click() returns None,
# so a later ele.click() would amount to None.click().
# Call driver.quit() once the browser window is no longer needed.
京东:
# _*_ coding:gbk _*_
# 爬取 https://www.jd.com/ 京东图书
import csv
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Creating the driver object launches a blank Chrome window.
driver = webdriver.Chrome()
# Open the JD.com home page.
driver.get("https://www.jd.com")
# Type the search keyword into the search box (id="key").
p_input = driver.find_element(By.ID, 'key')
p_input.send_keys('python编程')
time.sleep(1)
# Click the search button. click() returns None, so its result is
# deliberately not bound to a name.
driver.find_element(By.CLASS_NAME, "button").click()
time.sleep(1)
all_book_info = []  # rows collected so far, one [name, price] list per book
num = 200  # number of books still to collect
head = ['书名', '价格']  # CSV header row
# Path and file name of the CSV output.
path = './book.csv'
def write_csv(head, all_book_info, path, encoding='gbk'):
    """Write a header row followed by the data rows to a CSV file.

    Args:
        head: Sequence of column titles, written as the first row.
        all_book_info: Iterable of rows; each row is a sequence of fields.
        path: Destination file path; an existing file is overwritten.
        encoding: Text encoding of the output file. Defaults to 'gbk', the
            encoding the script has always used. Pass another encoding
            (for example 'utf-8-sig') when a field may contain characters
            that GBK cannot represent.

    Raises:
        UnicodeEncodeError: If a field cannot be encoded with ``encoding``.
    """
    # newline='' lets the csv module control the line endings itself.
    with open(path, 'w', newline='', encoding=encoding) as file:
        writer = csv.writer(file)
        writer.writerow(head)
        writer.writerows(all_book_info)
# 爬取一页
def get_onePage_info(num):
    """Collect book names and prices from the current result page.

    Scrolls to the bottom of the page, waits two seconds, then visits every
    ``li`` under the element with id ``J_goodsList`` and appends one
    ``[name, price]`` row per item to the module-level ``all_book_info``.

    Args:
        num: Number of books still to collect.

    Returns:
        ``num`` reduced by the number of rows appended during this call.
        Iteration stops early as soon as the count reaches 0.
    """
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(2)
    goods_list = driver.find_element(By.ID, "J_goodsList")
    for item in goods_list.find_elements(By.TAG_NAME, "li"):
        num -= 1
        name_box = item.find_element(By.CLASS_NAME, "p-name")
        title = name_box.find_element(By.TAG_NAME, "em").text
        price_box = item.find_element(By.CLASS_NAME, "p-price")
        cost = price_box.find_element(By.TAG_NAME, "i").text
        # NOTE: the original author left a disabled sketch here that would
        # also read author and store from the item's "p-bookdetails"
        # element ("p-bi-name" / "p-bi-store"); only name and price are kept.
        all_book_info.append([title, cost])
        if num == 0:
            break
    return num
# Collect page after page until the requested number of books is reached.
try:
    while num > 0:
        num = get_onePage_info(num)
        if num > 0:
            # More books are still needed: move to the next result page.
            # (No click once the target is met, so finishing on the last
            # page does not fail on a missing "next" button.)
            driver.find_element(By.CLASS_NAME, 'pn-next').click()
            time.sleep(2)
finally:
    # Save whatever was collected, even if a page failed part-way through.
    write_csv(head, all_book_info, path)
# Call driver.close() here when the browser window should be closed.
————————
Using Selenium to drive a browser requires a driver; the browser's webdriver has to be installed separately. For Chrome, enter chrome://version/ in the browser's address bar to check the installed version,
then download the matching driver from http://npm.taobao.org/mirrors/chromedriver/.
Baidu
# _*_ coding:utf-8 _*_
# Open Baidu in Chrome through Selenium and run a search.
from selenium import webdriver
from selenium.webdriver.common.by import By

# Creating the driver object launches a blank Chrome window.
driver = webdriver.Chrome()
# Navigate to the Baidu home page.
driver.get("https://www.baidu.com")
# The search input box has id="kw". A located element can be stored in a
# variable and then operated on through that variable.
# find_element(By.ID, ...) is used instead of find_element_by_id(), which
# Selenium 4 deprecated and later removed.
ele = driver.find_element(By.ID, "kw")
ele.send_keys("大风吹")  # type the query into the text box
# An element can also be used directly; the search button has id="su".
driver.find_element(By.ID, "su").click()
# Do not assign the result of click() to a variable: click() returns None,
# so a later ele.click() would amount to None.click().
# Call driver.quit() once the browser window is no longer needed.
JD.COM:
# _*_ coding:gbk _*_
# 爬取 https://www.jd.com/ 京东图书
import csv
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Creating the driver object launches a blank Chrome window.
driver = webdriver.Chrome()
# Open the JD.com home page.
driver.get("https://www.jd.com")
# Type the search keyword into the search box (id="key").
p_input = driver.find_element(By.ID, 'key')
p_input.send_keys('python编程')
time.sleep(1)
# Click the search button. click() returns None, so its result is
# deliberately not bound to a name.
driver.find_element(By.CLASS_NAME, "button").click()
time.sleep(1)
all_book_info = []  # rows collected so far, one [name, price] list per book
num = 200  # number of books still to collect
head = ['书名', '价格']  # CSV header row
# Path and file name of the CSV output.
path = './book.csv'
def write_csv(head, all_book_info, path, encoding='gbk'):
    """Write a header row followed by the data rows to a CSV file.

    Args:
        head: Sequence of column titles, written as the first row.
        all_book_info: Iterable of rows; each row is a sequence of fields.
        path: Destination file path; an existing file is overwritten.
        encoding: Text encoding of the output file. Defaults to 'gbk', the
            encoding the script has always used. Pass another encoding
            (for example 'utf-8-sig') when a field may contain characters
            that GBK cannot represent.

    Raises:
        UnicodeEncodeError: If a field cannot be encoded with ``encoding``.
    """
    # newline='' lets the csv module control the line endings itself.
    with open(path, 'w', newline='', encoding=encoding) as file:
        writer = csv.writer(file)
        writer.writerow(head)
        writer.writerows(all_book_info)
# 爬取一页
def get_onePage_info(num):
    """Collect book names and prices from the current result page.

    Scrolls to the bottom of the page, waits two seconds, then visits every
    ``li`` under the element with id ``J_goodsList`` and appends one
    ``[name, price]`` row per item to the module-level ``all_book_info``.

    Args:
        num: Number of books still to collect.

    Returns:
        ``num`` reduced by the number of rows appended during this call.
        Iteration stops early as soon as the count reaches 0.
    """
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    time.sleep(2)
    goods_list = driver.find_element(By.ID, "J_goodsList")
    for item in goods_list.find_elements(By.TAG_NAME, "li"):
        num -= 1
        name_box = item.find_element(By.CLASS_NAME, "p-name")
        title = name_box.find_element(By.TAG_NAME, "em").text
        price_box = item.find_element(By.CLASS_NAME, "p-price")
        cost = price_box.find_element(By.TAG_NAME, "i").text
        # NOTE: the original author left a disabled sketch here that would
        # also read author and store from the item's "p-bookdetails"
        # element ("p-bi-name" / "p-bi-store"); only name and price are kept.
        all_book_info.append([title, cost])
        if num == 0:
            break
    return num
# Collect page after page until the requested number of books is reached.
try:
    while num > 0:
        num = get_onePage_info(num)
        if num > 0:
            # More books are still needed: move to the next result page.
            # (No click once the target is met, so finishing on the last
            # page does not fail on a missing "next" button.)
            driver.find_element(By.CLASS_NAME, 'pn-next').click()
            time.sleep(2)
finally:
    # Save whatever was collected, even if a page failed part-way through.
    write_csv(head, all_book_info, path)
# Call driver.close() here when the browser window should be closed.