# -*- coding: utf-8 -*-
"""
Created on Tue Oct 22 10:41:23 2024
@author: 1
"""
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import time
import random
import requests
from selenium.webdriver.chrome.options import Options
import re
import json
# Filesystem location of the ChromeDriver executable.
chrome_driver_path = "D:\\chromedriver\\chromedriver.exe"

# Start from default options and point Selenium at the Chrome binary to launch.
chrome_options = Options()
chrome_options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"

# Command-line switches handed to Chrome, applied in the order listed.
_chrome_switches = (
    # Override the User-Agent string reported by the browser.
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
    # Run without extensions.
    "--disable-extensions",
    # Turn off GPU acceleration.
    "--disable-gpu",
    # Cut down console logging.
    "--log-level=3",
    # Run without the sandbox.
    "--no-sandbox",
    # Do not load plugins.
    "--disable-plugins",
    # Original note: suppress the save-password prompt.
    "--password-store=basic",
    # Switch off the popup blocker (the flag name reads as allowing popups,
    # not suppressing them -- NOTE(review): confirm this is what is wanted).
    "--disable-popup-blocking",
    # No desktop notifications.
    "--disable-notifications",
    # Silence audio output.
    "--mute-audio",
)
for _switch in _chrome_switches:
    chrome_options.add_argument(_switch)

# Optional proxy, left disabled:
# chrome_options.add_argument("--proxy-server=your_proxy_server:port")

# Wrap the driver executable in a Service and launch the browser with the
# options assembled above.
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Apply selenium-stealth to the new session to make automation harder to detect.
_stealth_profile = {
    "languages": ["en-US", "en"],
    "vendor": "Google Inc.",
    "platform": "Win32",
    "webgl_vendor": "Intel Inc.",
    "renderer": "Intel Iris OpenGL Engine",
    "fix_hairline": True,
}
stealth(driver, **_stealth_profile)
# Upper bound (seconds) for each detail request so a stalled connection
# cannot hang the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def print_json_values(data, prefix=""):
    """Recursively print every leaf value of a decoded JSON document.

    Each leaf is printed as ``path: value``. The path is built from
    dictionary keys joined with ``.`` and list positions written as ``[i]``.

    Args:
        data: A decoded JSON value (dict, list, or scalar).
        prefix: Path accumulated so far; empty for the document root.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            new_prefix = f"{prefix}.{key}" if prefix else key
            print_json_values(value, new_prefix)
    elif isinstance(data, list):
        for index, value in enumerate(data):
            print_json_values(value, f"{prefix}[{index}]")
    else:
        print(f"{prefix}: {data}")


try:
    # Open the landing page.
    driver.get("https:///")

    # Report where the browser actually ended up, and reload that URL if it
    # differs from the expected one.
    final_url = driver.current_url
    print(final_url)
    if final_url != "https://":
        print(f"发生了重定向,最终 URL: {final_url}")
        driver.get(final_url)

    # Make navigator.webdriver read as undefined in the current page.
    driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

    # Wait a random 1-4 seconds before interacting with the page.
    random_time = random.uniform(1, 4)
    time.sleep(random_time)

    # Queue the whole login gesture (username, password, submit) on a single
    # ActionChains object; nothing is executed until perform() below.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "username"))
    )
    actions = webdriver.ActionChains(driver)
    actions.move_to_element(element)
    actions.pause(random.uniform(0.5, 1.5))
    actions.click()
    actions.pause(random.uniform(0.5, 1.5))
    actions.send_keys("username")

    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.NAME, "password"))
    )
    actions.move_to_element(element)
    actions.pause(random.uniform(0.5, 1.5))
    actions.click()
    actions.pause(random.uniform(0.5, 1.5))
    actions.send_keys("password")

    # Locate the login button (matched through its inner "登录" span).
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//button[@class="el-button handle-btn el-button--default"]/span[text()="登录"]'))
    )
    actions.move_to_element(element)
    actions.pause(random.uniform(0.5, 1.5))
    actions.click()

    # Run the queued actions, then give the page a moment to react.
    actions.perform()
    time.sleep(2)

    # Show the page title after the login attempt.
    page_title = driver.title
    print(f"页面标题: {page_title}")

    # Navigate to the listing page and let it settle.
    driver.get("https://")
    time.sleep(5)

    # NOTE(review): the original comment said "assume 5 pages", but the loop
    # runs exactly once; that behaviour is kept here.
    for _ in range(1):
        # Click the "下一页" (next page) button, then wait for the new page.
        button = driver.find_element(By.XPATH, "//button[@class='btn-next' and span[text()='下一页']]")
        button.click()
        time.sleep(3)
    print("aaaaaaaaaaaaaaaaaaaaaaa")

    # Copy the browser session's cookies into the name -> value mapping that
    # requests accepts.
    cookies = driver.get_cookies()
    cookie_dict = {cookie['name']: cookie['value'] for cookie in cookies}

    # Hand-written request headers (not read back from the browser).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Accept-Language': 'en-US,en;q=0.9',
        'Content-Type': 'application/x-www-form-urlencoded',  # adjust as needed
        'Referer': 'https://www.qianlima.com/'  # adjust as needed
    }

    # Pull every numeric data-cid attribute out of the rendered page.
    ahtml = driver.page_source
    pattern = r'data-cid="(\d+)"'
    matches = re.findall(pattern, ahtml)

    for cid in matches:
        print(cid)
        aurl = 'https://www.qianlima.com/bid-' + cid + '.html'
        print(aurl)
        ajsonurl = 'https://detail.vip.qianlima.com/rest/detail/alltypesdetail/detail/' + cid
        print(ajsonurl)

        # POST to the detail endpoint with the browser's cookies.
        # NOTE(review): the original also built a form payload
        # ({'psw': '', 'username': '0'}) that was never passed to the
        # request; it is dropped here and the request is still sent
        # without a body.
        response = requests.post(
            ajsonurl,
            cookies=cookie_dict,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )

        if response.status_code == 200:
            print('POST 请求成功')
            print('响应内容:', response.text)
            # Decode the JSON body and print every field it contains.
            data = json.loads(response.text)
            print('开始解析结果')
            print_json_values(data)
            # Placeholder from the original: store the results in a database.
            print('结束解析结果')
        else:
            print('POST 请求失败,状态码:', response.status_code)
            print('响应内容:', response.text)
except Exception as e:
    # Top-level boundary of the script: report the failure instead of
    # letting a traceback escape.
    print(f"发生错误: {e}")
finally:
    # Always shut down the browser and its chromedriver process, on success
    # and on failure, so no orphaned processes are left behind.
    driver.quit()
