Python爬虫如何越过滑动验证码?

如上图,这样的滑动验证码要怎么越过?
Python爬虫如何越过滑动验证码?
3 回复
要过滑动验证码,得先搞清楚它怎么工作的。一般分三步:获取验证码图片、计算滑块需要移动的距离、模拟滑动轨迹。
这里给个用Selenium和OpenCV的完整方案:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import cv2
import numpy as np
import time
import requests
from io import BytesIO
def get_slider_distance(bg_url, slider_url):
    """Return the x offset where the slider image best matches the background.

    Both images are downloaded with ``requests``, decoded with OpenCV and
    compared using normalized cross-correlation template matching.

    Args:
        bg_url: URL of the background image.
        slider_url: URL of the slider-piece image.

    Returns:
        The x coordinate, in pixels of the downloaded background image, of the
        top-left corner of the best match.

    Raises:
        requests.RequestException: If a download fails, times out or returns
            an HTTP error status.
        ValueError: If a response body cannot be decoded as an image.
    """
    images = []
    for url in (bg_url, slider_url):
        # A timeout keeps a stalled server from hanging the caller forever.
        resp = requests.get(url, timeout=10)
        # Fail on 4xx/5xx instead of trying to decode an error page.
        resp.raise_for_status()
        img = cv2.imdecode(np.frombuffer(resp.content, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals an undecodable buffer by returning None.
            raise ValueError(f"could not decode image from {url!r}")
        images.append(img)
    bg_img, slider_img = images

    # Locate the slider image inside the background via template matching.
    result = cv2.matchTemplate(bg_img, slider_img, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(result)
    # max_loc is (x, y) of the highest correlation score; only x is needed.
    return max_loc[0]
def generate_track(distance):
    """Split a horizontal drag of ``distance`` pixels into integer step offsets.

    The motion accelerates (a = 2) for the first three quarters of the
    distance and decelerates (a = -3) afterwards, sampled at a fixed time
    step of 0.2. The last element is a correction step that brings the total
    back to the target after the final sample ran past it.

    Each step is the difference between successive *rounded cumulative*
    positions rather than an independently rounded move, so rounding errors
    do not accumulate and ``sum(track) == round(distance)`` always holds.

    Args:
        distance: Total horizontal distance to cover, in pixels.

    Returns:
        A list of integer x offsets, one per step, ending with the correction
        step.
    """
    track = []
    current = 0  # exact (fractional) position reached so far
    moved = 0  # whole pixels already emitted into ``track``
    mid = distance * 3 / 4
    t = 0.2
    v = 0
    while current < distance:
        a = 2 if current < mid else -3
        v0 = v
        v = v0 + a * t
        current += v0 * t + 0.5 * a * t * t
        # Emit only the whole pixels gained since the previous step.
        target = round(current)
        track.append(target - moved)
        moved = target
    # Final correction so the steps add up exactly to the target.
    track.append(round(distance) - moved)
    return track
def bypass_slider_captcha():
    """Open the target page and drag the slider along a generated track.

    Starts a Chrome WebDriver, waits up to 10 seconds for the background and
    slider elements, reads their ``src`` URLs, computes the drag distance with
    ``get_slider_distance``, builds a track with ``generate_track`` and replays
    it with mouse actions. Any exception is printed, and the browser is always
    closed.

    Returns:
        None.
    """
    driver = webdriver.Chrome()
    try:
        # Navigation happens inside the try block so that a failed page load
        # still reaches driver.quit() and does not leak the browser process.
        driver.get("目标网站URL")

        # Wait for the captcha elements to appear.
        wait = WebDriverWait(driver, 10)
        bg_element = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, "验证码背景图class"))
        )
        slider_element = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, "滑块class"))
        )

        # Read the image URLs.
        bg_url = bg_element.get_attribute("src")
        slider_url = slider_element.get_attribute("src")

        # Compute the drag distance and the per-step offsets.
        distance = get_slider_distance(bg_url, slider_url)
        track = generate_track(distance)

        # A fresh ActionChains is used for every step so that perform() never
        # replays actions queued by an earlier step, regardless of whether the
        # installed Selenium version clears the queue after perform().
        ActionChains(driver).click_and_hold(slider_element).perform()
        for move in track:
            ActionChains(driver).move_by_offset(move, 0).perform()
            time.sleep(0.01)
        ActionChains(driver).release().perform()

        time.sleep(2)
        # NOTE(review): this message is printed unconditionally; nothing here
        # checks whether the page actually accepted the slide.
        print("验证码已通过")
    except Exception as e:
        print(f"出错: {e}")
    finally:
        driver.quit()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    bypass_slider_captcha()
关键点:
- 用OpenCV的模板匹配找缺口位置,比像素对比更准
- 轨迹模拟要带加速度变化,不能匀速
- 最后要有个回拉微调,避免刚好差几个像素
有些网站会检测轨迹,这时候得用更复杂的轨迹算法,或者考虑打码平台。
总结:核心就是图片识别+轨迹模拟。
在 V 站站内搜索“极验”,就能找到相关帖子。


