Python爬虫如何越过滑动验证码?

如上图,这样的滑动窗要怎么越过?


Python爬虫如何越过滑动验证码?
3 回复

我记得 v 站原来有个帅哥没事干就破这个的,不管厂商怎么更新,他就秒破。
你搜一下,不过应该需要点技术的,不是自己要写就能写出来的


要过滑动验证码,得先搞清楚它怎么工作的。一般分三步:获取验证码图片、计算滑块需要移动的距离、模拟滑动轨迹。

这里给个用Selenium和OpenCV的完整方案:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import cv2
import numpy as np
import time
import requests
from io import BytesIO

def get_slider_distance(bg_url, slider_url):
    """Return the x offset where the slider image best matches the background.

    Both images are downloaded over HTTP, decoded with OpenCV and compared
    using normalized cross-correlation template matching.

    Args:
        bg_url: URL of the captcha background image.
        slider_url: URL of the slider (puzzle piece) image.

    Returns:
        The x coordinate, in image pixels, of the best template match of the
        slider image inside the background image.

    Raises:
        requests.RequestException: If a download fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If a response body cannot be decoded as an image.
    """
    images = []
    for url in (bg_url, slider_url):
        # A timeout keeps a stalled server from blocking the caller forever.
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()

        img = cv2.imdecode(np.frombuffer(resp.content, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            # imdecode signals failure by returning None instead of raising;
            # fail here with a clear message rather than inside matchTemplate.
            raise ValueError(f"could not decode image downloaded from {url!r}")
        images.append(img)

    bg_img, slider_img = images

    # With TM_CCOEFF_NORMED the best match is the maximum of the result map.
    result = cv2.matchTemplate(bg_img, slider_img, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(result)

    return max_loc[0]

def generate_track(distance):
    """Build a list of integer x steps whose sum equals the target distance.

    The motion is simulated with constant acceleration (+2) for the first
    three quarters of the distance and constant deceleration (-3) afterwards,
    sampled every 0.2 time units. A final corrective step cancels whatever
    overshoot the simulation produced.

    Args:
        distance: Total distance to cover, in pixels.

    Returns:
        A list of ints. The last element is the corrective step (zero or
        negative for a positive distance); the elements sum to
        ``round(distance)``.
    """
    track = []
    current = 0.0  # exact simulated position
    emitted = 0    # integer position reached by the steps emitted so far
    mid = distance * 3 / 4
    t = 0.2
    v = 0.0

    while current < distance:
        a = 2 if current < mid else -3

        v0 = v
        v = v0 + a * t
        current += v0 * t + 0.5 * a * t * t

        # Emit the change in *rounded cumulative position* rather than the
        # rounded per-step move; rounding each move on its own lets the
        # errors pile up so the track ends several pixels off target.
        step = round(current) - emitted
        track.append(step)
        emitted += step

    # Final pull-back so the steps add up exactly to the requested distance.
    track.append(round(distance) - emitted)

    return track

def bypass_slider_captcha():
    """Open the target page and drag the captcha slider along a generated track.

    Steps: wait for the background and slider elements, read their ``src``
    URLs, compute the horizontal offset with ``get_slider_distance``, build a
    step list with ``generate_track`` and replay it as a mouse drag.

    Any exception is printed rather than propagated, and the browser is
    always closed before returning.
    """
    driver = webdriver.Chrome()

    try:
        # Navigate inside the try block so a failed page load still reaches
        # driver.quit() and does not leave a browser process behind.
        driver.get("目标网站URL")

        # Wait for the captcha elements to be present in the DOM.
        wait = WebDriverWait(driver, 10)
        bg_element = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "验证码背景图class")))
        slider_element = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "滑块class")))

        # Image URLs used for the template match.
        bg_url = bg_element.get_attribute("src")
        slider_url = slider_element.get_attribute("src")

        # NOTE(review): the distance is measured in image pixels; if the page
        # renders the image scaled, the on-screen offset differs — confirm.
        distance = get_slider_distance(bg_url, slider_url)

        track = generate_track(distance)

        # Use a fresh ActionChains per perform(): some Selenium releases keep
        # already-performed actions queued, so reusing one chain would replay
        # the press and all earlier moves on every subsequent perform().
        ActionChains(driver).click_and_hold(slider_element).perform()

        for move in track:
            ActionChains(driver).move_by_offset(move, 0).perform()
            time.sleep(0.01)

        ActionChains(driver).release().perform()
        time.sleep(2)

        # NOTE(review): printed unconditionally; the page is not inspected to
        # confirm that the captcha was actually accepted.
        print("验证码已通过")

    except Exception as e:
        print(f"出错: {e}")
    finally:
        driver.quit()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    bypass_slider_captcha()

关键点:

  1. 用OpenCV的模板匹配找缺口位置,比像素对比更准
  2. 轨迹模拟要带加速度变化,不能匀速
  3. 最后要有个回拉微调,避免刚好差几个像素

有些网站会检测轨迹,这时候得用更复杂的轨迹算法,或者考虑打码平台。

总结:核心就是图片识别+轨迹模拟。

你在 v 站站内搜索“极验”,就能找到相关帖子。

回到顶部