Python中如何实现scrapy随机请求时间

"RANDOM_DELAY": 10 这个设置表示延迟在 0 到 10 秒之间随机吗?我想要 5 到 10 秒之间的随机延迟,应该如何设置?


Python中如何实现scrapy随机请求时间
2 回复
import random
import time
from scrapy import Spider, Request
from scrapy.http import Response

class RandomDelaySpider(Spider):
    """Example spider whose requests are spaced out by a randomized delay.

    The delay is delegated to Scrapy's own throttling instead of calling
    ``time.sleep`` inside ``start_requests``: a blocking sleep stalls the
    whole event loop (every in-flight download and callback), and a
    ``download_delay`` key in ``Request.meta`` is not one of the meta keys
    Scrapy acts on, so it never delayed anything.

    With ``RANDOMIZE_DOWNLOAD_DELAY`` enabled, Scrapy waits between
    ``0.5 * DOWNLOAD_DELAY`` and ``1.5 * DOWNLOAD_DELAY`` between requests
    to the same website. ``DOWNLOAD_DELAY`` is set to the midpoint of
    ``min_delay`` and ``max_delay``, so the defaults (1 and 5) give waits
    of 1.5 to 4.5 seconds. The built-in window is always 0.5x-1.5x of the
    base value, so an arbitrary ``[min, max]`` range can only be
    approximated this way.
    """

    name = 'random_delay_spider'

    # Desired delay window, in seconds.
    min_delay = 1
    max_delay = 5

    # Per-spider settings; these override the project-wide settings.py.
    custom_settings = {
        'DOWNLOAD_DELAY': (min_delay + max_delay) / 2,
        'RANDOMIZE_DOWNLOAD_DELAY': True,
    }

    def start_requests(self):
        """Yield the initial requests; pacing is handled by the downloader."""
        urls = ['http://example.com/page1', 'http://example.com/page2']

        for url in urls:
            yield Request(url=url, callback=self.parse)

    def parse(self, response: Response):
        """Yield one item holding the URL of the fetched page."""
        yield {'url': response.url}

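# Alternatively, apply a random delay to every request with a downloader
# middleware (defined in middlewares.py and enabled from settings.py).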
"""
import random
import time


class RandomDelayMiddleware:
    '''Downloader middleware that pauses a random time before each request.

    The pause is drawn uniformly from ``[min_delay, delay]`` seconds.
    ``min_delay`` defaults to 0, which keeps the behaviour of configurations
    that only define ``RANDOM_DELAY``.

    Args:
        delay: Upper bound of the pause, in seconds.
        min_delay: Lower bound of the pause, in seconds.

    Raises:
        ValueError: If a bound is negative or ``min_delay`` exceeds ``delay``.
    '''

    def __init__(self, delay, min_delay=0):
        # Settings given on the command line arrive as strings, so coerce
        # both bounds before they reach random.uniform().
        delay = float(delay)
        min_delay = float(min_delay)
        if delay < 0 or min_delay < 0:
            raise ValueError('RANDOM_DELAY and RANDOM_DELAY_MIN must be >= 0')
        if min_delay > delay:
            raise ValueError('RANDOM_DELAY_MIN must not exceed RANDOM_DELAY')
        self.delay = delay
        self.min_delay = min_delay

    @classmethod
    def from_crawler(cls, crawler):
        '''Build the middleware from RANDOM_DELAY / RANDOM_DELAY_MIN settings.'''
        settings = crawler.settings
        return cls(
            settings.get('RANDOM_DELAY', 0),
            settings.get('RANDOM_DELAY_MIN', 0),
        )

    def _random_delay(self):
        '''Return the next pause in seconds, within [min_delay, delay].'''
        return random.uniform(self.min_delay, self.delay)

    def process_request(self, request, spider):
        '''Sleep for a random pause, then let the request continue.

        Returns None so that the remaining middlewares and the downloader
        process the request as usual.
        '''
        pause = self._random_delay()
        if pause > 0:
            # NOTE(review): time.sleep() blocks the whole crawler while it
            # waits, not only this request. Scrapy's built-in DOWNLOAD_DELAY
            # with RANDOMIZE_DOWNLOAD_DELAY throttles without blocking and
            # is preferable when its 0.5x-1.5x window is acceptable.
            time.sleep(pause)
        return None


# settings.py configuration
DOWNLOADER_MIDDLEWARES = {
    'your_project.middlewares.RandomDelayMiddleware': 543,
}
RANDOM_DELAY_MIN = 5  # lower bound of the random delay, in seconds
RANDOM_DELAY = 10  # upper bound of the random delay, in seconds
"""

使用 random.uniform(最小值, 最大值) 为每次请求生成随机的等待时间,从而控制请求间隔,降低被目标网站封禁的风险。


https://doc.scrapy.org/en/latest/topics/settings.html?highlight=time%20out#randomize-download-delay

RANDOMIZE_DOWNLOAD_DELAY
Default: True

If enabled, Scrapy will wait a random amount of time (between 0.5 * DOWNLOAD_DELAY and 1.5 * DOWNLOAD_DELAY) while fetching requests from the same website.

回到顶部