Python中如何通过Flask控制Selenium实现自动化任务？

本人正在做一个练手的爬虫项目，主要是用 flask 充当 selenium 爬虫的控制面板，初步渣代码如下：

from flask import Flask,render_template,request,redirect,url_for
from Control import control  ## selenium 的 class
# Flask is given this module's import name via __name__.
app = Flask(__name__)
# NOTE(review): hard-coded session-signing key; load it from configuration
# or the environment before deploying anywhere public.
app.secret_key = 'dqwer235r*tbqew4r1$1232~@'
# Module-level controller shared by all requests. Constructing it launches
# the Selenium browser, so every import of this module opens one; the thread
# reports two browsers when the app runs with the debug reloader enabled.
test = control()
@app.route('/dashboard/', methods=['GET', 'POST'])
def dashboard():
    """Render the control panel and handle its "Start" button.

    Non-POST requests (GET, plus the HEAD that Flask registers alongside
    GET) simply render the page. A POST whose ``submit`` field equals
    ``'Start'`` asks the Selenium controller for the QR-code image URL and
    renders it together with a status message; any other POST renders the
    page with the message ``'no'``.

    Returns:
        The rendered ``dashboard.html`` template.
    """
    if request.method != 'POST':
        return render_template('dashboard.html')

    # .get() avoids an automatic 400 when the form has no 'submit' field;
    # such a request is treated like any other non-"Start" submission.
    if request.form.get('submit') == 'Start':
        msg = 'Started.'
        # URL of the QR-code image scraped by the Selenium controller.
        qrSrc = test.qr()
        return render_template('dashboard.html', msg=msg, qrSrc=str(qrSrc))

    msg = 'no'
    return render_template('dashboard.html', msg=msg)
if __name__ == '__main__':
    # use_reloader=False: the debug reloader runs this module in a second
    # process, which would construct a second control() and therefore open a
    # second browser. Disabling it keeps the debugger but a single browser.
    # NOTE(review): debug=True on 0.0.0.0 exposes the Werkzeug debugger to
    # the network; use only on a trusted machine.
    app.run(host='0.0.0.0', port=80, debug=True, use_reloader=False)

from selenium import webdriver
import time

class control:
    """Owns one Chrome WebDriver session and reads a login QR code from it."""

    def __init__(self):
        # Launches a Chrome browser; the driver is kept on the instance so
        # later calls reuse the same browser session.
        self.driver = webdriver.Chrome()

    def qr(self, url='http://example.com', wait_seconds=2):
        """Open *url* and return the ``src`` of the login QR-code image.

        Args:
            url: Page to load. Defaults to the original hard-coded address.
            wait_seconds: Fixed pause after loading, giving the page time to
                render the QR image. Defaults to the original 2 seconds.

        Returns:
            The ``src`` attribute of the element whose id is
            ``js_login_qrcode_img`` (the QR code the user scans to log in).
        """
        self.driver.get(url)
        time.sleep(wait_seconds)
        # find_element_by_id() was removed in Selenium 4.3; find_element()
        # with the 'id' strategy (the value of By.ID) works on both
        # Selenium 3 and 4.
        qr_image = self.driver.find_element('id', 'js_login_qrcode_img')
        return qr_image.get_attribute('src')

if __name__ == '__main__':
    # Manual smoke test: start a browser and print the QR-code image URL.
    browser = control()
    qr_url = browser.qr()
    print(qr_url)

想请教下如何实现 selenium 的持久化，即浏览器一直不关闭并且能接收 flask 传入的命令？感觉要把 flask 嵌入另一个 selenium 的 class 中，但我比较想把 flask 和爬虫两者分离。另外上面的代码会启动两个浏览器实例，暂时还不知道原因。谢谢大家 :)

ionicwang 1楼

这个时候就要用到 flask 的上下文机制了

h691938207 2楼

from flask import Flask, request, jsonify
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import threading
import time

app = Flask(__name__)

# Module-level variable holding the browser instance (None until one is started)
driver = None
# Lock the route handlers hold while reading or replacing `driver`
driver_lock = threading.Lock()

def init_browser():
    """Create and return a new Chrome WebDriver configured to run headless."""
    chrome_options = webdriver.ChromeOptions()
    for flag in (
        '--headless',  # no visible browser window
        '--no-sandbox',
        '--disable-dev-shm-usage',
    ):
        chrome_options.add_argument(flag)
    browser = webdriver.Chrome(options=chrome_options)
    return browser

@app.route('/start', methods=['POST'])
def start_browser():
    """Start the shared browser if none is running.

    Returns:
        A JSON response with ``status`` ``'success'`` when a browser was
        launched, or ``'error'`` when one is already running or the launch
        failed (``message`` then carries the exception text).
    """
    global driver
    with driver_lock:
        if driver is not None:
            return jsonify({'status': 'error', 'message': '浏览器已在运行中'})

        # Launching can fail (e.g. missing browser/driver binary); report it
        # as JSON like the other endpoints instead of an unhandled 500.
        try:
            driver = init_browser()
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})
        return jsonify({'status': 'success', 'message': '浏览器已启动'})

@app.route('/navigate', methods=['POST'])
def navigate():
    """Navigate the shared browser to the URL given in the JSON body.

    Expects a JSON object with a ``url`` key.

    Returns:
        A JSON response with ``status`` ``'success'`` or ``'error'``; on
        error ``message`` explains why (missing URL, browser not started, or
        the exception text from the navigation).
    """
    # silent=True yields None instead of aborting when the body is missing
    # or not valid JSON; a non-object body (list, null, ...) is treated the
    # same as an empty one so the normal "URL required" error is returned.
    payload = request.get_json(silent=True)
    url = payload.get('url') if isinstance(payload, dict) else None

    if not url:
        return jsonify({'status': 'error', 'message': 'URL不能为空'})

    with driver_lock:
        if driver is None:
            return jsonify({'status': 'error', 'message': '请先启动浏览器'})

        try:
            driver.get(url)
            return jsonify({'status': 'success', 'message': f'已导航到 {url}'})
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})

@app.route('/click', methods=['POST'])
def click_element():
    """Click a page element located by the selector in the JSON body.

    Expects a JSON object with ``value`` (the selector) and optionally
    ``by``: one of ``'xpath'`` (default), ``'css'`` or ``'id'``. Waits up to
    10 seconds for the element to become clickable.

    Returns:
        A JSON response with ``status`` ``'success'`` or ``'error'``; on
        error ``message`` explains why.
    """
    # Request-level strategy names mapped to Selenium locator strategies.
    locators = {'xpath': By.XPATH, 'css': By.CSS_SELECTOR, 'id': By.ID}

    # silent=True yields None instead of aborting on a missing/invalid JSON
    # body; anything that is not a JSON object is treated as empty.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    by = data.get('by', 'xpath')
    value = data.get('value')

    if not value:
        return jsonify({'status': 'error', 'message': '元素选择器不能为空'})

    with driver_lock:
        if driver is None:
            return jsonify({'status': 'error', 'message': '请先启动浏览器'})

        # isinstance guard: a non-string `by` (e.g. a JSON list) is not
        # hashable and must be rejected rather than looked up.
        strategy = locators.get(by) if isinstance(by, str) else None
        if strategy is None:
            return jsonify({'status': 'error', 'message': '不支持的定位方式'})

        try:
            element = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((strategy, value))
            )
            element.click()
            return jsonify({'status': 'success', 'message': '点击成功'})
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})

@app.route('/input', methods=['POST'])
def input_text():
    """Clear an input element and type text into it.

    Expects a JSON object with ``value`` (the selector), ``text`` (the text
    to type) and optionally ``by``: one of ``'xpath'`` (default), ``'css'``
    or ``'id'``. Waits up to 10 seconds for the element to be present.

    Returns:
        A JSON response with ``status`` ``'success'`` or ``'error'``; on
        error ``message`` explains why.
    """
    # Same strategy names as the /click endpoint ('id' was missing here).
    locators = {'xpath': By.XPATH, 'css': By.CSS_SELECTOR, 'id': By.ID}

    # silent=True yields None instead of aborting on a missing/invalid JSON
    # body; anything that is not a JSON object is treated as empty.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    by = data.get('by', 'xpath')
    value = data.get('value')
    text = data.get('text')

    if not all([value, text]):
        return jsonify({'status': 'error', 'message': '参数不完整'})

    with driver_lock:
        if driver is None:
            return jsonify({'status': 'error', 'message': '请先启动浏览器'})

        # isinstance guard: a non-string `by` (e.g. a JSON list) is not
        # hashable and must be rejected rather than looked up.
        strategy = locators.get(by) if isinstance(by, str) else None
        if strategy is None:
            return jsonify({'status': 'error', 'message': '不支持的定位方式'})

        try:
            element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((strategy, value))
            )
            element.clear()
            element.send_keys(text)
            return jsonify({'status': 'success', 'message': '输入成功'})
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})

@app.route('/screenshot', methods=['GET'])
def take_screenshot():
    """Save a screenshot of the current page to a timestamped PNG file.

    The file is written relative to the server's working directory as
    ``screenshot_<unix-seconds>.png``.

    Returns:
        A JSON response with ``status`` ``'success'`` and the ``filename``,
        or ``status`` ``'error'`` with a ``message``.
    """
    with driver_lock:
        if driver is None:
            return jsonify({'status': 'error', 'message': '请先启动浏览器'})

        try:
            filename = f"screenshot_{int(time.time())}.png"
            # save_screenshot() signals a failed write by returning False
            # rather than raising, so the result must be checked.
            if not driver.save_screenshot(filename):
                return jsonify({'status': 'error', 'message': f'截图保存失败: {filename}'})
            return jsonify({'status': 'success', 'filename': filename})
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})

@app.route('/stop', methods=['POST'])
def stop_browser():
    """Quit the shared browser and forget the driver.

    Returns:
        A JSON response with ``status`` ``'success'`` when the browser was
        stopped, or ``'error'`` when none was running or quitting raised
        (``message`` then carries the exception text).
    """
    global driver
    with driver_lock:
        if driver is None:
            return jsonify({'status': 'error', 'message': '浏览器未运行'})

        try:
            driver.quit()
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})
        finally:
            # Drop the reference even when quit() fails (e.g. the browser
            # already crashed); otherwise the dead driver would block
            # /start from ever launching a new one.
            driver = None
        return jsonify({'status': 'success', 'message': '浏览器已停止'})

@app.route('/execute_script', methods=['POST'])
def execute_script():
    """Run JavaScript from the JSON body in the current page.

    Expects a JSON object with a ``script`` key.

    Returns:
        A JSON response with ``status`` ``'success'`` and the script's
        ``result``, or ``status`` ``'error'`` with a ``message``.
    """
    # SECURITY NOTE(review): this executes arbitrary caller-supplied
    # JavaScript in the browser session. Expose it only to trusted clients
    # (authentication / localhost binding) - it is not sandboxed here.

    # silent=True yields None instead of aborting on a missing/invalid JSON
    # body; a non-object body is treated the same as an empty one.
    payload = request.get_json(silent=True)
    script = payload.get('script') if isinstance(payload, dict) else None

    if not script:
        return jsonify({'status': 'error', 'message': '脚本不能为空'})

    with driver_lock:
        if driver is None:
            return jsonify({'status': 'error', 'message': '请先启动浏览器'})

        try:
            result = driver.execute_script(script)
            # jsonify stays inside the try: a result that is not
            # JSON-serializable is then reported as an error response.
            return jsonify({'status': 'success', 'result': result})
        except Exception as e:
            return jsonify({'status': 'error', 'message': str(e)})

if __name__ == '__main__':
    # use_reloader=False: the debug reloader serves requests from a child
    # process that it restarts on code changes; a restart discards the
    # global `driver` while the browser it started may be left running.
    # NOTE(review): debug=True on 0.0.0.0 exposes the Werkzeug debugger to
    # the network; use only on a trusted machine.
    app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False)

这个方案的核心是通过Flask创建REST API来控制Selenium浏览器实例。主要特点：

线程安全：使用锁确保多请求时浏览器操作的安全
完整的浏览器控制操作：启动、导航、点击、输入、截图、执行脚本、停止
错误处理：导航、点击、输入、截图、执行脚本等端点会捕获异常，并以 JSON 形式返回错误信息
等待机制：使用WebDriverWait确保元素加载完成

使用示例：

# 启动浏览器
curl -X POST http://localhost:5000/start -H "Content-Type: application/json"

# 导航到百度
curl -X POST http://localhost:5000/navigate -H "Content-Type: application/json" -d '{"url": "https://www.baidu.com"}'

# 在搜索框输入文本
curl -X POST http://localhost:5000/input -H "Content-Type: application/json" -d '{"by": "xpath", "value": "//input[@id=\"kw\"]", "text": "Flask Selenium"}'

记得先安装依赖：pip install flask selenium

总结：用Flask API包装Selenium操作，实现远程控制浏览器自动化。

gougou168 3楼

selenium 变成 http server，flask 发参数过去就可以了。

gougou168 4楼

多谢，selenium-rc 看起来不错，研究一下

多谢指点，不过这个有点复杂，可能我要理解一段时间，而且网上也搜不到上下文的应用例子。。

itying888 5楼

两个实例的问题，你可以试一下 debug=False 或者 debug=True, use_reloader=False

htzhanglong 6楼

请问 selenium server 长期运行对硬件资源有限制吗？没看到这方面的资料

caililin 7楼

谢谢，的确是 debug=False 后变回一个实例了。