Python代理框架mitmproxy、wyproxy使用难题,如何调用mitmproxy实现代理并输出所有请求URL及返回数据至文件

想写个小脚本,调用 mitmproxy 开启代理,自动记录所有请求 URL 及返回数据至文件中。搜索到 wyproxy 实现了类似的功能,但功能繁多,想修改为自用小程序,分析学习过程中,对于 wyproxy 的程序逻辑无法理解。特来求问,感谢。

执行 wyproxy,程序先运行 run()函数,接下来到 wyDaemon.run(),再 start_server(),再 WYProxy.run()
而入库是 WYProxy 类中的 response 函数,并没有看到调用,可数据被存储进库里。
求问 wyproxy 的入库流程是?可有更简要的脚本调用 mitmproxy 记录所有请求 URL 及返回数据至文件中?无限感谢。
https://github.com/ring04h/wyproxy/blob/master/wyproxy.py

#!/usr/bin/env python
# encoding: utf-8

import sys
import argparse
import logging

from utils.daemon import Daemon
from mitmproxy import flow, proxy, controller, options
from mitmproxy.proxy.server import ProxyServer
from utils.parser import ResponseParser, save_cnf, read_cnf
from utils.handle import wyproxy_request_handle, wyproxy_response_handle
from utils.mysql import MysqlInterface

# Configure the root logger: INFO level, timestamped "[LEVEL] message" lines.
# The commented-out filename shows how to redirect the log to a file instead.
logging.basicConfig(
    level=logging.INFO,  # filename='/tmp/wyproxy.log',
    format='%(asctime)s [%(levelname)s] %(message)s',
)

class WYProxy(flow.FlowMaster):
    """Flow master that post-processes traffic and stores responses.

    Every request is passed to ``wyproxy_request_handle`` and every response
    to ``wyproxy_response_handle``. Unless ``unsave_data`` is truthy, each
    response is additionally parsed with ``ResponseParser`` and inserted
    through ``MysqlInterface``.
    """

    def __init__(self, opts, server, state, unsave_data):
        """Initialise the base flow master and remember the persistence flag.

        Args:
            opts: options object forwarded to ``flow.FlowMaster``.
            server: proxy server object forwarded to ``flow.FlowMaster``.
            state: flow state object forwarded to ``flow.FlowMaster``.
            unsave_data: when truthy, responses are not written to MySQL.
        """
        super(WYProxy, self).__init__(opts, server, state)
        self.unsave_data = unsave_data

    def run(self):
        """Run the base master loop; on Ctrl-C shut down and log the stop."""
        try:
            logging.info("wyproxy started successfully…")
            flow.FlowMaster.run(self)
        except KeyboardInterrupt:
            self.shutdown()
            logging.info("Ctrl C - stopping wyproxy server")

    # NOTE(review): nothing in this file calls request()/response() directly.
    # Presumably the base master's event loop dispatches each intercepted
    # message to the method of the same name (hence @controller.handler) --
    # confirm against the mitmproxy version this project pins.
    @controller.handler
    def request(self, f):
        """Hand an intercepted request flow to ``wyproxy_request_handle``."""
        wyproxy_request_handle(f)

    @controller.handler
    def response(self, f):
        """Post-process a response flow and, unless disabled, store it.

        Any exception raised while parsing or inserting is logged and
        swallowed so a storage failure does not propagate to the caller.
        """
        wyproxy_response_handle(f)
        if not self.unsave_data:
            try:
                parser = ResponseParser(f)
                mysqldb_io = MysqlInterface()

                mysqldb_io.insert_result(parser.parser_data())
            except Exception as e:
                logging.error(str(e))

        # memory overfull bug
        # print(len(self.state.flows))
        # print(self.state.flow_count())
        # self.state.clear()

def start_server(proxy_port, proxy_mode, unsave_data):
    """Build the proxy options, config and server, then run a WYProxy master.

    Args:
        proxy_port: port to listen on; a falsy value falls back to 8080.
        proxy_mode: proxy mode name; a falsy value or 'http' is mapped to
            'regular', any other value is passed through unchanged.
        unsave_data: forwarded to ``WYProxy``; truthy disables MySQL storage.
    """
    port = int(proxy_port) if proxy_port else 8080
    mode = proxy_mode if proxy_mode else 'regular'

    # The command line exposes the plain proxy as 'http'; the options object
    # is given 'regular' for it.
    if proxy_mode == 'http':
        mode = 'regular'

    opts = options.Options(
        listen_port=port,
        mode=mode,
        cadir="./ssl/",
    )

    config = proxy.ProxyConfig(opts)

    state = flow.State()
    server = ProxyServer(config)
    m = WYProxy(opts, server, state, unsave_data)
    m.run()

class wyDaemon(Daemon):
    """Daemon wrapper whose ``run`` starts the proxy server."""

    def __init__(self, pidfile, proxy_port=8080, proxy_mode='regular', unsave_data=False):
        """Store the proxy settings and initialise the base ``Daemon``.

        Args:
            pidfile: pid file path forwarded to ``Daemon``.
            proxy_port: port later handed to ``start_server``.
            proxy_mode: mode later handed to ``start_server``.
            unsave_data: persistence flag later handed to ``start_server``.
        """
        super(wyDaemon, self).__init__(pidfile)
        self.proxy_port = proxy_port
        self.proxy_mode = proxy_mode
        self.unsave_data = unsave_data

    def run(self):
        """Log the listening port and mode, then start the proxy server."""
        logging.info("wyproxy is starting…")
        logging.info("Listening: 0.0.0.0:{} {}".format(
            self.proxy_port, self.proxy_mode))
        start_server(self.proxy_port, self.proxy_mode, self.unsave_data)

def run(args):
    """Dispatch on the parsed command-line flags.

    With ``--restart`` the port, mode and unsave settings are reloaded via
    ``read_cnf()``. The daemon object is then started (``--daemon``, after
    saving the settings with ``save_cnf``), stopped (``--stop``), restarted
    (``--restart``) or, with none of those flags, run directly via its
    ``run()`` method.

    Args:
        args: namespace produced by the argument parser; ``port``, ``mode``,
            ``unsave`` and ``pidfile`` may be overwritten in place.
    """
    if args.restart:
        args.port = read_cnf().get('port')
        args.mode = read_cnf().get('mode')
        args.unsave = read_cnf().get('unsave')

    if not args.pidfile:
        args.pidfile = '/tmp/wyproxy.pid'

    wyproxy = wyDaemon(
        pidfile=args.pidfile,
        proxy_port=args.port,
        proxy_mode=args.mode,
        unsave_data=args.unsave)

    if args.daemon:
        # Persist the settings so that a later --restart can reload them.
        save_cnf(args)
        wyproxy.start()
    elif args.stop:
        wyproxy.stop()
    elif args.restart:
        wyproxy.restart()
    else:
        wyproxy.run()

# Script entry point: parse the command line and hand over to run().
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description="wyproxy v 1.0 ( Proxying And Recording HTTP/HTTPs and Socks5)")
    parser.add_argument("-d", "--daemon", action="store_true",
                        help="start wyproxy with daemond")
    parser.add_argument("-stop", "--stop", action="store_true", required=False,
                        help="stop wyproxy daemond")
    parser.add_argument("-restart", "--restart", action="store_true", required=False,
                        help="restart wyproxy daemond")
    parser.add_argument("-pid", "--pidfile", metavar="",
                        help="wyproxy daemond pidfile name")
    parser.add_argument("-p", "--port", metavar="", default="8080",
                        help="wyproxy bind port")
    parser.add_argument("-m", "--mode", metavar="", choices=['http', 'socks5', 'transparent'], default="http",
                        help="wyproxy mode (HTTP/HTTPS, Socks5, Transparent)")
    parser.add_argument("-us", "--unsave", action="store_true", required=False,
                        help="Do not save records to MySQL server")
    args = parser.parse_args()

    try:
        run(args)
    except KeyboardInterrupt:
        logging.info("Ctrl C - Stopping Client")
        sys.exit(1)
Python代理框架mitmproxy、wyproxy使用难题,如何调用mitmproxy实现代理并输出所有请求URL及返回数据至文件


1 回复

我来给你一个完整的解决方案。mitmproxy确实是个强大的工具,但直接编程调用需要点技巧。

#!/usr/bin/env python3
"""
mitmproxy编程调用示例 - 捕获所有请求URL和响应数据到文件
"""

import asyncio
import json
from mitmproxy import options
from mitmproxy.tools.dump import DumpMaster
from mitmproxy import http
import logging
from datetime import datetime

class RequestLogger:
    """mitmproxy addon that records every request and response as JSON.

    Entries accumulate in ``log_data``. The complete list is rewritten to
    ``output_file`` whenever ``flush_every`` entries have been added since
    the last write, and once more from the ``done`` shutdown hook.

    NOTE: all entries stay in memory and the whole file is rewritten on each
    save, so this is intended for short debugging sessions.
    """

    def __init__(self, output_file="proxy_log.json", flush_every=10):
        """Create an empty log.

        Args:
            output_file: path of the JSON file to write.
            flush_every: number of new entries that triggers a save; values
                below 1 are treated as 1.
        """
        self.output_file = output_file
        self.log_data = []
        self.flush_every = max(1, flush_every)
        # Entries appended since the last successful save. Counting them
        # (instead of testing ``len(log_data) % 10`` after responses only)
        # means a save can never be skipped when requests and responses do
        # not strictly alternate.
        self._unsaved = 0
        logging.basicConfig(level=logging.INFO)

    def request(self, flow: "http.HTTPFlow"):
        """Record method, URL, headers, body text and client IP of a request."""
        try:
            request_info = {
                "timestamp": datetime.now().isoformat(),
                "type": "request",
                "method": flow.request.method,
                "url": flow.request.pretty_url,
                "headers": dict(flow.request.headers),
                "content": flow.request.get_text() if flow.request.content else None,
                "client_ip": flow.client_conn.address[0] if flow.client_conn else None
            }

            self._record(request_info)

            # Echo to the console as traffic arrives.
            print(f"[REQUEST] {flow.request.method} {flow.request.pretty_url}")

        except Exception as e:
            # Best effort: a logging failure is reported, never raised.
            logging.error(f"记录请求时出错: {e}")

    def response(self, flow: "http.HTTPFlow"):
        """Record status, headers, body text and body length of a response."""
        try:
            response_info = {
                "timestamp": datetime.now().isoformat(),
                "type": "response",
                "url": flow.request.pretty_url,
                "status_code": flow.response.status_code,
                "headers": dict(flow.response.headers),
                "content": flow.response.get_text() if flow.response.content else None,
                "content_length": len(flow.response.content) if flow.response.content else 0
            }

            self._record(response_info)

            # Echo to the console as traffic arrives.
            print(f"[RESPONSE] {flow.response.status_code} {flow.request.pretty_url}")

        except Exception as e:
            # Best effort: a logging failure is reported, never raised.
            logging.error(f"记录响应时出错: {e}")

    def done(self):
        """Shutdown hook: write out entries that have not been saved yet."""
        if self._unsaved:
            self.save_to_file()

    def save_to_file(self):
        """Rewrite ``output_file`` with every entry collected so far.

        Errors are logged rather than raised; the unsaved counter is only
        reset after a successful write.
        """
        try:
            with open(self.output_file, 'w', encoding='utf-8') as f:
                json.dump(self.log_data, f, ensure_ascii=False, indent=2)
            self._unsaved = 0
            logging.info(f"已保存 {len(self.log_data)} 条记录到 {self.output_file}")
        except Exception as e:
            logging.error(f"保存文件时出错: {e}")

    def _record(self, entry):
        """Append one entry and save once ``flush_every`` entries are pending."""
        self.log_data.append(entry)
        self._unsaved += 1
        if self._unsaved >= self.flush_every:
            self.save_to_file()

async def start_proxy(port=8080):
    """Run a DumpMaster with a RequestLogger addon until the master stops.

    The collected log is written to ``proxy_logs.json`` when the master
    stops, whether it returned normally, was cancelled or raised.

    Args:
        port: TCP port the proxy listens on.
    """
    opts = options.Options(
        listen_port=port,
        ssl_insecure=True  # accept self-signed upstream certificates (debugging only)
    )

    # Create the master and register the logging addon on it.
    m = DumpMaster(opts)
    logger = RequestLogger("proxy_logs.json")
    m.addons.add(logger)

    # Keyword-style option changes go through update(); set() expects
    # "name=value" spec strings instead.
    # NOTE(review): the explicit mode="regular" was dropped because regular
    # proxying is presumably the default and the option's value type differs
    # between mitmproxy releases -- confirm against the installed version.
    m.options.update(showhost=True)

    print(f"代理服务器启动在 http://127.0.0.1:{port}")
    print("按 Ctrl+C 停止代理并保存日志")

    try:
        await m.run()
    finally:
        # Under asyncio.run() Ctrl+C does not surface inside this coroutine
        # as KeyboardInterrupt, so the final save lives in ``finally`` to run
        # on every exit path.
        print("\n正在停止代理服务器...")
        logger.save_to_file()
        print(f"最终日志已保存到 {logger.output_file}")
        m.shutdown()

def main():
    """Print the banner and run the proxy on port 8080 until interrupted."""
    print("=" * 60)
    print("mitmproxy 代理日志工具")
    print("=" * 60)

    # Port the proxy listens on.
    port = 8080

    try:
        asyncio.run(start_proxy(port))
    except KeyboardInterrupt:
        # Ctrl+C surfaces here from asyncio.run(); treat it as a normal stop
        # instead of ending with a traceback.
        pass

# Start the proxy only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

使用说明:

  1. 安装依赖
pip install mitmproxy
  2. 运行代理
python proxy_logger.py
  3. 配置系统代理

    • 设置系统HTTP/HTTPS代理为:127.0.0.1:8080
    • 或者浏览器配置代理到该地址
  4. 安装mitmproxy证书(用于HTTPS解密):

    • 运行一次 mitmproxy 命令生成证书
    • 证书位置:~/.mitmproxy/mitmproxy-ca-cert.pem
    • 导入到系统或浏览器的受信任根证书

代码特点:

  • 实时输出请求URL到控制台
  • 保存完整请求和响应数据到JSON文件
  • 支持HTTPS流量解密(需要安装证书)
  • 异步处理,性能较好
  • 自动保存日志,防止数据丢失

输出文件格式

[
  {
    "timestamp": "2024-01-15T10:30:00.123456",
    "type": "request",
    "method": "GET",
    "url": "https://api.example.com/data",
    "headers": {...},
    "content": null
  },
  {
    "timestamp": "2024-01-15T10:30:00.234567",
    "type": "response",
    "url": "https://api.example.com/data",
    "status_code": 200,
    "headers": {...},
    "content": "{\"data\": \"example\"}",
    "content_length": 20
  }
]

简单建议:记得安装证书才能解密HTTPS流量。

回到顶部