Python代理框架mitmproxy、wyproxy使用难题,如何调用mitmproxy实现代理并输出所有请求URL及返回数据至文件
想写个小脚本,调用 mitmproxy 开启代理,自动记录所有请求 URL 及返回数据至文件中。搜索到 wyproxy 实现了类似的功能,但功能繁多,想修改为自用小程序,分析学习过程中,对于 wyproxy 的程序逻辑无法理解。特来求问,感谢。
执行 wyproxy,程序先运行 run()函数,接下来到 wyDaemon.run(),再 start_server(),再 WYProxy.run()
而入库是 WYProxy 类中的 response 函数,并没有看到调用,可数据被存储进库里。
求问 wyproxy 的入库流程是?可有更简要的脚本调用 mitmproxy 记录所有请求 URL 及返回数据至文件中?无限感谢。
https://github.com/ring04h/wyproxy/blob/master/wyproxy.py
#!/usr/bin/env python
# encoding: utf-8
import sys
import argparse
import logging
from utils.daemon import Daemon
from mitmproxy import flow, proxy, controller, options
from mitmproxy.proxy.server import ProxyServer
from utils.parser import ResponseParser, save_cnf, read_cnf
from utils.handle import wyproxy_request_handle, wyproxy_response_handle
from utils.mysql import MysqlInterface
# Configure the root logger once at import time: INFO and above, with a
# timestamped "[LEVEL] message" layout.
# To log to a file instead of the default stream, add:
#     filename='/tmp/wyproxy.log',
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
)
class WYProxy(flow.FlowMaster):
    """FlowMaster subclass that post-processes flows and stores responses.

    ``request`` and ``response`` are never called directly in this file.
    They are registered through ``@controller.handler``.
    NOTE(review): presumably ``flow.FlowMaster.run()`` dispatches proxy
    events to the handler method of the same name -- confirm against the
    installed mitmproxy version.
    """

    def __init__(self, opts, server, state, unsave_data):
        """Initialise the master.

        Args:
            opts: Options object forwarded to ``flow.FlowMaster``.
            server: Proxy server instance forwarded to ``flow.FlowMaster``.
            state: Flow state object forwarded to ``flow.FlowMaster``.
            unsave_data: When truthy, responses are not written to MySQL.
        """
        super(WYProxy, self).__init__(opts, server, state)
        self.unsave_data = unsave_data

    def run(self):
        """Run the master loop; on Ctrl+C shut the master down and log it."""
        try:
            logging.info("wyproxy started successfully…")
            flow.FlowMaster.run(self)
        except KeyboardInterrupt:
            self.shutdown()
            logging.info("Ctrl C - stopping wyproxy server")

    @controller.handler
    def request(self, f):
        """Handler for request events: delegate to wyproxy_request_handle."""
        wyproxy_request_handle(f)

    @controller.handler
    def response(self, f):
        """Handler for response events: post-process, then store the flow.

        Storage is skipped when ``self.unsave_data`` is truthy. Any error
        raised while parsing or inserting is logged and swallowed, so a
        failing insert does not propagate out of the handler.
        """
        wyproxy_response_handle(f)
        if not self.unsave_data:
            try:
                parser = ResponseParser(f)
                mysqldb_io = MysqlInterface()
                mysqldb_io.insert_result(parser.parser_data())
            except Exception as e:
                logging.error(str(e))
        # Known issue noted by the original author ("memory overfull bug").
        # The debugging aids below were left disabled:
        # print(len(self.state.flows))
        # print(self.state.flow_count())
        # self.state.clear()
def start_server(proxy_port, proxy_mode, unsave_data):
    """Build the proxy server and run a WYProxy master on it (blocking).

    Args:
        proxy_port: Port to listen on; any falsy value falls back to 8080.
            Converted with ``int()``.
        proxy_mode: Proxy mode name. A falsy value and the alias ``'http'``
            both map to ``'regular'``; other values are passed through.
        unsave_data: Forwarded to ``WYProxy``; truthy disables MySQL storage.
    """
    port = int(proxy_port) if proxy_port else 8080
    mode = proxy_mode if proxy_mode else 'regular'
    # 'http' is the command-line spelling of the 'regular' mode.
    if proxy_mode == 'http':
        mode = 'regular'
    opts = options.Options(
        listen_port=port,
        mode=mode,
        cadir="./ssl/",
    )
    config = proxy.ProxyConfig(opts)
    state = flow.State()
    server = ProxyServer(config)
    m = WYProxy(opts, server, state, unsave_data)
    m.run()
class wyDaemon(Daemon):
    """Daemon subclass whose ``run()`` starts the wyproxy server."""

    def __init__(self, pidfile, proxy_port=8080, proxy_mode='regular', unsave_data=False):
        """Store the proxy settings and pass ``pidfile`` to ``Daemon``.

        Args:
            pidfile: Path of the pid file, forwarded to ``Daemon``.
            proxy_port: Port handed to ``start_server``.
            proxy_mode: Mode name handed to ``start_server``.
            unsave_data: Handed to ``start_server``; truthy disables storage.
        """
        super(wyDaemon, self).__init__(pidfile)
        self.proxy_port = proxy_port
        self.proxy_mode = proxy_mode
        self.unsave_data = unsave_data

    def run(self):
        """Log the listening address and mode, then start the proxy server."""
        logging.info("wyproxy is starting…")
        logging.info("Listening: 0.0.0.0:{} {}".format(
            self.proxy_port, self.proxy_mode))
        start_server(self.proxy_port, self.proxy_mode, self.unsave_data)
def run(args):
    """Dispatch the parsed command line to the matching daemon action.

    Args:
        args: Namespace carrying ``restart``, ``pidfile``, ``port``,
            ``mode``, ``unsave``, ``daemon`` and ``stop``.

    On ``--restart`` the port, mode and unsave settings are taken from the
    saved configuration rather than from the command line. With no action
    flag the proxy runs in the foreground.
    """
    if args.restart:
        # Read the saved configuration once rather than once per key.
        cnf = read_cnf()
        args.port = cnf.get('port')
        args.mode = cnf.get('mode')
        args.unsave = cnf.get('unsave')

    if not args.pidfile:
        args.pidfile = '/tmp/wyproxy.pid'

    wyproxy = wyDaemon(
        pidfile=args.pidfile,
        proxy_port=args.port,
        proxy_mode=args.mode,
        unsave_data=args.unsave)

    if args.daemon:
        # Save the settings so that a later --restart can reuse them.
        save_cnf(args)
        wyproxy.start()
    elif args.stop:
        wyproxy.stop()
    elif args.restart:
        wyproxy.restart()
    else:
        wyproxy.run()
# Script entry point: parse the command line and hand it to run().
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="wyproxy v 1.0 ( Proxying And Recording HTTP/HTTPs and Socks5)")
    parser.add_argument("-d", "--daemon", action="store_true",
                        help="start wyproxy with daemond")
    parser.add_argument("-stop", "--stop", action="store_true", required=False,
                        help="stop wyproxy daemond")
    parser.add_argument("-restart", "--restart", action="store_true", required=False,
                        help="restart wyproxy daemond")
    parser.add_argument("-pid", "--pidfile", metavar="",
                        help="wyproxy daemond pidfile name")
    parser.add_argument("-p", "--port", metavar="", default="8080",
                        help="wyproxy bind port")
    parser.add_argument("-m", "--mode", metavar="", choices=['http', 'socks5', 'transparent'], default="http",
                        help="wyproxy mode (HTTP/HTTPS, Socks5, Transparent)")
    parser.add_argument("-us", "--unsave", action="store_true", required=False,
                        help="Do not save records to MySQL server")
    args = parser.parse_args()
    try:
        run(args)
    except KeyboardInterrupt:
        logging.info("Ctrl C - Stopping Client")
        sys.exit(1)
Python代理框架mitmproxy、wyproxy使用难题,如何调用mitmproxy实现代理并输出所有请求URL及返回数据至文件
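我来给你一个完整的解决方案。先说入库流程:response 并不是在 wyproxy.py 里被显式调用的,它被 @controller.handler 标记为事件处理函数,flow.FlowMaster.run() 的事件循环在代理收到请求或响应时,会按事件名(request、response)回调 WYProxy 上的同名方法,所以数据会被写入数据库。mitmproxy确实是个强大的工具,但直接编程调用需要点技巧。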
#!/usr/bin/env python3
"""
mitmproxy编程调用示例 - 捕获所有请求URL和响应数据到文件
"""
import asyncio
import json
from mitmproxy import options
from mitmproxy.tools.dump import DumpMaster
from mitmproxy import http
import logging
from datetime import datetime
class RequestLogger:
    """mitmproxy addon that records each request and response to a JSON file.

    Records accumulate in ``self.log_data`` and the complete list is
    rewritten to ``output_file`` as one JSON array after every
    ``flush_every`` new records, and once more on shutdown. The list is
    never trimmed, so memory use grows with the amount of captured traffic.
    """

    def __init__(self, output_file="proxy_log.json", flush_every=10):
        """Create the logger.

        Args:
            output_file: Path of the JSON file to write.
            flush_every: Number of new records after which the file is
                rewritten. Values below 1 flush after every record.
        """
        self.output_file = output_file
        self.log_data = []
        self.flush_every = flush_every
        # Records appended since the last successful save.
        self._unsaved = 0
        logging.basicConfig(level=logging.INFO)

    @staticmethod
    def _body_text(message):
        """Return the message body as text, or None when the body is empty.

        ``get_text()`` raises ValueError when the declared charset does not
        match the bytes. In that case the body is decoded as UTF-8 with
        replacement characters so the record is still written.
        """
        if not message.content:
            return None
        try:
            return message.get_text()
        except ValueError:
            return message.content.decode("utf-8", errors="replace")

    def _record(self, entry):
        """Append one record and flush once enough unsaved records exist."""
        self.log_data.append(entry)
        self._unsaved += 1
        # Count unsaved records instead of testing len() % 10 in response()
        # only: that test is skipped for good once a request without a
        # response shifts the parity of the list length.
        if self._unsaved >= self.flush_every:
            self.save_to_file()

    def request(self, flow: "http.HTTPFlow"):
        """mitmproxy hook: record an incoming request and print its URL."""
        try:
            req = flow.request
            # peername is the (host, port) of the client; Client.address is
            # a deprecated alias for it.
            peer = flow.client_conn.peername if flow.client_conn else None
            request_info = {
                "timestamp": datetime.now().isoformat(),
                "type": "request",
                "method": req.method,
                "url": req.pretty_url,
                "headers": dict(req.headers),
                "content": self._body_text(req),
                "client_ip": peer[0] if peer else None,
            }
            self._record(request_info)
            print(f"[REQUEST] {req.method} {req.pretty_url}")
        except Exception as e:
            # Never let a logging problem break the proxied connection.
            logging.error(f"记录请求时出错: {e}")

    def response(self, flow: "http.HTTPFlow"):
        """mitmproxy hook: record a response and print its status and URL."""
        try:
            resp = flow.response
            response_info = {
                "timestamp": datetime.now().isoformat(),
                "type": "response",
                "url": flow.request.pretty_url,
                "status_code": resp.status_code,
                "headers": dict(resp.headers),
                "content": self._body_text(resp),
                "content_length": len(resp.content) if resp.content else 0,
            }
            self._record(response_info)
            print(f"[RESPONSE] {resp.status_code} {flow.request.pretty_url}")
        except Exception as e:
            logging.error(f"记录响应时出错: {e}")

    def done(self):
        """mitmproxy lifecycle hook: write any unsaved records at shutdown."""
        if self._unsaved:
            self.save_to_file()

    def save_to_file(self):
        """Rewrite ``output_file`` with every record collected so far.

        Failures are logged, not raised; the unsaved counter is reset only
        after a successful write so the next record retries the save.
        """
        try:
            with open(self.output_file, 'w', encoding='utf-8') as f:
                json.dump(self.log_data, f, ensure_ascii=False, indent=2)
            self._unsaved = 0
            logging.info(f"已保存 {len(self.log_data)} 条记录到 {self.output_file}")
        except Exception as e:
            logging.error(f"保存文件时出错: {e}")
async def start_proxy(port=8080):
    """Run a DumpMaster with a RequestLogger addon until the proxy stops.

    Args:
        port: TCP port the proxy listens on.

    The log is written to ``proxy_logs.json``. The final save runs in a
    ``finally`` block, so it happens on normal exit and also when the task
    is cancelled by Ctrl+C.
    """
    opts = options.Options(
        listen_port=port,
        # SECURITY: turns off verification of upstream TLS certificates.
        # Keep this for debugging only.
        ssl_insecure=True,
    )
    # Create the DumpMaster instance.
    m = DumpMaster(opts)
    # Register the logging addon.
    logger = RequestLogger("proxy_logs.json")
    m.addons.add(logger)
    # options.set() only accepts "name=value" spec strings, so typed values
    # go through update(). The mode option is a sequence of mode specs.
    m.options.update(
        mode=["regular"],
        showhost=True,
    )
    print(f"代理服务器启动在 http://127.0.0.1:{port}")
    print("按 Ctrl+C 停止代理并保存日志")
    try:
        await m.run()
    finally:
        # A coroutine under asyncio.run normally sees cancellation rather
        # than KeyboardInterrupt, so the save must not sit in an
        # ``except KeyboardInterrupt`` branch.
        print("\n正在停止代理服务器...")
        logger.save_to_file()
        print(f"最终日志已保存到 {logger.output_file}")
        m.shutdown()
def main():
    """Print the banner and run the proxy on port 8080 until interrupted."""
    print("=" * 60)
    print("mitmproxy 代理日志工具")
    print("=" * 60)
    # Port the proxy listens on.
    port = 8080
    try:
        asyncio.run(start_proxy(port))
    except KeyboardInterrupt:
        # asyncio.run re-raises KeyboardInterrupt after cancelling the main
        # task. Ctrl+C is the documented way to stop this tool and
        # start_proxy has already saved the log, so exit without a traceback.
        pass


if __name__ == "__main__":
    main()
使用说明:
- 安装依赖:
pip install mitmproxy
- 运行代理:
python proxy_logger.py
- 配置系统代理:
  - 设置系统HTTP/HTTPS代理为: 127.0.0.1:8080
  - 或者浏览器配置代理到该地址
- 安装mitmproxy证书(用于HTTPS解密):
  - 运行一次 mitmproxy 命令生成证书
  - 证书位置: ~/.mitmproxy/mitmproxy-ca-cert.pem
  - 导入到系统或浏览器的受信任根证书
代码特点:
- 实时输出请求URL到控制台
- 保存完整请求和响应数据到JSON文件
- 支持HTTPS流量解密(需要安装证书)
- 异步处理,性能较好
- 自动保存日志,防止数据丢失
输出文件格式:
[
{
"timestamp": "2024-01-15T10:30:00.123456",
"type": "request",
"method": "GET",
"url": "https://api.example.com/data",
"headers": {...},
"content": null,
"client_ip": "127.0.0.1"
},
{
"timestamp": "2024-01-15T10:30:00.234567",
"type": "response",
"url": "https://api.example.com/data",
"status_code": 200,
"headers": {...},
"content": "{\"data\": \"example\"}",
"content_length": 20
}
]
简单建议:记得安装证书才能解密HTTPS流量。

