Python 抓包 APP 时能提取到图片内容，但获取不到文字信息，该如何解决？

我抓了下包，图片和文字数据其实都有，都在以 https://api.news.taozuiredian.com 开头的接口里。

我遇到过类似问题，抓包拿到图片数据但文字信息缺失。通常是因为文字内容通过其他接口传输或使用了不同的数据格式。

核心思路是：先抓取完整的网络请求，然后分析数据流向。这里提供一个使用 mitmproxy 的完整方案：

# app_capture.py
import mitmproxy.http
from mitmproxy import ctx
import json
import re
from pathlib import Path

class AppCapture:
    """mitmproxy addon that stores image responses and collects response text.

    Image bodies are written to ``output_dir`` as ``image_<n>.bin``. Strings
    found in JSON responses, and a leading snippet of other text-like
    responses, are accumulated in ``text_data``. ``metadata.json`` inside
    ``output_dir`` is rewritten after every response that captured something.
    """

    # Content-Type fragments that mark a response as possibly carrying text.
    TEXT_CONTENT_KINDS = ("json", "text", "javascript")
    # Number of leading characters kept from a non-JSON text response.
    MAX_TEXT_CHARS = 200

    def __init__(self):
        """Create empty capture lists and make sure the output directory exists."""
        self.image_data = []
        self.text_data = []
        self.output_dir = Path("./captured_data")
        self.output_dir.mkdir(parents=True, exist_ok=True)

    # The annotation is quoted so that defining the class does not require the
    # mitmproxy package to be resolvable at definition time.
    def response(self, flow: "mitmproxy.http.HTTPFlow") -> None:
        """Handle one completed response.

        Dispatches on the response ``Content-Type``: image bodies are saved to
        disk, JSON/text/JavaScript bodies are mined for text. Metadata is
        written only when this response actually added an entry.
        """
        content_type = flow.response.headers.get("Content-Type", "").lower()
        captured = False

        try:
            if "image" in content_type:
                captured = self._capture_image(flow)
                if captured:
                    ctx.log.info(f"捕获图片: {flow.request.url}")
            elif any(kind in content_type for kind in self.TEXT_CONTENT_KINDS):
                added = self._capture_text(flow, content_type)
                captured = added > 0
                if captured and "json" in content_type:
                    ctx.log.info(f"从JSON提取 {added} 条文字")
        except ValueError as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError
            # subclasses, and body decoding failures surface as ValueError too.
            # Report the problem instead of silently dropping the response.
            ctx.log.warn(f"Skipping {flow.request.url}: {exc}")

        if captured:
            self.save_metadata()

    def _capture_image(self, flow) -> bool:
        """Write the image body to disk and record it; return True if saved."""
        body = flow.response.content
        if not body:
            # Nothing to store (e.g. a cached/"not modified" reply with no body).
            return False

        img_filename = f"image_{len(self.image_data)}.bin"
        (self.output_dir / img_filename).write_bytes(body)

        self.image_data.append({
            "url": flow.request.url,
            "filename": img_filename,
            "headers": dict(flow.response.headers),
        })
        return True

    def _capture_text(self, flow, content_type: str) -> int:
        """Collect text from a JSON or text-like response.

        Returns the number of entries appended to ``text_data``. Raises
        ``ValueError`` when the body cannot be decoded or is not valid JSON
        although the content type says it is.
        """
        body = flow.response.text
        if not body or not body.strip():
            return 0

        url = flow.request.url
        if "json" in content_type:
            items = self.extract_text_from_json(json.loads(body))
            for item in items:
                # Keep the origin so entries can be traced back to their API call.
                item["url"] = url
            self.text_data.extend(items)
            return len(items)

        # Any other text-like type (text/*, application/javascript, ...).
        self.text_data.append({
            "url": url,
            "content": body[:self.MAX_TEXT_CHARS],
        })
        return 1

    def extract_text_from_json(self, data):
        """Recursively collect every non-blank string inside parsed JSON.

        Dict values are reported under their key; list elements are reported
        under the field name ``"list_item"``. Any other top-level value yields
        an empty list.
        """
        if isinstance(data, dict):
            pairs = data.items()
        elif isinstance(data, list):
            pairs = (("list_item", item) for item in data)
        else:
            return []

        text_items = []
        for field, value in pairs:
            if isinstance(value, str):
                if value.strip():
                    text_items.append({"field": field, "value": value})
            elif isinstance(value, (dict, list)):
                text_items.extend(self.extract_text_from_json(value))
        return text_items

    def save_metadata(self) -> None:
        """Write the captured image and text records to ``metadata.json``."""
        metadata = {
            "images": self.image_data,
            "texts": self.text_data,
        }

        with open(self.output_dir / "metadata.json", "w", encoding="utf-8") as f:
            json.dump(metadata, f, ensure_ascii=False, indent=2)


# mitmproxy only calls hooks on addon instances listed in the module-level
# ``addons`` variable; without it the script is loaded but captures nothing.
addons = [AppCapture()]

# Run with: mitmdump -s app_capture.py

使用步骤：