用ast模块解析代码,然后遍历语法树就能提取。下面是个直接可用的例子:
import ast
def extract_code_structure(source_code):
    """Extract function, class and module-level variable definitions.

    Args:
        source_code: Python source text to analyse.

    Returns:
        A dict with three lists:

        * ``functions`` -- one entry per ``def`` / ``async def`` found
          anywhere in the tree (methods and nested functions included), in
          ``ast.walk`` order. Keys: ``name``, ``type`` (``'function'`` or
          ``'async_function'``), ``lineno`` and ``args``. ``args`` holds the
          names in ``node.args.args`` only, so positional-only, keyword-only,
          ``*args`` and ``**kwargs`` parameters are not listed.
        * ``classes`` -- one entry per class found anywhere in the tree.
          Keys: ``name``, ``lineno`` and ``methods`` (names of the functions
          defined directly in the class body).
        * ``variables`` -- one entry per plain-name target of an assignment
          statement at module top level. Keys: ``name`` and ``lineno``.

    Raises:
        SyntaxError: if ``source_code`` is not valid Python (raised by
            ``ast.parse``).
    """
    tree = ast.parse(source_code)

    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    functions = []
    classes = []

    # Functions and classes are collected at any nesting depth.
    for node in ast.walk(tree):
        if isinstance(node, function_types):
            is_async = isinstance(node, ast.AsyncFunctionDef)
            functions.append({
                'name': node.name,
                'type': 'async_function' if is_async else 'function',
                'lineno': node.lineno,
                'args': [arg.arg for arg in node.args.args],
            })
        elif isinstance(node, ast.ClassDef):
            classes.append({
                'name': node.name,
                'lineno': node.lineno,
                'methods': [
                    item.name
                    for item in node.body
                    if isinstance(item, function_types)
                ],
            })

    # Variables are module-level only, so iterate the module body directly
    # instead of ast.walk(): walking the whole tree would also report locals
    # inside functions and attributes assigned in class bodies.
    variables = []
    for stmt in tree.body:
        if not isinstance(stmt, ast.Assign):
            continue
        for target in stmt.targets:
            # Only simple names; tuple unpacking, attribute and subscript
            # targets are intentionally skipped.
            if isinstance(target, ast.Name):
                variables.append({
                    'name': target.id,
                    'lineno': stmt.lineno,
                })

    return {
        'functions': functions,
        'classes': classes,
        'variables': variables,
    }
# Example usage: analyse a small sample program and print what was found.
# The sample must keep valid Python indentation, otherwise ast.parse() raises
# IndentationError before anything is extracted.
code = """
import os

MAX_RETRIES = 3
config = {'debug': True}

class DataProcessor:
    def __init__(self):
        self.data = []

    def process(self):
        return len(self.data)

def calculate_sum(a, b):
    return a + b

async def fetch_data(url):
    import aiohttp
    return await aiohttp.get(url)
"""

result = extract_code_structure(code)

# Functions (methods included), in the order the extractor reports them.
print("函数:")
for func in result['functions']:
    print(f" {func['name']} (行号:{func['lineno']})")

# Classes, each followed by the names of its directly defined methods.
print("\n类:")
for cls in result['classes']:
    print(f" {cls['name']} (行号:{cls['lineno']})")
    print(f" 方法: {', '.join(cls['methods'])}")

# Variables assigned to plain names.
print("\n变量:")
for var in result['variables']:
    print(f" {var['name']} (行号:{var['lineno']})")
这个脚本会输出:
函数:
calculate_sum (行号:14)
fetch_data (行号:17)
__init__ (行号:8)
process (行号:11)
类:
DataProcessor (行号:7)
方法: __init__, process
变量:
MAX_RETRIES (行号:4)
config (行号:5)
(ast.walk() 不保证遍历顺序;CPython 的实现是广度优先,所以模块级函数会排在类的方法之前,方法也会出现在"函数"列表里。)
关键点:
- ast.parse()把代码转成抽象语法树
- ast.walk()遍历所有节点
- 通过判断节点类型来识别不同结构
注意这个实现只处理了赋值目标是单个名字的简单赋值(如 x = 1),元组解包(如 a, b = 1, 2)、属性赋值(如 self.data = [])和带类型注解的赋值(如 x: int = 1)都不会被收集,需要额外处理。
总结:用ast模块解析语法树最可靠。