如何用 Python 实现研报内容分析与收藏功能?
发现一篇值得收藏学习的研报,拿来跟大家分享一下,其中的内容可以尝试用 Python 实现:《考夫曼基金新上市股投资法——申万大师系列-成长投资篇之三》,链接: http://quant.10jqka.com.cn/platform/html/article.html#id/90834856
研报分享:"考夫曼基金新上市股投资法——申万大师系列-成长投资篇之三":

如何用 Python 实现研报内容分析与收藏功能?
4 回复
学习了
import json
import re
import sqlite3
from contextlib import closing
from dataclasses import dataclass, asdict
from datetime import datetime
from typing import List, Dict, Optional
@dataclass
class ResearchReport:
    """Plain data record describing one research report.

    Attributes:
        id: Identifier of the report (used as the primary key when stored).
        title: Report title.
        content: Full report text.
        source: Publisher of the report.
        publish_date: Publication date, kept as the string supplied by the caller.
        tags: Free-form labels attached to the report.
        is_favorite: Whether the report is marked as a favorite.
        created_at: Creation timestamp as an ISO 8601 string. When omitted or
            ``None`` it is set to the current local time on construction.
    """

    id: str
    title: str
    content: str
    source: str
    publish_date: str
    tags: List[str]
    is_favorite: bool = False
    # ``None`` is only a placeholder; __post_init__ swaps it for a timestamp.
    created_at: Optional[str] = None

    def __post_init__(self):
        """Fill in ``created_at`` with the current time when it was not given."""
        if self.created_at is None:
            self.created_at = datetime.now().isoformat()
class ReportAnalyzer:
    """Keyword- and regex-based analyzer for research report text."""

    # Each pattern captures one numeric value: digits with an optional
    # fractional part.  The label/value separator accepts whitespace and both
    # the ASCII and the full-width colon, which Chinese reports commonly use.
    _METRIC_PATTERNS = (
        re.compile(r'EPS[\s::]*(\d+(?:\.\d+)?)元'),
        re.compile(r'PE[\s::]*(\d+(?:\.\d+)?)倍'),
        re.compile(r'目标价[\s::]*(\d+(?:\.\d+)?)元'),
        re.compile(r'营收[\s::]*(\d+(?:\.\d+)?)[亿万]元'),
    )

    def __init__(self):
        """Set up the rating keywords, grouped by the stance they signal."""
        self.keywords = {
            'buy': ['买入', '推荐', '增持', '强烈推荐'],
            'sell': ['卖出', '减持', '回避', '卖出评级'],
            'neutral': ['中性', '持有', '观望']
        }

    @staticmethod
    def _find_keywords(content: str, words: List[str]) -> List[str]:
        """Return the keywords from ``words`` that occur in ``content``.

        Longer keywords are matched first and their text is masked out, so a
        short keyword that only appears inside a longer one (for example
        '推荐' inside '强烈推荐') is not counted a second time.  The result
        keeps the order of ``words``.
        """
        remaining = content
        matched = set()
        for word in sorted(words, key=len, reverse=True):
            if word and word in remaining:
                matched.add(word)
                # Replace with a separator so masking cannot glue the
                # neighbouring characters into a new keyword.
                remaining = remaining.replace(word, ' ')
        return [word for word in words if word in matched]

    def analyze_sentiment(self, content: str) -> Dict:
        """Classify the stance of a report from the rating keywords it contains.

        Every distinct 'buy' keyword found adds one to the score and every
        distinct 'sell' keyword subtracts one; 'neutral' keywords are reported
        but do not change the score.

        Args:
            content: Report text. ``None`` or an empty string yields a
                neutral result.

        Returns:
            A dict with ``sentiment`` ('positive', 'negative' or 'neutral'),
            ``keywords_found`` (list of matched keywords) and ``score`` (int).
        """
        content = content or ''
        score = 0
        keywords_found: List[str] = []
        for sentiment, words in self.keywords.items():
            found_words = self._find_keywords(content, words)
            keywords_found.extend(found_words)
            if sentiment == 'buy':
                score += len(found_words)
            elif sentiment == 'sell':
                score -= len(found_words)

        if score > 0:
            label = 'positive'
        elif score < 0:
            label = 'negative'
        else:
            label = 'neutral'
        return {
            'sentiment': label,
            'keywords_found': keywords_found,
            'score': score
        }

    def extract_key_metrics(self, content: str) -> List[str]:
        """Extract the numeric values of common financial metrics.

        The patterns are tried in the order EPS, PE, target price, revenue,
        and all matches of one pattern are appended before the next pattern
        is tried.

        Args:
            content: Report text. ``None`` or an empty string yields ``[]``.

        Returns:
            The captured numbers as strings, without their labels or units.
        """
        content = content or ''
        metrics: List[str] = []
        for pattern in self._METRIC_PATTERNS:
            metrics.extend(pattern.findall(content))
        return metrics
class ReportManager:
    """SQLite-backed store for research reports with favorites and search.

    Every operation opens its own connection to ``db_path`` and closes it
    when done.
    """

    def __init__(self, db_path='reports.db'):
        """Create the ``reports`` table if needed and set up the analyzer.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        self._init_database()
        self.analyzer = ReportAnalyzer()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``with sqlite3.connect(...)`` on its own only commits or rolls back;
        it does not close the connection, so it is wrapped in ``closing``.
        """
        return closing(sqlite3.connect(self.db_path))

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE wildcards so ``text`` is matched literally (ESCAPE '\\')."""
        return text.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')

    @staticmethod
    def _stored_sentiment(row: Dict) -> Optional[str]:
        """Return the sentiment saved in a row's ``analysis_result`` JSON, if any."""
        try:
            analysis = json.loads(row.get('analysis_result') or '{}')
        except (TypeError, ValueError):
            return None
        if not isinstance(analysis, dict):
            return None
        return analysis.get('sentiment')

    def _init_database(self):
        """Create the ``reports`` table when it does not exist yet."""
        with self._connect() as conn, conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS reports (
                    id TEXT PRIMARY KEY,
                    title TEXT NOT NULL,
                    content TEXT,
                    source TEXT,
                    publish_date TEXT,
                    tags TEXT,
                    is_favorite INTEGER DEFAULT 0,
                    created_at TEXT,
                    analysis_result TEXT
                )
            ''')

    def add_report(self, report: "ResearchReport") -> str:
        """Analyze a report and insert it, replacing any row with the same id.

        The sentiment analysis and extracted metrics are stored as JSON in the
        ``analysis_result`` column; ``tags`` are stored as a JSON list.

        Args:
            report: The report to store.

        Returns:
            The id of the stored report.
        """
        analysis = self.analyzer.analyze_sentiment(report.content)
        metrics = self.analyzer.extract_key_metrics(report.content)
        analysis_data = {
            'sentiment': analysis['sentiment'],
            'score': analysis['score'],
            'keywords': analysis['keywords_found'],
            'metrics': metrics
        }
        with self._connect() as conn, conn:
            conn.execute('''
                INSERT OR REPLACE INTO reports
                (id, title, content, source, publish_date, tags, is_favorite, created_at, analysis_result)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                report.id,
                report.title,
                report.content,
                report.source,
                report.publish_date,
                json.dumps(report.tags, ensure_ascii=False),
                1 if report.is_favorite else 0,
                report.created_at,
                json.dumps(analysis_data, ensure_ascii=False)
            ))
        return report.id

    def toggle_favorite(self, report_id: str) -> bool:
        """Flip the favorite flag of a report.

        Args:
            report_id: Id of the report to update.

        Returns:
            The new favorite state. ``False`` is also returned when no report
            with this id exists, in which case nothing is changed.
        """
        with self._connect() as conn, conn:
            row = conn.execute(
                'SELECT is_favorite FROM reports WHERE id = ?',
                (report_id,)
            ).fetchone()
            if row is None:
                return False
            new_status = 0 if row[0] else 1
            conn.execute(
                'UPDATE reports SET is_favorite = ? WHERE id = ?',
                (new_status, report_id)
            )
            return bool(new_status)

    def get_favorites(self) -> List[Dict]:
        """Return all favorite reports as dicts, newest ``created_at`` first."""
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                'SELECT * FROM reports WHERE is_favorite = 1 ORDER BY created_at DESC'
            )
            return [dict(row) for row in cursor.fetchall()]

    def search_reports(self, keyword: str,
                       sentiment: Optional[str] = None,
                       source: Optional[str] = None) -> List[Dict]:
        """Search stored reports; all given filters must match.

        Args:
            keyword: Text that must appear in the title or the content. It is
                matched literally ('%' and '_' are not wildcards). An empty
                value disables this filter.
            sentiment: Required sentiment label as stored in the analysis
                result ('positive', 'negative' or 'neutral'), or ``None``.
            source: Required exact source, or ``None``.

        Returns:
            The matching rows as dicts keyed by column name.
        """
        query = 'SELECT * FROM reports WHERE 1=1'
        params = []
        if keyword:
            pattern = f'%{self._escape_like(keyword)}%'
            query += " AND (title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')"
            params.extend([pattern, pattern])
        if source:
            query += ' AND source = ?'
            params.append(source)
        with self._connect() as conn:
            conn.row_factory = sqlite3.Row
            rows = [dict(row) for row in conn.execute(query, params).fetchall()]
        if sentiment:
            # Compare against the decoded JSON rather than a LIKE pattern: the
            # exact text depends on the separators json.dumps used when the
            # row was written, so a textual match is brittle.
            rows = [row for row in rows if self._stored_sentiment(row) == sentiment]
        return rows
# Usage example
def main():
    """Walk through adding, favoriting, listing and searching one sample report."""
    store = ReportManager()

    # Fields of the sample report that is stored below.
    sample_fields = dict(
        id='20231120001',
        title='某公司2023年三季度业绩点评',
        content='我们给予买入评级,目标价25.6元,预计EPS 1.2元,PE 20倍...',
        source='中信证券',
        publish_date='2023-11-20',
        tags=['科技', '买入', '业绩报告'],
    )
    sample = ResearchReport(**sample_fields)

    # Storing the report also runs the analysis on its content.
    report_id = store.add_report(sample)
    print(f"已添加研报: {report_id}")

    # Flip the favorite flag and report the resulting state.
    is_fav = store.toggle_favorite(report_id)
    print(f"收藏状态: {'已收藏' if is_fav else '未收藏'}")

    # List everything currently marked as favorite.
    favorites = store.get_favorites()
    print(f"收藏数量: {len(favorites)}")

    # Search by keyword combined with a sentiment filter.
    results = store.search_reports(keyword='买入', sentiment='positive')
    print(f"找到 {len(results)} 份相关研报")


if __name__ == '__main__':
    main()
这个实现包含三个核心部分:
- ResearchReport 数据类:用 dataclass 定义研报数据结构,包含标题、内容、来源、标签等字段。
- ReportAnalyzer 分析器:
  - analyze_sentiment():分析情感倾向(正面/负面/中性)
  - extract_key_metrics():提取 EPS、PE、目标价等关键指标
- ReportManager 管理器:
  - add_report():添加研报并自动分析
  - toggle_favorite():切换收藏状态
  - get_favorites():获取收藏列表
  - search_reports():支持关键词、情感倾向、来源的多条件搜索
数据用SQLite存储,analysis_result字段保存JSON格式的分析结果。使用示例展示了完整的添加、收藏、搜索流程。
总结:用dataclass+SQLite实现结构化存储,正则表达式做内容分析。
后续我们会每周二周四推出研报,敬请关注!
v2 好像对你们这方面兴趣不太大啊

