如何用 Python 实现研报内容分析与收藏功能？

发现一篇值得收藏学习的研报，拿来跟大家分享一下，里面的内容可以尝试用 Python 实现：《考夫曼基金新上市股投资法——申万大师系列-成长投资篇之三》——链接： http://quant.10jqka.com.cn/platform/html/article.html#id/90834856

研报分享:"考夫曼基金新上市股投资法——申万大师系列-成长投资篇之三":

yibo5220 1楼

学习了

ionicwang 2楼

import json
import re
import sqlite3
from contextlib import closing
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Dict, List, Optional

@dataclass
class ResearchReport:
    """A single research report record.

    ``created_at`` defaults to the local time at construction, formatted with
    ``datetime.isoformat()``, when the caller does not supply a value.
    """
    id: str
    title: str
    content: str
    source: str
    publish_date: str
    tags: List[str]
    is_favorite: bool = False
    # None means "not provided"; __post_init__ replaces it with a timestamp.
    created_at: Optional[str] = None

    def __post_init__(self):
        """Fill in ``created_at`` with the current time when it was omitted."""
        if self.created_at is None:
            self.created_at = datetime.now().isoformat()

class ReportAnalyzer:
    """Keyword- and regex-based analyzer for research report text."""

    def __init__(self):
        # Rating phrases grouped by the stance they signal. 'buy' phrases add
        # to the sentiment score, 'sell' phrases subtract from it, and
        # 'neutral' phrases are reported but do not move the score.
        self.keywords = {
            'buy': ['买入', '推荐', '增持', '强烈推荐'],
            'sell': ['卖出', '减持', '回避', '卖出评级'],
            'neutral': ['中性', '持有', '观望']
        }

    @staticmethod
    def _find_keywords(content: str, words: List[str]) -> List[str]:
        """Return the entries of ``words`` that occur in ``content``.

        Longer keywords are matched first and their occurrences are masked
        out, so a phrase such as '强烈推荐' is not additionally reported as the
        shorter '推荐' it contains. A shorter keyword still counts when it
        also appears on its own elsewhere in the text. The result keeps the
        order of ``words``.
        """
        remaining = content
        hits = set()
        for word in sorted(words, key=len, reverse=True):
            if word and word in remaining:
                hits.add(word)
                # Mask with a character that cannot be part of a keyword so
                # the surrounding text cannot join up into a new match.
                remaining = remaining.replace(word, '\x00')
        return [word for word in words if word in hits]

    def analyze_sentiment(self, content: str) -> Dict:
        """Classify the stance of a report from its rating keywords.

        Args:
            content: Report text. Empty or ``None`` yields a neutral result.

        Returns:
            A dict with:
            ``sentiment`` - 'positive', 'negative' or 'neutral';
            ``keywords_found`` - every matched keyword, all groups included;
            ``score`` - distinct 'buy' keywords minus distinct 'sell' keywords.
        """
        result = {
            'sentiment': 'neutral',
            'keywords_found': [],
            'score': 0
        }
        if not content:
            return result

        for sentiment, words in self.keywords.items():
            found_words = self._find_keywords(content, words)
            if not found_words:
                continue
            result['keywords_found'].extend(found_words)
            if sentiment == 'buy':
                result['score'] += len(found_words)
            elif sentiment == 'sell':
                result['score'] -= len(found_words)

        if result['score'] > 0:
            result['sentiment'] = 'positive'
        elif result['score'] < 0:
            result['sentiment'] = 'negative'

        return result

    def extract_key_metrics(self, content: str) -> List[str]:
        """Extract the numeric values of common financial metrics.

        Args:
            content: Report text to scan.

        Returns:
            The captured numbers as strings, without metric labels or units.
            Values are grouped by pattern in the order EPS, PE, target price,
            revenue; within a pattern they follow their order in the text.
        """
        # Each pattern captures only the number between the label and unit.
        patterns = [
            r'EPS[\s：:]*([\d\.]+)元',
            r'PE[\s：:]*([\d\.]+)倍',
            r'目标价[\s：:]*([\d\.]+)元',
            r'营收[\s：:]*([\d\.]+)[亿万]元'
        ]

        metrics = []
        for pattern in patterns:
            metrics.extend(re.findall(pattern, content))

        return metrics

class ReportManager:
    """SQLite-backed store for research reports, with favorites and search.

    Every method opens its own short-lived connection to ``db_path`` and
    closes it before returning.
    """

    def __init__(self, db_path='reports.db'):
        """Create the ``reports`` table if needed and set up the analyzer.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        self._init_database()
        self.analyzer = ReportAnalyzer()

    def _connect(self):
        """Open a new connection to the database at ``self.db_path``."""
        return sqlite3.connect(self.db_path)

    @staticmethod
    def _escape_like(text: str) -> str:
        r"""Escape LIKE wildcards so ``text`` is matched literally.

        Intended for use with ``LIKE ? ESCAPE '\'``.
        """
        return (
            text.replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )

    def _init_database(self):
        """Create the ``reports`` table when it does not exist yet."""
        # sqlite3's own context manager only commits or rolls back; closing()
        # is what actually releases the connection.
        with closing(self._connect()) as conn, conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS reports (
                    id TEXT PRIMARY KEY,
                    title TEXT NOT NULL,
                    content TEXT,
                    source TEXT,
                    publish_date TEXT,
                    tags TEXT,
                    is_favorite INTEGER DEFAULT 0,
                    created_at TEXT,
                    analysis_result TEXT
                )
            ''')

    def add_report(self, report: 'ResearchReport') -> str:
        """Analyze a report and store it together with the analysis result.

        A row with the same ``id`` is replaced entirely, including its
        ``is_favorite`` flag. ``tags`` and the analysis are stored as JSON
        text.

        Args:
            report: The report to store.

        Returns:
            The id of the stored report.
        """
        analysis = self.analyzer.analyze_sentiment(report.content)
        metrics = self.analyzer.extract_key_metrics(report.content)

        analysis_data = {
            'sentiment': analysis['sentiment'],
            'score': analysis['score'],
            'keywords': analysis['keywords_found'],
            'metrics': metrics
        }

        with closing(self._connect()) as conn, conn:
            conn.execute('''
                INSERT OR REPLACE INTO reports 
                (id, title, content, source, publish_date, tags, is_favorite, created_at, analysis_result)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                report.id,
                report.title,
                report.content,
                report.source,
                report.publish_date,
                json.dumps(report.tags, ensure_ascii=False),
                1 if report.is_favorite else 0,
                report.created_at,
                json.dumps(analysis_data, ensure_ascii=False)
            ))

        return report.id

    def toggle_favorite(self, report_id: str) -> bool:
        """Flip the favorite flag of a report.

        Args:
            report_id: Id of the report to toggle.

        Returns:
            The new favorite state. ``False`` is also returned when no report
            with ``report_id`` exists, so that case cannot be told apart from
            "now un-favorited" by the return value alone.
        """
        with closing(self._connect()) as conn, conn:
            row = conn.execute(
                'SELECT is_favorite FROM reports WHERE id = ?',
                (report_id,)
            ).fetchone()

            if row is None:
                return False

            new_status = 0 if row[0] else 1
            conn.execute(
                'UPDATE reports SET is_favorite = ? WHERE id = ?',
                (new_status, report_id)
            )
            return bool(new_status)

    def get_favorites(self) -> List[Dict]:
        """Return all favorited reports, newest ``created_at`` first.

        Returns:
            One dict per row keyed by column name. ``tags`` and
            ``analysis_result`` are returned as the stored JSON text.
        """
        with closing(self._connect()) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                'SELECT * FROM reports WHERE is_favorite = 1 ORDER BY created_at DESC'
            )
            return [dict(row) for row in cursor.fetchall()]

    def search_reports(self, keyword: str,
                       sentiment: Optional[str] = None,
                       source: Optional[str] = None) -> List[Dict]:
        """Search reports; all supplied filters must match.

        Args:
            keyword: Text that must occur in the title or content. An empty
                value disables this filter.
            sentiment: Sentiment label recorded in the stored analysis
                ('positive', 'negative' or 'neutral').
            source: Exact source name.

        Returns:
            One dict per matching row keyed by column name.
        """
        query = 'SELECT * FROM reports WHERE 1=1'
        params = []

        if keyword:
            needle = f'%{self._escape_like(keyword)}%'
            query += " AND (title LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')"
            params.extend([needle, needle])

        if sentiment:
            label = self._escape_like(sentiment)
            # add_report serializes with json.dumps' default separators, which
            # put a space after the colon; the compact form is accepted too.
            query += (
                " AND (analysis_result LIKE ? ESCAPE '\\'"
                " OR analysis_result LIKE ? ESCAPE '\\')"
            )
            params.extend([
                f'%"sentiment": "{label}"%',
                f'%"sentiment":"{label}"%',
            ])

        if source:
            query += ' AND source = ?'
            params.append(source)

        with closing(self._connect()) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(query, params)
            return [dict(row) for row in cursor.fetchall()]

# Usage example
def main():
    """Run the add / favorite / list / search workflow once and print results."""
    store = ReportManager()

    # A sample report whose text carries a rating phrase and a few metrics.
    sample = ResearchReport(
        id='20231120001',
        title='某公司2023年三季度业绩点评',
        content='我们给予买入评级，目标价25.6元，预计EPS 1.2元，PE 20倍...',
        source='中信证券',
        publish_date='2023-11-20',
        tags=['科技', '买入', '业绩报告'],
    )

    # Store the report.
    saved_id = store.add_report(sample)
    print(f"已添加研报: {saved_id}")

    # Flip the favorite flag and report the resulting state.
    if store.toggle_favorite(saved_id):
        status = '已收藏'
    else:
        status = '未收藏'
    print(f"收藏状态: {status}")

    # List everything currently marked as favorite.
    favorite_rows = store.get_favorites()
    print(f"收藏数量: {len(favorite_rows)}")

    # Search by keyword combined with a sentiment filter.
    matches = store.search_reports(keyword='买入', sentiment='positive')
    print(f"找到 {len(matches)} 份相关研报")

if __name__ == '__main__':
    main()

这个实现包含三个核心部分：

ResearchReport数据类：用dataclass定义研报数据结构，包含标题、内容、来源、标签等字段。
ReportAnalyzer分析器：
- analyze_sentiment()：分析情感倾向（正面/负面/中性）
- extract_key_metrics()：提取EPS、PE、目标价等关键指标
ReportManager管理器：
- add_report()：添加研报并自动分析
- toggle_favorite()：切换收藏状态
- get_favorites()：获取收藏列表
- search_reports()：支持关键词、情感倾向、来源的多条件搜索

数据用SQLite存储，analysis_result字段保存JSON格式的分析结果。使用示例展示了完整的添加、收藏、搜索流程。

总结：用 dataclass + SQLite 实现结构化存储，用关键词匹配判断情感倾向，用正则表达式提取关键指标。

sinazl 3楼

后续我们会每周二周四推出研报，敬请关注！

ionicwang 4楼

v2 好像对你们这方面兴趣不太大啊