Python多因子策略1.0如何实现与优化

分享一个带权重的多因子策略的思路部分，若有不足还希望大家指点指点~ 策略是在Ricequant实现的，可以点击下面的链接进行详细的查看:---->具体的策略思路

股票池是中证 800 成分股；所有因子是 factors。

# Stock universe: constituents of index 000906.XSHG.
stocks = index_components('000906.XSHG')

# Final factor columns, in the order they are selected at the end of get_data().
factors = [
    '日换手率', '月换手率', '周换手率',
    'pe', 'pcf', 'pb',
    '企业价值_市值比', '股息率', '市盈率相对盈利增长比率', '每股股利_市值比',
    'roe', 'roa', '投入资本回报率 ROIC',
    '息税前利润 /营业总收入', '权益乘数', '流动比率', 'fcff_市值比',
    '净资产收益率(增长率)',
    '基本每股收益增长率', '每股收益_市值比', '销售毛利率', '未分配利润_市值比',
    '账面市值比', '市值', '营收收入增长', '产权比率', 'ev_ebitda',
    '每股资产增长', '资产负债率', '存货周转率', '销售净利率', '利润总额增长',
    '总资产周转率', '营业利润增长', '市销率', '营业利润率',
    '本周新增评论数', '本周新增关注者', '本周总评论数', '本周总关注者数',
    '本周卖出行为数', '本周买入行为数',
]
获得某期限内的所有星期五交易日：

def get_date_list(start_date,end_date):
    """Return the Friday trading days between two dates as 'YYYY-MM-DD' strings.

    The trading calendar comes from ``get_trading_dates``; only entries whose
    ``weekday()`` is 4 are kept, in calendar order.
    """
    friday = 4
    return [
        day.strftime('%Y-%m-%d')
        for day in get_trading_dates(start_date, end_date)
        if day.weekday() == friday
    ]

1、根据日期获得股票池的所有因子数据

def get_data(date):
    """Build the factor table for the stock universe on one date.

    Queries the raw fundamentals for every code in the module-level ``stocks``
    list, derives the ratio factors, appends turnover and Xueqiu sentiment
    columns, and returns only the columns named in the module-level
    ``factors`` list.

    Args:
        date: Date string (it is concatenated with other strings below).

    Returns:
        The factor table restricted to ``factors``. Turnover / sentiment
        columns are left empty (NaN) when the corresponding data source fails.
    """
    # Labels for the raw query columns, in the same order as the query fields
    # below. Not all of them end up in the final factor set.
    factors1 = ['pe','pcf','pb','企业价值','股息率','市盈率相对盈利增长比率','每股股利','roe','roa','投入资本回报率 ROIC',
                '息税前利润 /营业总收入','权益乘数','流动比率','企业自由现金流量 FCFF','净资产收益率(增长率)',
                '基本每股收益增长率','每股收益 EPS','销售毛利率','未分配利润','总权益','市值','营收收入增长','产权比率','ebit',
                '每股资产增长','资产负债率','存货周转率','销售净利率','利润总额增长','总资产周转率',
                '营业利润增长','营业利润','营业收入','市销率'
                ]
    q = query(
        fundamentals.eod_derivative_indicator.pe_ratio,
        fundamentals.eod_derivative_indicator.pcf_ratio,
        fundamentals.eod_derivative_indicator.pb_ratio,
        fundamentals.eod_derivative_indicator.ev,
        fundamentals.eod_derivative_indicator.dividend_yield,
        fundamentals.eod_derivative_indicator.peg_ratio,
        fundamentals.financial_indicator.dividend_per_share,
        fundamentals.financial_indicator.adjusted_return_on_equity_weighted_average,  # roe
        fundamentals.financial_indicator.return_on_asset_net_profit,  # roa
        fundamentals.financial_indicator.return_on_invested_capital,  # ROIC
        fundamentals.financial_indicator.ebit_to_revenue,  # EBIT / total revenue
        fundamentals.financial_indicator.du_equity_multiplier,  # equity multiplier (DuPont)
        fundamentals.financial_indicator.current_ratio,  # current ratio
        fundamentals.financial_indicator.fcff,  # free cash flow to firm
        fundamentals.financial_indicator.inc_return_on_equity,  # ROE (diluted), YoY growth
        fundamentals.financial_indicator.inc_earnings_per_share,  # basic EPS, YoY growth
        fundamentals.financial_indicator.earnings_per_share,  # basic EPS
        fundamentals.financial_indicator.gross_profit_margin,  # gross margin
        fundamentals.balance_sheet.undistributed_profit,  # undistributed profit
        fundamentals.balance_sheet.total_equity,  # total equity
        fundamentals.eod_derivative_indicator.market_cap,  # market cap
        fundamentals.financial_indicator.inc_operating_revenue,  # revenue growth
        fundamentals.financial_indicator.debt_to_equity_ratio,  # debt-to-equity
        # NOTE: this field is EBITDA although its column label is 'ebit'.
        fundamentals.financial_indicator.ebitda,
        fundamentals.financial_indicator.inc_book_per_share,  # book value per share growth
        fundamentals.financial_indicator.debt_to_asset_ratio,  # debt-to-asset
        fundamentals.financial_indicator.inventory_turnover,  # inventory turnover
        fundamentals.financial_indicator.net_profit_margin,  # net margin
        fundamentals.financial_indicator.inc_profit_before_tax,  # pre-tax profit growth
        fundamentals.financial_indicator.total_asset_turnover,  # total asset turnover
        # NOTE(review): the next two fields are *gross* profit but are labelled
        # operating profit ('营业利润增长' / '营业利润') - confirm intent.
        fundamentals.financial_indicator.inc_gross_profit,
        fundamentals.income_statement.gross_profit,
        fundamentals.income_statement.revenue,  # revenue
        fundamentals.eod_derivative_indicator.ps_ratio,  # price-to-sales
    ).filter(
        fundamentals.income_statement.stockcode.in_(stocks)
    )

    fundamentals_df = get_fundamentals(q, date)
    df = fundamentals_df

    # Relabel the raw query columns with the factor names above.
    # NOTE(review): `.items` and the three-axis slice below presumably target a
    # pandas Panel returned by get_fundamentals - confirm against the platform
    # version in use.
    df.items = factors1

    # The date label is wrapped in literal single quotes before slicing.
    # NOTE(review): confirm the date axis labels really carry those quotes.
    str1 = "\'" + date + "\'"
    df = df[:, str1, :]

    # Derived ratio factors.
    df['账面市值比'] = df['总权益'] / df['市值']
    df['每股股利_市值比'] = df['每股股利'] / df['市值']
    df['fcff_市值比'] = df['企业自由现金流量 FCFF'] / df['市值']
    df['每股收益_市值比'] = df['每股收益 EPS'] / df['市值']
    df['未分配利润_市值比'] = df['未分配利润'] / df['市值']
    df['企业价值_市值比'] = df['企业价值'] / df['市值']
    # Computed as EBITDA / EV, i.e. the inverse of what the name suggests.
    df['ev_ebitda'] = df['ebit'] / df['企业价值']
    df['营业利润率'] = df['营业利润'] / df['营业收入']

    # Turnover factors. The original referenced an undefined `stock_set`; the
    # resulting NameError was swallowed by a bare `except`, so these columns
    # were always empty. The universe used by the query above is `stocks`.
    turnover_columns = (('周换手率', 'week'), ('月换手率', 'month'), ('日换手率', 'today'))
    for column, key in turnover_columns:
        try:
            df[column] = get_turnover_rate(stocks)[key].T.values
        except Exception:
            # Best effort: leave the factor empty when the source is unavailable.
            df[column] = Series(dtype='float64')

    # Xueqiu sentiment factors: all six are filled, or all six are left empty
    # if any lookup fails (same all-or-nothing behaviour as before).
    xueqiu_columns = (
        ('new_comments', '本周新增评论数'),
        ('new_followers', '本周新增关注者'),
        ('total_comments', '本周总评论数'),
        ('total_followers', '本周总关注者数'),
        ('sell_actions', '本周卖出行为数'),
        ('buy_actions', '本周买入行为数'),
    )
    try:
        for field, column in xueqiu_columns:
            x = xueqiu.top_stocks(field=field, frequency='1w', count=800)
            x = x.set_index('order_book_id')
            x = x.reindex(index=stocks)
            # Values are assigned positionally, so this assumes df's rows are
            # sorted by stock code - TODO confirm.
            x = x.sort_index()
            df[column] = x.values
    except Exception:
        for _, column in xueqiu_columns:
            df[column] = Series(dtype='float64')

    df = df[factors]
    return df

2、因子的权重的确定

获取 IC：

因子在某一期的 IC 指的是该期因子对股票的下期收益的预测值和股票下期的实际回报值在横截面上的相关系数。此次使用 python 中 scipy.stats 库中的 pearsonr 方法简化运算，规定因子的 IC=pearsonr(factor_values_0,returns_0_1)，假设现在是第 2 期的时间，其中 factor_values_0 指第 0 期的因子值，returns_0_1 指第 0 期至第 1 期的涨跌幅, pearsonr 为相关系数计算。

def get_currentIC(date):
    """Compute each factor's IC for the period ending on ``date``.

    The IC of a factor is the Pearson correlation between the factor values
    taken on the Friday before ``date`` and the stock returns stored in
    ``all_returns[date]``. Missing values are replaced by 0 before correlating.

    Args:
        date: A date string that is present in the module-level ``date_list``.

    Returns:
        A one-row DataFrame with one column per name in ``factors``.

    Raises:
        ValueError: if ``date`` is not in ``date_list`` or is its first entry
            (there is then no previous period to take factor values from).
    """
    position = date_list.index(date)
    if position == 0:
        # date_list[-1] would silently select the LAST date in the list,
        # i.e. factor data from the future.
        raise ValueError("no previous period before %s in date_list" % date)
    lst_date = date_list[position - 1]

    # Factor data as of the previous Friday trading day.
    fd2 = get_data(lst_date)
    # NOTE(review): the result of Schmidt() is never used below; either it was
    # meant to replace fd2 or the call is dead code - confirm intent.
    factordata = Schmidt(fd2)
    fd2 = winsorize(fd2)

    pearson_df = pd.concat([fd2, all_returns[date]], axis=1)
    pearson_df = pearson_df.replace(np.nan, 0)

    IC = pd.DataFrame()
    for fac in factors:
        ic, _ = st.pearsonr(pearson_df[fac], pearson_df[date])
        IC[fac] = np.array([ic])
    return IC

处理异常值的 winsorize 方法：

def winsorize(df):
    """Clip every column of ``df`` to mean +/- 3 standard deviations.

    For each column the mean and the population standard deviation (ddof=0,
    NaN skipped) are computed; values below ``mean - 3*std`` are raised to
    that bound and values above ``mean + 3*std`` are lowered to it.

    Args:
        df: DataFrame of numeric columns.

    Returns:
        A new DataFrame with the same index and columns, with outliers clipped.
        The input is not modified.
    """
    clipped = {}
    for name in df.columns:
        s = df[name]
        center = s.mean()
        # ddof=0 matches np.std applied to a Series.
        spread = s.std(ddof=0)
        down = center - 3 * spread
        up = center + 3 * spread
        # Clipping must happen per column, inside the loop; previously only
        # the last column was processed.
        clipped[name] = s.clip(lower=down, upper=up)
    return pd.DataFrame(clipped, index=df.index, columns=df.columns)

其中 all_returns 是股票的涨跌，通过以下方式获得所有股票的涨跌幅

def count_reven(stocks,s_date,e_date):
    """Compute the return of each stock between two dates.

    Fetches the 'ClosingPx' field from ``get_price`` for the period, transposes
    the table, and returns ``last_row / first_row - 1``.

    Args:
        stocks: Stock codes passed through to ``get_price``.
        s_date: Start date passed through to ``get_price``.
        e_date: End date passed through to ``get_price``.

    Returns:
        A Series of simple returns over the period.
    """
    df_cn = get_price(stocks, start_date=s_date, end_date=e_date)['ClosingPx']
    # NOTE(review): after the transpose the first/last ROW must be the
    # first/last date for the result to be per-stock - confirm the orientation
    # returned by get_price.
    df_cn = df_cn.T
    # `.ix` has been removed from pandas; `.iloc` is the positional equivalent.
    day0 = Series(df_cn.iloc[0])
    day1 = Series(df_cn.iloc[-1])

    rets = day1 / day0 - 1
    return rets

计算 IR 与权重:

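然后根据 IC 计算 IR：因子的 IR 值是指因子 IC 的均值与因子 IC 的标准差的比值，即 $IR = \overline{IC} / \sigma(IC)$。本次试验通过最大化组合因子的 IR 来求因子权重，最优权重为 $w^{*} \propto \Sigma_{IC}^{-1}\,\overline{IC}$，其中 $\Sigma_{IC}$ 是各因子 IC 序列的协方差矩阵，$\overline{IC}$ 是各因子 IC 的均值向量。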

具体代码体现如下：

N=8
def get_bestweight(currentdate):
    """Return the IR-maximising factor weights as of ``currentdate``.

    The ICs of the ``N`` periods that precede ``currentdate`` (the current
    date itself is not included) are stacked; factors with a missing IC in any
    period are dropped; the weights are ``pinv(cov(IC)) . mean(IC)``.

    Args:
        currentdate: A date string present in the module-level ``date_list``.

    Returns:
        ``(eff_facs, weight)``: the names of the factors kept, and a 1-D array
        of weights in the same order.

    Raises:
        ValueError: if ``currentdate`` is not in ``date_list`` or fewer than
            ``N + 1`` periods precede it.
    """
    current = date_list.index(currentdate)
    if current < N + 1:
        # Each of the N IC dates needs one more earlier period for its factor
        # values; negative indices would otherwise wrap to the end of the list
        # and pull in future data.
        raise ValueError(
            "need at least %d periods before %s in date_list" % (N + 1, currentdate)
        )
    date = [date_list[current - i - 1] for i in range(N)]  # the N previous periods

    IC = pd.concat([get_currentIC(d) for d in date], axis=0)
    IC = IC.dropna(axis=1)
    eff_facs = IC.columns

    # With N observations the covariance has rank at most N - 1, so it is
    # singular whenever more than N - 1 factors remain; the pseudo-inverse
    # equals the inverse when one exists and stays defined otherwise.
    cov = np.asarray(IC.cov())
    weight = np.linalg.pinv(cov).dot(np.asarray(IC.mean()))
    return eff_facs,weight

至此，已经获得所有因子的权重了，但是权重的数量级参差不齐;

为了避免某一因子的权重过大，对权重进行无量纲化（归一化）处理：

    # Rescale the factor weights with a MinMaxScaler so they share one range.
    # NOTE(review): `.values` assumes `weight` is a pandas object here, whereas
    # get_bestweight() returns a plain array - confirm what `weight` is at this
    # point in the full strategy.
    train = weight.values
    nm=MinMaxScaler()
    # NOTE(review): presumably sklearn's MinMaxScaler, which expects 2-D input
    # - confirm the shape of `train`.
    train=nm.fit_transform(train)

至此就获得了因子的权重了，剩下就是对因子进行筛选，根据因子的贡献度进行因子的筛选：

将股票根据涨跌幅进行排序，分成 10 组，平均涨跌幅最高组的涨跌幅记为 high，最低记为 low。对每一个因子排序分成 10 组，平均涨幅最高一组的涨跌幅记为 port_high，最低的一组记为 port_low。

规定因子贡献度=abs(port_high-port_low)/(high-low)，因子贡献度越高，说明因子效果越好。按照因子贡献度排序，取贡献度最大的前 7 个因子。

然后给股票打分，选择交易的股票，具体的思路是：

将上一期每个因子的取值按大小分成十组，收益率最高的一组组号记为 10，依此类推，收益率最低的一组组号记为 1。

获得这期每个股票在每个有效因子上的组号，乘以对应因子的权重，作为该股票在该因子上的得分，累加每个股票在每个有效因子上的得分，作为总分。取总分最高的前 30 个股票进行交易。

Python多因子策略1.0如何实现与优化

bupafengyu 1楼

这个问题涉及代码实现，我来给你一个完整的多因子策略框架。

首先，我们需要构建一个基础的多因子模型。这里使用pandas和numpy进行数据处理，yfinance获取股票数据（需要先安装：pip install yfinance）。

import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta

class MultiFactorStrategy:
    """Minimal multi-factor stock-selection pipeline.

    Workflow: ``fetch_data`` -> ``calculate_factor`` (once per factor) ->
    ``combine_factors`` -> ``generate_signals``. Prices, factors and scores
    are all DataFrames with dates as rows and symbols as columns.
    """

    def __init__(self, symbols, start_date, end_date):
        """Store the universe and the date range; no data is loaded here."""
        self.symbols = symbols
        self.start_date = start_date
        self.end_date = end_date
        self.data = None   # price table, set by fetch_data()
        self.factors = {}  # factor name -> factor table

    def fetch_data(self):
        """Download adjusted closing prices for ``self.symbols``.

        Returns:
            The downloaded price table (also stored on ``self.data``).
        """
        print("正在下载数据...")
        # Newer yfinance releases adjust prices by default and then return no
        # 'Adj Close' column; requesting auto_adjust explicitly and reading
        # 'Close' yields adjusted closes under either default.
        data = yf.download(self.symbols, start=self.start_date,
                           end=self.end_date, auto_adjust=True)['Close']
        self.data = data
        return data

    def calculate_factor(self, name, factor_func):
        """Compute one factor as ``factor_func(self.data)`` and store it under ``name``."""
        self.factors[name] = factor_func(self.data)
        return self.factors[name]

    def combine_factors(self, weights):
        """Blend the stored factors into one score per date and symbol.

        Each factor is z-scored across symbols within every date and then
        multiplied by its weight; the weighted z-scores are summed. The weight
        is applied after standardising, because scaling a factor before
        z-scoring cancels out. A symbol missing any selected factor on a date
        gets a NaN score for that date.

        Args:
            weights: Mapping of factor name to weight. Names that have not
                been calculated are ignored.

        Returns:
            DataFrame of combined scores (dates x symbols). All-NaN when none
            of the names in ``weights`` has been calculated.

        Raises:
            ValueError: if no factor has been calculated yet.
        """
        if not self.factors:
            raise ValueError("请先计算因子")

        combined_score = None
        for factor_name, weight in weights.items():
            if factor_name not in self.factors:
                continue
            values = self.factors[factor_name]
            # A zero cross-sectional spread would divide by zero; treat it as
            # "no information" instead.
            spread = values.std(axis=1).replace(0, np.nan)
            zscore = values.sub(values.mean(axis=1), axis=0).div(spread, axis=0)
            weighted = zscore * weight
            if combined_score is None:
                combined_score = weighted
            else:
                combined_score = combined_score + weighted

        if combined_score is None:
            # No usable factor was named: return an empty (all-NaN) score
            # table with the shape of an existing factor.
            template = next(iter(self.factors.values()))
            return template * np.nan
        return combined_score

    def generate_signals(self, combined_score, top_n=10):
        """Flag the ``top_n`` highest-scoring symbols on every date.

        Args:
            combined_score: Score table from ``combine_factors``.
            top_n: Number of symbols to buy per date.

        Returns:
            DataFrame (dates x ``self.symbols``) holding 1 for a buy and 0
            otherwise. Dates without any valid score are all 0.
        """
        signals = pd.DataFrame(0, index=combined_score.index, columns=self.symbols)

        for date in combined_score.index:
            daily_scores = combined_score.loc[date]
            if not isinstance(daily_scores, pd.Series):
                continue
            # Rank only symbols that belong to the universe and have a score.
            ranked = daily_scores.reindex(signals.columns).dropna()
            top_stocks = ranked.nlargest(top_n).index
            signals.loc[date, top_stocks] = 1

        return signals

# 定义几个简单的因子计算函数
def momentum_factor(data, window=20):
    """Momentum factor: percentage change over the trailing ``window`` rows (20 by default)."""
    return data.pct_change(periods=window)

def volatility_factor(data, window=20):
    """Volatility factor: negated rolling std of one-row returns over ``window`` rows.

    The sign is flipped so that lower volatility gives a higher factor value.
    """
    period_returns = data.pct_change()
    rolling_std = period_returns.rolling(window).std()
    return -rolling_std

def volume_factor(data, window=5):
    """Volume factor placeholder: ``window``-row change of the ``window``-row rolling mean.

    Simplified: it is computed from whatever ``data`` is passed in; a real
    volume factor needs volume data.
    """
    smoothed = data.rolling(window).mean()
    return smoothed.pct_change(window)

# Usage example
if __name__ == "__main__":
    # Example ticker universe
    symbols = [
        'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META',
        'TSLA', 'NVDA', 'JPM', 'JNJ', 'WMT',
    ]

    # Back-test window: the two years ending now
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365*2)

    # Build the strategy and load prices
    strategy = MultiFactorStrategy(symbols, start_date, end_date)
    price_data = strategy.fetch_data()

    # Compute the factors, each over a 20-row window
    for factor_name, factor_func in (('momentum', momentum_factor),
                                     ('volatility', volatility_factor)):
        strategy.calculate_factor(
            factor_name,
            lambda x, factor_func=factor_func: factor_func(x, window=20),
        )

    # Blend the factors with fixed weights
    weights = {'momentum': 0.6, 'volatility': 0.4}
    combined = strategy.combine_factors(weights)

    # Turn the scores into buy signals
    signals = strategy.generate_signals(combined, top_n=5)

    latest = signals.iloc[-1]
    buy_list = latest[latest == 1].index.tolist()

    print("策略构建完成！")
    print(f"数据形状: {price_data.shape}")
    print(f"信号数据形状: {signals.shape}")
    print(f"最近交易日的买入信号: {buy_list}")

这个框架包含了多因子策略的核心组件：数据获取、因子计算、因子合成和信号生成。你可以通过以下方式优化：

因子库扩展：添加更多因子（价值、质量、成长等）
权重优化：使用IC/IR分析或机器学习优化因子权重
风险控制：加入行业、市值中性化处理
回测系统：集成backtrader或zipline进行完整回测

总结：先跑通基础框架，再逐步添加复杂因子和风控模块。