Python中如何开发一个机器学习算法集

基于 numpy 实现（数据划分和评估指标借助 scikit-learn），对初学者很友好

要开发一个机器学习算法集，核心是设计一个模块化、可扩展的架构。下面是一个基础框架的实现，包含了算法基类、具体算法实现和简单的评估流程。

from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

class BaseMLAlgorithm(ABC):
    """Abstract base class for machine-learning algorithms.

    Subclasses supply ``fit`` and ``predict``; ``evaluate`` is implemented
    here on top of ``predict``.
    """

    def __init__(self, name: str, **params):
        """Store the algorithm name and any extra keyword parameters.

        Args:
            name: Name of the algorithm, kept on ``self.name``.
            **params: Arbitrary keyword arguments, kept as a dict on
                ``self.params``.
        """
        self.name, self.params = name, params
        self.model = None
        # Subclasses are expected to flip this to True at the end of fit().
        self.is_fitted = False

    @abstractmethod
    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Train the model on features ``X`` and targets ``y``."""

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return predictions for the samples in ``X``."""

    def evaluate(self, X: np.ndarray, y: np.ndarray, metric: str = 'accuracy') -> float:
        """Score the model's predictions on ``X`` against ``y``.

        Args:
            X: Samples to predict on.
            y: Reference targets.
            metric: ``'accuracy'`` or ``'mse'``.

        Returns:
            The value of the requested metric.

        Raises:
            ValueError: If the model has not been fitted, or ``metric`` is
                not one of the supported names.
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before evaluation")

        # Predictions are computed before the metric name is checked.
        predicted = self.predict(X)

        if metric == 'mse':
            return mean_squared_error(y, predicted)
        if metric == 'accuracy':
            return accuracy_score(y, predicted)
        raise ValueError(f"Unsupported metric: {metric}")

class LinearRegressionAlgorithm(BaseMLAlgorithm):
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit``, the learned intercept is on ``intercept_`` and the
    per-feature weights are on ``coef_``.
    """

    def __init__(self, **params):
        """Create an unfitted model; ``params`` are forwarded to the base class."""
        super().__init__("LinearRegression", **params)
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit intercept and coefficients by least squares.

        Args:
            X: Feature matrix with one row per sample.
            y: Target values, one per sample.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Prepend a column of ones so the first weight acts as the intercept.
        X_with_bias = np.c_[np.ones(X.shape[0]), X]

        # Solve the least-squares problem directly instead of inverting
        # X^T X: explicit inversion raises LinAlgError when features are
        # collinear (or there are fewer samples than features) and loses
        # precision on ill-conditioned data. lstsq returns the minimum-norm
        # solution in those cases.
        theta, *_ = np.linalg.lstsq(X_with_bias, y, rcond=None)

        self.intercept_ = theta[0]
        self.coef_ = theta[1:]
        self.is_fitted = True

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return ``intercept_ + X @ coef_`` for the samples in ``X``.

        Raises:
            ValueError: If called before ``fit``.
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before prediction")
        return self.intercept_ + X @ self.coef_

class KNNClassifier(BaseMLAlgorithm):
    """K-nearest-neighbours classifier using Euclidean distance and majority vote."""

    def __init__(self, n_neighbors: int = 5, **params):
        """Create an unfitted classifier.

        Args:
            n_neighbors: Number of nearest training samples that vote on
                each prediction.
            **params: Forwarded to the base class.
        """
        super().__init__("KNNClassifier", **params)
        self.n_neighbors = n_neighbors
        self.X_train = None
        self.y_train = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Memorise the training set.

        Args:
            X: Training samples, one row per sample.
            y: Labels, one per training sample.

        Raises:
            ValueError: If ``n_neighbors`` is less than 1, or ``X`` and ``y``
                have different lengths.
        """
        # A non-positive k would either slice off the *farthest* neighbours
        # (negative) or leave nothing to vote on (zero), so reject it early.
        if self.n_neighbors < 1:
            raise ValueError(
                f"n_neighbors must be a positive integer, got {self.n_neighbors}"
            )

        # Coerce to arrays so predict() can rely on broadcasting and
        # fancy indexing even when lists are passed in.
        X_train = np.asarray(X)
        y_train = np.asarray(y)
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X and y must have the same length, got {len(X_train)} and {len(y_train)}"
            )

        self.X_train = X_train
        self.y_train = y_train
        self.is_fitted = True

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict a label for each row of ``X`` by majority vote.

        If ``n_neighbors`` exceeds the number of training samples, all
        training samples vote. When several labels tie for the highest
        count, the smallest label (in ``np.unique`` sort order) wins.

        Raises:
            ValueError: If called before ``fit``.
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before prediction")

        predictions = []
        for x in np.asarray(X):
            # Euclidean distance from x to every training sample.
            distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))

            # Indices of the k closest training samples.
            nearest_indices = np.argsort(distances)[:self.n_neighbors]
            nearest_labels = self.y_train[nearest_indices]

            # Majority vote among those neighbours.
            labels, counts = np.unique(nearest_labels, return_counts=True)
            predictions.append(labels[np.argmax(counts)])

        return np.array(predictions)

class MLAlgorithmCollection:
    """Registry of algorithms keyed by name, with a helper to compare them."""

    def __init__(self):
        """Start with an empty registry."""
        self.algorithms: Dict[str, BaseMLAlgorithm] = {}

    def add_algorithm(self, algorithm: BaseMLAlgorithm) -> None:
        """Register ``algorithm`` under its ``name``.

        An algorithm already registered under the same name is replaced.
        """
        self.algorithms[algorithm.name] = algorithm

    def get_algorithm(self, name: str) -> Optional[BaseMLAlgorithm]:
        """Return the algorithm registered as ``name``, or ``None`` if absent."""
        return self.algorithms.get(name)

    def list_algorithms(self) -> List[str]:
        """Return the names of all registered algorithms."""
        return list(self.algorithms)

    def compare_algorithms(self, X: np.ndarray, y: np.ndarray,
                          test_size: float = 0.2, metric: str = 'accuracy') -> Dict[str, Optional[float]]:
        """Fit every registered algorithm on one split and score it on the other.

        The data is split once with a fixed ``random_state`` so every
        algorithm sees the same train/test partition. Each algorithm is
        refitted in place.

        Args:
            X: Feature matrix.
            y: Targets.
            test_size: Fraction of the data held out for scoring.
            metric: Metric name passed to each algorithm's ``evaluate``.

        Returns:
            Mapping from algorithm name to its score, or to ``None`` when
            fitting or evaluating that algorithm raised an exception.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )

        results: Dict[str, Optional[float]] = {}
        for name, algo in self.algorithms.items():
            # Deliberately best-effort: one failing algorithm is reported and
            # recorded as None so the remaining ones are still compared.
            try:
                algo.fit(X_train, y_train)
                score = algo.evaluate(X_test, y_test, metric)
                results[name] = score

                print(f"{name}: {metric} = {score:.4f}")
            except Exception as e:
                print(f"Error with {name}: {e}")
                results[name] = None

        return results

# Usage example
if __name__ == "__main__":
    # Generate sample data (seeded so the demo is reproducible).
    np.random.seed(42)
    X_reg = np.random.randn(100, 2)
    y_reg = 2 * X_reg[:, 0] + 3 * X_reg[:, 1] + np.random.randn(100) * 0.1

    X_clf = np.random.randn(100, 2)
    y_clf = (X_clf[:, 0] + X_clf[:, 1] > 0).astype(int)

    # Keep regressors and classifiers in separate collections: scoring a
    # classifier with MSE on continuous targets, or a regressor with
    # accuracy on class labels, does not produce a meaningful comparison.
    regressors = MLAlgorithmCollection()
    regressors.add_algorithm(LinearRegressionAlgorithm())

    classifiers = MLAlgorithmCollection()
    classifiers.add_algorithm(KNNClassifier(n_neighbors=3))

    # Compare each family on its own task.
    print("Regression comparison (using MSE):")
    regressors.compare_algorithms(X_reg, y_reg, metric='mse')

    print("\nClassification comparison (using accuracy):")
    classifiers.compare_algorithms(X_clf, y_clf, metric='accuracy')

这个实现的关键点：