Python命令行查看GitHub trending的脚本,求dalao帮忙review

Python 新手,轻喷。正在熟悉 PEP 8,不知道有没有代码风格比较差的地方,想趁早改掉。

repo 地址: https://github.com/chinsyo/ghtrending

晚点会添加 setup.py ,欢迎 PR 帮忙适配 py2。
Python命令行查看GitHub trending的脚本,求dalao帮忙review

2 回复

我来帮你review一个Python命令行查看GitHub trending的脚本。这种脚本通常需要处理网络请求、HTML解析和命令行参数,我写个完整的示例:

#!/usr/bin/env python3
"""
GitHub Trending 查看脚本
支持语言过滤和时间范围选择
"""

import requests
import argparse
from datetime import datetime, timedelta
from typing import List, Dict, Optional
from bs4 import BeautifulSoup

class GitHubTrending:
    """Fetch, parse and print the repositories listed on GitHub's trending page.

    Attributes set by ``__init__``:
        base_url: Address of the trending page.
        headers: HTTP headers sent with every request (a browser-like
            ``User-Agent``).
    """

    def __init__(self):
        self.base_url = "https://github.com/trending"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    @staticmethod
    def _to_int(text) -> int:
        """Convert a displayed count such as ``"1,234"`` to an int.

        Thousands separators and surrounding whitespace are removed first.
        Anything that still is not an integer yields 0, so one odd counter
        does not discard the whole repository entry.
        """
        try:
            return int(str(text).strip().replace(',', ''))
        except ValueError:
            return 0

    def _build_url(self, language: str = "") -> str:
        """Return the page URL, with *language* as a path segment when given.

        GitHub addresses per-language listings as ``/trending/<language>``
        rather than through a ``language`` query parameter. The segment is
        percent-encoded so names such as ``c++`` or ``c#`` stay valid.
        """
        # Stdlib helper imported locally so the class needs no extra
        # module-level import.
        from urllib.parse import quote

        slug = language.strip()
        if not slug:
            return self.base_url
        return f"{self.base_url}/{quote(slug, safe='')}"

    def fetch_trending(self, language: str = "", since: str = "daily") -> List[Dict]:
        """Download the trending page and return the parsed repositories.

        Args:
            language: Optional language filter; empty means all languages.
            since: ``"daily"``, ``"weekly"`` or ``"monthly"``. Any other
                value is not sent to the server.

        Returns:
            A list of repository dicts (see ``_parse_repo_article``), or an
            empty list when the request fails.
        """
        params = {}
        if since in ("daily", "weekly", "monthly"):
            params["since"] = since

        try:
            response = requests.get(
                self._build_url(language),
                params=params,
                headers=self.headers,
                timeout=10,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"请求失败: {e}")
            return []
        return self._parse_html(response.text)

    def _parse_html(self, html: str) -> List[Dict]:
        """Parse the page HTML and return one dict per recognised repository."""
        soup = BeautifulSoup(html, 'html.parser')
        parsed = (
            self._parse_repo_article(article)
            for article in soup.find_all('article', class_='Box-row')
        )
        # Entries that could not be parsed come back as None and are dropped.
        return [info for info in parsed if info]

    def _parse_repo_article(self, article) -> Optional[Dict]:
        """Extract the details of one repository from its ``<article>`` element.

        Returns:
            A dict with the keys ``name``, ``url``, ``description``,
            ``language``, ``stars``, ``forks`` and ``stars_today``, or
            ``None`` when the heading link is missing or parsing fails.
        """
        try:
            # Repository name and link.
            h2 = article.find('h2', class_='h3')
            a_tag = h2.find('a') if h2 is not None else None
            if a_tag is None:
                return None

            repo_name = a_tag.get('href', '').strip('/')
            if not repo_name:
                return None

            # Description.
            desc_elem = article.find('p', class_='col-9')
            description = desc_elem.text.strip() if desc_elem is not None else "No description"

            # Programming language.
            lang_elem = article.find('span', itemprop='programmingLanguage')
            language = lang_elem.text.strip() if lang_elem is not None else "Unknown"

            # Star count.
            stars_elem = article.find('a', href=lambda x: x and '/stargazers' in x)
            stars = self._to_int(stars_elem.text) if stars_elem is not None else 0

            # Fork count.
            forks_elem = article.find('a', href=lambda x: x and '/forks' in x)
            forks = self._to_int(forks_elem.text) if forks_elem is not None else 0

            # Stars gained in the selected period: the first word of the span
            # text. It may contain thousands separators ("1,234 stars today"),
            # which _to_int removes before converting.
            today_elem = article.find('span', class_='d-inline-block float-sm-right')
            today_words = today_elem.text.split() if today_elem is not None else []
            stars_today = self._to_int(today_words[0]) if today_words else 0

            return {
                'name': repo_name,
                'url': f"https://github.com/{repo_name}",
                'description': description,
                'language': language,
                'stars': stars,
                'forks': forks,
                'stars_today': stars_today,
            }
        except (AttributeError, IndexError, TypeError, ValueError) as e:
            print(f"解析错误: {e}")
            return None

    def display(self, repos: List[Dict], limit: int = 10):
        """Print up to *limit* repositories in a human-readable layout.

        Args:
            repos: Repository dicts as produced by ``fetch_trending``.
            limit: Maximum number of entries to print; negative values are
                treated as 0 instead of slicing from the end of the list.
        """
        if not repos:
            print("没有找到trending仓库")
            return

        shown = repos[:max(limit, 0)]

        print(f"\n{'='*80}")
        print(f"GitHub Trending (显示前{len(shown)}个)")
        print(f"{'='*80}\n")

        for i, repo in enumerate(shown, 1):
            print(f"{i:2d}. {repo['name']}")
            print(f"    {repo['description']}")
            print(f"    ⭐ {repo['stars']:,}  |  🍴 {repo['forks']:,}  |  📈 +{repo['stars_today']:,} today")
            print(f"    💻 {repo['language']}")
            print(f"    🔗 {repo['url']}")
            print()

def main():
    """Parse the command-line options, fetch the trending list and print it.

    Options: ``-l/--language`` (language filter), ``-s/--since`` (daily,
    weekly or monthly), ``-n/--number`` (how many entries to show) and
    ``--json`` (emit JSON instead of the formatted listing).
    """
    parser = argparse.ArgumentParser(description='查看GitHub Trending仓库')
    parser.add_argument('-l', '--language', default='', help='编程语言过滤 (如: python, javascript)')
    parser.add_argument('-s', '--since', choices=['daily', 'weekly', 'monthly'],
                        default='daily', help='时间范围')
    parser.add_argument('-n', '--number', type=int, default=10,
                        help='显示数量 (默认: 10)')
    parser.add_argument('--json', action='store_true',
                        help='输出JSON格式')

    args = parser.parse_args()

    # A negative count would make repos[:n] drop items from the end of the
    # list instead of limiting the output, so treat it as zero.
    limit = max(args.number, 0)

    gh = GitHubTrending()
    repos = gh.fetch_trending(language=args.language, since=args.since)

    if args.json:
        import json
        print(json.dumps(repos[:limit], indent=2, ensure_ascii=False))
    else:
        gh.display(repos, limit)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

使用示例:

# 查看今日trending
python github_trending.py

# 查看Python项目
python github_trending.py -l python

# 查看本周JavaScript trending,显示5个
python github_trending.py -l javascript -s weekly -n 5

# 输出JSON格式
python github_trending.py --json

代码要点:

  1. 使用类封装,结构清晰
  2. 完整的错误处理
  3. 支持命令行参数
  4. 可配置显示数量
  5. 可选JSON输出

依赖安装:

pip install requests beautifulsoup4

这个脚本比直接curl更灵活,可以过滤语言、选择时间范围,还能控制输出格式。


Update:感谢 Wnstar 同学的 PR,目前已兼容 py2。

回到顶部