Python命令行查看GitHub trending的脚本,求dalao帮忙review
Python 新手,轻喷。PEP 8 还在熟悉中,不知道有没有代码风格比较差的地方,想趁早改掉。
repo 地址: https://github.com/chinsyo/ghtrending
晚点会添加 setup.py ,欢迎 PR 帮忙适配 py2。
Python命令行查看GitHub trending的脚本,求dalao帮忙review
2 回复
我来帮你review一个Python命令行查看GitHub trending的脚本。这种脚本通常需要处理网络请求、HTML解析和命令行参数,我写个完整的示例:
#!/usr/bin/env python3
"""
GitHub Trending 查看脚本
支持语言过滤和时间范围选择
"""
import argparse
import sys
from datetime import datetime, timedelta
from typing import List, Dict, Optional

import requests
from bs4 import BeautifulSoup
class GitHubTrending:
    """Fetch the GitHub Trending page and turn it into repository dicts.

    The page at ``base_url`` is downloaded with ``requests`` and scraped
    with BeautifulSoup; each repository becomes a dict with the keys
    ``name``, ``url``, ``description``, ``language``, ``stars``, ``forks``
    and ``stars_today``.
    """

    def __init__(self):
        self.base_url = "https://github.com/trending"
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def fetch_trending(self, language: str = "", since: str = "daily") -> List[Dict]:
        """Download the trending page and return the parsed repositories.

        Args:
            language: Optional language filter, sent as the ``language``
                query parameter when non-empty.
            since: Time range; only ``daily``, ``weekly`` and ``monthly``
                are forwarded, any other value is silently left out.

        Returns:
            A list of repository dicts, or an empty list when the HTTP
            request fails (the failure is reported on stderr).
        """
        # NOTE(review): GitHub's own language links appear to use the path
        # form /trending/<language>; confirm that the ``language`` query
        # parameter is actually honoured by the site.
        params = {}
        if language:
            params["language"] = language
        if since in ("daily", "weekly", "monthly"):
            params["since"] = since

        try:
            response = requests.get(
                self.base_url, params=params, headers=self.headers, timeout=10
            )
            response.raise_for_status()
        except requests.RequestException as e:
            # Diagnostics go to stderr so that JSON written to stdout by the
            # caller stays machine-readable.
            print(f"请求失败: {e}", file=sys.stderr)
            return []
        return self._parse_html(response.text)

    def _parse_html(self, html: str) -> List[Dict]:
        """Extract every repository entry from the trending page HTML."""
        soup = BeautifulSoup(html, 'html.parser')
        parsed = (
            self._parse_repo_article(article)
            for article in soup.find_all('article', class_='Box-row')
        )
        # Entries that could not be parsed come back as None and are dropped.
        return [repo for repo in parsed if repo]

    @staticmethod
    def _text_or(element, default: str) -> str:
        """Return the stripped text of *element*, or *default* if it is None."""
        return default if element is None else element.text.strip()

    @staticmethod
    def _to_int(text: str) -> int:
        """Convert a count such as ``"12,345"`` to an int; 0 if not numeric."""
        try:
            return int(text.strip().replace(',', ''))
        except ValueError:
            return 0

    def _parse_repo_article(self, article) -> Optional[Dict]:
        """Build the repository dict for one ``<article>`` element.

        Returns:
            The repository dict, or ``None`` when the element has no
            repository heading/link or cannot be read.
        """
        try:
            # Repository name and link.
            h2 = article.find('h2', class_='h3')
            if h2 is None:
                return None
            a_tag = h2.find('a')
            if a_tag is None:
                return None
            repo_name = a_tag.get('href', '').strip('/')
            if not repo_name:
                return None

            # Description and primary language, with fallbacks when absent.
            description = self._text_or(
                article.find('p', class_='col-9'), "No description"
            )
            language = self._text_or(
                article.find('span', itemprop='programmingLanguage'), "Unknown"
            )

            # Star and fork totals are the texts of the matching links.
            stars_elem = article.find(
                'a', href=lambda href: href and '/stargazers' in href
            )
            forks_elem = article.find(
                'a', href=lambda href: href and '/forks' in href
            )

            # Stars gained in the period: the first word of the text,
            # e.g. "1,234" in "1,234 stars today".
            today_elem = article.find(
                'span', class_='d-inline-block float-sm-right'
            )
            today_words = self._text_or(today_elem, "").split()

            return {
                'name': repo_name,
                'url': f"https://github.com/{repo_name}",
                'description': description,
                'language': language,
                'stars': self._to_int(self._text_or(stars_elem, "0")),
                'forks': self._to_int(self._text_or(forks_elem, "0")),
                # Commas are removed before the numeric check, so counts of
                # 1,000 and above are no longer reported as 0.
                'stars_today': self._to_int(today_words[0]) if today_words else 0,
            }
        except (AttributeError, IndexError, TypeError, ValueError) as e:
            print(f"解析错误: {e}", file=sys.stderr)
            return None

    def display(self, repos: List[Dict], limit: int = 10):
        """Print at most *limit* repositories as a numbered list.

        A negative *limit* is treated as 0 rather than slicing from the end
        of the list.
        """
        if not repos:
            print("没有找到trending仓库")
            return

        shown = repos[:max(limit, 0)]
        print(f"\n{'='*80}")
        print(f"GitHub Trending (显示前{len(shown)}个)")
        print(f"{'='*80}\n")
        for i, repo in enumerate(shown, 1):
            print(f"{i:2d}. {repo['name']}")
            print(f" {repo['description']}")
            print(f" ⭐ {repo['stars']:,} | 🍴 {repo['forks']:,} | 📈 +{repo['stars_today']:,} today")
            print(f" 💻 {repo['language']}")
            print(f" 🔗 {repo['url']}")
            print()
def _positive_int(value: str) -> int:
    """Argparse ``type`` callable that accepts only integers >= 1."""
    try:
        number = int(value)
    except ValueError:
        number = 0
    if number < 1:
        raise argparse.ArgumentTypeError(f"显示数量必须为正整数: {value!r}")
    return number


def main():
    """Command-line entry point: parse options, fetch, then print results.

    Options: ``-l/--language`` filter, ``-s/--since`` time range,
    ``-n/--number`` maximum entries shown (must be >= 1) and ``--json`` to
    emit JSON instead of the formatted listing.
    """
    parser = argparse.ArgumentParser(description='查看GitHub Trending仓库')
    parser.add_argument('-l', '--language', default='',
                        help='编程语言过滤 (如: python, javascript)')
    parser.add_argument('-s', '--since', choices=['daily', 'weekly', 'monthly'],
                        default='daily', help='时间范围')
    # Zero or negative values used to reach the slice below unchecked, where
    # repos[:-3] drops trailing entries instead of limiting the output.
    parser.add_argument('-n', '--number', type=_positive_int, default=10,
                        help='显示数量 (默认: 10)')
    parser.add_argument('--json', action='store_true',
                        help='输出JSON格式')
    args = parser.parse_args()

    gh = GitHubTrending()
    repos = gh.fetch_trending(language=args.language, since=args.since)
    if args.json:
        # Imported here because only the --json path needs it.
        import json
        print(json.dumps(repos[:args.number], indent=2, ensure_ascii=False))
    else:
        gh.display(repos, args.number)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
使用示例:
# 查看今日trending
python github_trending.py
# 查看Python项目
python github_trending.py -l python
# 查看本周JavaScript trending,显示5个
python github_trending.py -l javascript -s weekly -n 5
# 输出JSON格式
python github_trending.py --json
代码要点:
- 使用类封装,结构清晰
- 完整的错误处理
- 支持命令行参数
- 可配置显示数量
- 可选JSON输出
依赖安装:
pip install requests beautifulsoup4
这个脚本比直接curl更灵活,可以过滤语言、选择时间范围,还能控制输出格式。
update:感谢 Wnstar 同学的 PR,目前已兼容 py2。

