#!/usr/bin/env python3
"""Fetch the commits and merged pull requests of the last N days from Gitea.

The Gitea instance address and the repository path are derived from the
``origin`` remote of the git repository in the current directory.

Optional environment variable:
    GITEA_TOKEN - access token for private repositories (public
                  repositories need no configuration)
"""

import argparse
import json
import os
import re
import subprocess
import sys
import urllib.error
import urllib.request
from collections.abc import Iterator
from datetime import datetime, timedelta, timezone
from typing import Optional
from urllib.parse import quote, urlencode, urlparse

# Number of items requested per API page.
PAGE_SIZE = 50
# Per-request timeout in seconds.
REQUEST_TIMEOUT = 15
# Remote schemes that carry no usable web address of their own.
_NON_HTTP_SCHEMES = ("ssh", "git", "git+ssh", "ssh+git")
# scp-like remote syntax: [user@]host:path (the colon precedes any slash).
_SCP_LIKE_REMOTE = re.compile(r"^(?:[^@/:]+@)?(?P<host>[^@/:]+):(?P<path>.+)$")


def _as_utc(moment: datetime) -> datetime:
    """Return *moment* as an aware UTC datetime (naive values are taken as UTC)."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


def _parse_api_time(value: object) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp string; return None for anything else."""
    if not isinstance(value, str) or not value:
        return None
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    return _as_utc(parsed)


def parse_remote_url(url: str) -> tuple[str, str, str]:
    """Split a git remote URL into ``(base_url, owner, repo)``.

    Supported forms are ``http(s)://host[:port]/[prefix/]owner/repo[.git]``,
    ``ssh://[user@]host[:port]/owner/repo[.git]`` and the scp-like
    ``[user@]host:owner/repo[.git]``.

    Credentials embedded in the URL are never copied into ``base_url``.
    For SSH-style remotes the web address is not part of the URL, so
    ``https://<host>`` is assumed (NOTE(review): instances served over
    plain HTTP or on a non-default web port need an http(s) remote).

    Raises:
        ValueError: if the URL has no host or fewer than two path segments.
    """
    cleaned = url.strip().rstrip("/").removesuffix(".git")
    keep_prefix = False

    if "://" in cleaned:
        parsed = urlparse(cleaned)
        scheme = parsed.scheme.lower()
        host = parsed.hostname or ""
        if ":" in host:
            # urlparse strips the brackets from IPv6 literals; restore them.
            host = f"[{host}]"
        path = parsed.path
        if scheme in ("http", "https"):
            netloc = f"{host}:{parsed.port}" if parsed.port else host
            # Only an http(s) remote tells us about a web sub-path.
            keep_prefix = True
        elif scheme in _NON_HTTP_SCHEMES:
            # The SSH port says nothing about the web port; drop it.
            scheme, netloc = "https", host
        else:
            raise ValueError(f"unsupported remote scheme: {scheme!r}")
    else:
        match = _SCP_LIKE_REMOTE.match(cleaned)
        if match is None:
            raise ValueError(f"unrecognised remote URL: {url!r}")
        scheme, netloc, path = "https", match["host"], match["path"]

    if not netloc:
        raise ValueError(f"remote URL has no host: {url!r}")

    segments = [segment for segment in path.split("/") if segment]
    if len(segments) < 2:
        raise ValueError(f"remote URL has no owner/repo path: {url!r}")

    # owner/repo are always the last two segments; anything before them is
    # the sub-path the instance is served under.
    owner, repo = segments[-2], segments[-1]
    base_url = f"{scheme}://{netloc}"
    if keep_prefix and len(segments) > 2:
        base_url += "/" + "/".join(segments[:-2])
    return base_url, owner, repo


def get_remote_info() -> tuple[str, str, str]:
    """Resolve ``(base_url, owner, repo)`` from the ``origin`` git remote.

    Prints an error to stderr and exits with status 1 when git is missing,
    the current directory has no ``origin`` remote, or the remote URL cannot
    be parsed.
    """
    try:
        url = subprocess.check_output(
            ["git", "remote", "get-url", "origin"],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except FileNotFoundError:
        print("错误:未找到 git 命令", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError:
        print("错误:当前目录不是 git 仓库,或没有 origin remote", file=sys.stderr)
        sys.exit(1)

    try:
        return parse_remote_url(url)
    except ValueError:
        print(f"错误:无法从 remote URL 解析 owner/repo:{url}", file=sys.stderr)
        sys.exit(1)


def gitea_get(base_url: str, path: str) -> object:
    """GET ``{base_url}/api/v1{path}`` and return the decoded JSON body.

    A ``GITEA_TOKEN`` environment variable, when set, is sent as the
    ``Authorization`` header. Any HTTP, connection, read or decoding failure
    is reported on stderr and terminates the process with status 1.
    """
    token = os.environ.get("GITEA_TOKEN", "")
    # A GET has no body, so the relevant header is Accept, not Content-Type.
    headers = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"token {token}"

    url = f"{base_url}/api/v1{path}"
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
            body = resp.read()
    except urllib.error.HTTPError as e:
        if e.code == 401:
            print("错误:API 返回 401,私有仓库需要设置 GITEA_TOKEN 环境变量", file=sys.stderr)
            sys.exit(1)
        print(f"错误:API 请求失败 {url} -> HTTP {e.code}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"错误:无法连接到 Gitea 实例 {base_url} - {e.reason}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Timeouts while reading the body are not wrapped in URLError.
        print(f"错误:读取 Gitea 响应失败 {url} - {e}", file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(body.decode())
    except ValueError:
        # Covers both invalid JSON and undecodable bytes (e.g. an HTML page).
        print(f"错误:API 响应不是有效的 JSON {url}", file=sys.stderr)
        sys.exit(1)


def _iter_pages(base_url: str, path: str, params: dict) -> Iterator[list]:
    """Yield successive non-empty result pages of a paginated list endpoint.

    Iteration ends at the first empty page rather than at the first short
    one, so a server that caps the page size below PAGE_SIZE is still read
    to the end.
    """
    page = 1
    while True:
        query = urlencode({**params, "limit": PAGE_SIZE, "page": page})
        data = gitea_get(base_url, f"{path}?{query}")
        if not data:
            return
        if not isinstance(data, list):
            print(f"错误:API 返回了非预期的数据格式 {path}", file=sys.stderr)
            sys.exit(1)
        yield data
        page += 1


def _repo_path(owner: str, repo: str) -> str:
    """Return the URL-quoted ``/repos/{owner}/{repo}`` API path prefix."""
    return f"/repos/{quote(owner, safe='')}/{quote(repo, safe='')}"


def fetch_commits(
    base_url: str,
    owner: str,
    repo: str,
    since: datetime,
    branch: str = "main",
) -> list[dict]:
    """Fetch every commit on *branch* made after *since*, following pagination.

    Args:
        base_url: Root URL of the Gitea instance.
        owner: Repository owner.
        repo: Repository name.
        since: Lower time bound; a naive value is interpreted as UTC.
        branch: Branch name or SHA to list commits from.

    NOTE(review): the time filter is applied by the server through the
    ``since`` query parameter; confirm that the targeted Gitea version
    honours it.
    """
    since_str = _as_utc(since).strftime("%Y-%m-%dT%H:%M:%SZ")
    params = {"sha": branch, "since": since_str}

    commits: list[dict] = []
    for page in _iter_pages(base_url, f"{_repo_path(owner, repo)}/commits", params):
        commits.extend(page)
    return commits


def filter_merged_prs(prs: list[dict], since: datetime) -> list[dict]:
    """Return the entries of *prs* whose ``merged_at`` is at or after *since*.

    Entries without a parseable ``merged_at`` string (closed but unmerged
    pull requests, malformed timestamps) are skipped.
    """
    cutoff = _as_utc(since)
    selected = []
    for pr in prs:
        if not isinstance(pr, dict):
            continue
        merged_dt = _parse_api_time(pr.get("merged_at"))
        if merged_dt is not None and merged_dt >= cutoff:
            selected.append(pr)
    return selected


def fetch_merged_prs(base_url: str, owner: str, repo: str, since: datetime) -> list[dict]:
    """Fetch closed pull requests and keep those merged at or after *since*.

    The listing is requested most-recently-updated first. A merge updates the
    pull request, so once a page ends with an entry last updated before
    *since*, no later page can contain a pull request merged inside the
    window and pagination stops.
    """
    cutoff = _as_utc(since)
    params = {"state": "closed", "sort": "recentupdate"}

    prs: list[dict] = []
    for page in _iter_pages(base_url, f"{_repo_path(owner, repo)}/pulls", params):
        prs.extend(filter_merged_prs(page, cutoff))

        last = page[-1]
        last_updated = _parse_api_time(last.get("updated_at")) if isinstance(last, dict) else None
        # Without a usable timestamp we cannot prove we are past the window.
        if last_updated is not None and last_updated < cutoff:
            break
    return prs


def group_by_author(commits: list[dict], prs: list[dict]) -> dict:
    """Group commits and pull requests by author.

    Returns a dict mapping an author key to
    ``{"name", "email", "commits", "prs"}``. Commits are keyed by the git
    author name and pull requests by the Gitea login, so one person appears
    under two keys when those differ. Fields that are missing or ``null`` in
    the API payload fall back to empty values.
    """
    authors: dict = {}

    def entry_for(name: str, email: str) -> dict:
        # The first sighting of a name decides the stored email.
        return authors.setdefault(
            name, {"name": name, "email": email, "commits": [], "prs": []}
        )

    for c in commits:
        commit_info = c.get("commit") or {}
        author = commit_info.get("author") or {}
        name = author.get("name") or "unknown"
        entry_for(name, author.get("email") or "")["commits"].append({
            "sha": (c.get("sha") or "")[:8],
            # Only the subject line of the commit message is kept.
            "message": (commit_info.get("message") or "").split("\n")[0],
            "date": (author.get("date") or "")[:10],
            "files": [f.get("filename") for f in c.get("files") or []],
        })

    for pr in prs:
        author = pr.get("user") or pr.get("poster") or {}
        name = author.get("login") or "unknown"
        entry_for(name, "")["prs"].append({
            "number": pr.get("number"),
            "title": pr.get("title", ""),
            "merged_at": (pr.get("merged_at") or "")[:10],
            "additions": pr.get("additions", 0),
            "deletions": pr.get("deletions", 0),
        })

    return authors


def main():
    """Command-line entry point: fetch the data and emit it as JSON."""
    parser = argparse.ArgumentParser(description="拉取 Gitea 仓库过去 N 天的数据")
    parser.add_argument("--days", type=int, default=7, help="统计天数(默认7天)")
    parser.add_argument("--output", default="-", help="输出 JSON 文件路径(默认输出到 stdout)")
    parser.add_argument("--branch", default="main", help="统计的分支(默认 main)")
    args = parser.parse_args()
    if args.days < 0:
        parser.error("--days 必须为非负整数")

    base_url, owner, repo = get_remote_info()
    since = datetime.now(timezone.utc) - timedelta(days=args.days)

    # Progress goes to stderr so that stdout carries nothing but the JSON.
    print(f"仓库:{base_url}/{owner}/{repo}", file=sys.stderr)
    print(f"统计范围:{since.strftime('%Y-%m-%d')} 至今", file=sys.stderr)

    print("正在拉取 commits...", file=sys.stderr)
    commits = fetch_commits(base_url, owner, repo, since, branch=args.branch)
    print(f"  共 {len(commits)} 条", file=sys.stderr)

    print("正在拉取已合并 PR...", file=sys.stderr)
    prs = fetch_merged_prs(base_url, owner, repo, since)
    print(f"  共 {len(prs)} 条", file=sys.stderr)

    by_author = group_by_author(commits, prs)
    result = {
        "meta": {
            "base_url": base_url,
            "owner": owner,
            "repo": repo,
            "since": since.strftime("%Y-%m-%d"),
            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M"),
        },
        "summary": {
            "total_commits": len(commits),
            "total_prs": len(prs),
            "contributors": len(by_author),
        },
        "by_author": by_author,
        "all_prs": prs,
    }

    output = json.dumps(result, ensure_ascii=False, indent=2)

    if args.output == "-":
        print(output)
    else:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(output)
        print(f"数据已写入 {args.output}", file=sys.stderr)


if __name__ == "__main__":
    main()