- 移除对 gh CLI 的依赖,改用 Gitea /api/v1 接口 - fetch_gitea_data.py:自动解析 git remote 获取实例地址和仓库路径,支持分页,输出结构化 JSON - 私有仓库通过 GITEA_TOKEN 环境变量鉴权,公开仓库无需配置 - 更新 SKILL.md 对应执行步骤 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
219 lines
7.1 KiB
Python
219 lines
7.1 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
从 Gitea API 拉取过去 N 天的 commits 和 merged PR 数据。
|
||
自动从 git remote origin 解析 Gitea 实例地址和仓库路径。
|
||
|
||
可选环境变量:
|
||
GITEA_TOKEN - 私有仓库访问令牌(公开仓库无需配置)
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
import subprocess
|
||
import sys
|
||
import urllib.request
|
||
import urllib.error
|
||
from datetime import datetime, timedelta, timezone
|
||
from urllib.parse import urlparse
|
||
|
||
|
||
def get_remote_info() -> tuple[str, str, str]:
    """Resolve the Gitea base URL, owner and repository from ``git remote origin``.

    Supported remote forms:

    * ``http(s)://[user[:password]@]host[:port][/prefix]/owner/repo[.git]``
    * ``ssh://[user@]host[:port]/owner/repo[.git]``
    * scp-like ``[user@]host:owner/repo[.git]``

    Credentials embedded in the remote are stripped so they never end up in
    the returned base URL. The last two path segments are taken as
    ``owner/repo``; any leading segments are kept as part of the base URL so
    that instances served under a sub-path keep working.

    For SSH remotes the web endpoint cannot be derived from the URL itself;
    it is assumed to be ``https://<host>`` on the default port (the SSH port,
    if any, is dropped).

    Returns:
        A ``(base_url, owner, repo)`` tuple; ``base_url`` has no trailing slash.

    Exits:
        Prints a message to stderr and exits with status 1 when git is not
        installed, when there is no ``origin`` remote, or when the remote URL
        cannot be split into host and ``owner/repo``.
    """
    try:
        url = subprocess.check_output(
            ["git", "remote", "get-url", "origin"],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except FileNotFoundError:
        # The git executable itself is missing (not merely "not a repository").
        print("错误:未找到 git 命令,请先安装 git", file=sys.stderr)
        sys.exit(1)
    except subprocess.CalledProcessError:
        print("错误:当前目录不是 git 仓库,或没有 origin remote", file=sys.stderr)
        sys.exit(1)

    # Drop a trailing slash and the optional ".git" suffix.
    url = url.rstrip("/")
    if url.endswith(".git"):
        url = url[:-4]

    if "://" in url:
        parsed = urlparse(url)
        scheme = parsed.scheme.lower()
        # Strip "user:password@" so credentials do not leak into base_url.
        netloc = parsed.netloc.rpartition("@")[2]
        path = parsed.path
        if scheme not in ("http", "https"):
            # ssh:// (or similar): keep only the host and assume HTTPS for the API.
            host = parsed.hostname or ""
            netloc = f"[{host}]" if ":" in host else host
            scheme = "https"
    else:
        # scp-like syntax "[user@]host:owner/repo" has no scheme at all.
        location, sep, path = url.partition(":")
        netloc = location.rpartition("@")[2] if sep else ""
        scheme = "https"

    parts = [segment for segment in path.split("/") if segment]
    if not netloc or len(parts) < 2:
        print(f"错误:无法从 remote URL 解析 owner/repo:{url}", file=sys.stderr)
        sys.exit(1)

    *prefix, owner, repo = parts
    base_url = "/".join([f"{scheme}://{netloc}", *prefix])
    return base_url, owner, repo
|
||
|
||
|
||
def gitea_get(base_url: str, path: str) -> object:
    """GET ``{base_url}/api/v1{path}`` and return the decoded JSON body.

    If the ``GITEA_TOKEN`` environment variable is set, it is sent as an
    ``Authorization: token ...`` header.

    Args:
        base_url: Instance root URL without a trailing slash.
        path: API path (including any query string), starting with ``/``.

    Returns:
        The parsed JSON document (typically a list or a dict).

    Exits:
        Prints a message to stderr and exits with status 1 on any HTTP error
        status, connection failure, read failure/timeout, or when the
        response body is not valid JSON.
    """
    token = os.environ.get("GITEA_TOKEN", "")
    # A GET request has no body, so advertise what we accept instead of
    # declaring a Content-Type.
    headers = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"token {token}"

    url = f"{base_url}/api/v1{path}"
    req = urllib.request.Request(url, headers=headers)

    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            body = resp.read()
    except urllib.error.HTTPError as e:
        if e.code == 401:
            print("错误:API 返回 401,私有仓库需要设置 GITEA_TOKEN 环境变量", file=sys.stderr)
            sys.exit(1)
        print(f"错误:API 请求失败 {url} -> HTTP {e.code}", file=sys.stderr)
        if e.code in (403, 404) and not token:
            # A private repository may be reported as forbidden / not found
            # to anonymous callers rather than as 401.
            print("提示:如果是私有仓库,请设置 GITEA_TOKEN 环境变量", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"错误:无法连接到 Gitea 实例 {base_url} - {e.reason}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Timeouts or resets while reading the body are not wrapped in URLError.
        print(f"错误:读取 API 响应失败 {url} - {e}", file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(body)
    except ValueError:
        # Covers json.JSONDecodeError and undecodable bytes (e.g. an HTML page
        # served by something that is not a Gitea API endpoint).
        print(f"错误:API 响应不是有效的 JSON {url}", file=sys.stderr)
        sys.exit(1)
|
||
|
||
|
||
def _commit_timestamp(commit: dict) -> "datetime | None":
    """Return the commit's timestamp as an aware datetime, or None if unavailable."""
    details = commit.get("commit") or {}
    raw = (
        (details.get("committer") or {}).get("date")
        or commit.get("created")
        or (details.get("author") or {}).get("date")
    )
    if not isinstance(raw, str):
        return None
    try:
        parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # NOTE(review): a timestamp without an offset is treated as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def fetch_commits(
    base_url: str,
    owner: str,
    repo: str,
    since: datetime,
    branch: str = "",
) -> list[dict]:
    """Fetch every commit made at or after ``since``, following pagination.

    Args:
        base_url: Instance root URL without a trailing slash.
        owner: Repository owner.
        repo: Repository name.
        since: Lower bound of the time window. It is converted to UTC before
            being sent; a naive value is interpreted as local time.
        branch: Branch name or SHA to list from. When empty, the ``sha``
            parameter is omitted so the server uses the repository's default
            branch instead of assuming it is called ``main``.

    Returns:
        The raw commit objects returned by the API, in server order.

    Notes:
        * ``since`` is also enforced client-side, because a server that does
          not implement the parameter would otherwise return the whole
          history. Paging stops at the first page with no commit in range.
        * A short page is not treated as the last page: the server may cap
          the page size below the requested ``limit``. Paging ends on an
          empty page instead.
    """
    from urllib.parse import quote

    # The API timestamp has one-second resolution; compare at that resolution too.
    cutoff = since.astimezone(timezone.utc).replace(microsecond=0)
    since_str = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
    branch_param = f"sha={quote(branch, safe='')}&" if branch else ""

    commits: list[dict] = []
    page = 1
    while True:
        data = gitea_get(
            base_url,
            f"/repos/{owner}/{repo}/commits?{branch_param}limit=50&page={page}&since={since_str}",
        )
        if not data:
            break

        in_range = []
        for commit in data:
            when = _commit_timestamp(commit)
            # Keep commits whose date cannot be determined rather than drop data.
            if when is None or when >= cutoff:
                in_range.append(commit)
        commits.extend(in_range)

        if not in_range:
            # The whole page predates the window; later pages are older still.
            break
        page += 1
    return commits
|
||
|
||
|
||
def _parse_pr_time(value: object) -> "datetime | None":
    """Parse an ISO-8601 timestamp from a PR payload; None when missing or malformed."""
    if not isinstance(value, str) or not value:
        return None
    try:
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # NOTE(review): a timestamp without an offset is treated as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def fetch_merged_prs(base_url: str, owner: str, repo: str, since: datetime) -> list[dict]:
    """Fetch closed pull requests and keep those merged at or after ``since``.

    Args:
        base_url: Instance root URL without a trailing slash.
        owner: Repository owner.
        repo: Repository name.
        since: Lower bound of the time window; a naive value is interpreted
            as local time.

    Returns:
        The raw PR objects whose ``merged_at`` is within the window, in the
        order the server returned them (most recently updated first).

    Notes:
        * Only ``merged_at`` is used as the merge time. The ``merged`` field
          is a boolean flag, not a timestamp.
        * Results are requested with ``sort=recentupdate`` and paging stops
          once the last PR of a page was last updated before ``since``.
          This assumes merging a PR bumps its ``updated_at``, so every
          remaining PR must have been merged before the window as well.
        * A short page is not treated as the last page, since the server may
          cap the page size below the requested ``limit``.
    """
    cutoff = since.astimezone(timezone.utc)
    prs: list[dict] = []
    page = 1
    while True:
        data = gitea_get(
            base_url,
            f"/repos/{owner}/{repo}/pulls?state=closed&sort=recentupdate&limit=50&page={page}",
        )
        if not data:
            break

        for pr in data:
            merged_dt = _parse_pr_time(pr.get("merged_at"))
            # Closed-but-unmerged PRs have no merge time and are skipped.
            if merged_dt is not None and merged_dt >= cutoff:
                prs.append(pr)

        last_updated = _parse_pr_time(data[-1].get("updated_at"))
        if last_updated is not None and last_updated < cutoff:
            break
        page += 1
    return prs
|
||
|
||
|
||
def group_by_author(commits: list[dict], prs: list[dict]) -> dict:
    """Group commits and pull requests by contributor.

    Commits are keyed by the git author name. A pull request is attached to
    the same entry when one of that author's commits carries a linked
    account (top-level ``author.login``) equal to the PR author's login;
    otherwise the PR is keyed by the login itself.

    Missing or ``null`` fields in the API payload (``commit``, ``author``,
    ``files``, ``user``, ``sha`` ...) are tolerated and replaced by neutral
    defaults instead of raising.

    Args:
        commits: Raw commit objects as returned by the commits API.
        prs: Raw pull request objects as returned by the pulls API.

    Returns:
        A dict mapping contributor key to
        ``{"name", "email", "commits": [...], "prs": [...]}``.
    """
    authors: dict[str, dict] = {}
    # Maps an account login to the author key first seen with that login.
    key_by_login: dict[str, str] = {}

    for c in commits:
        commit_info = c.get("commit") or {}
        author = commit_info.get("author") or {}
        name = author.get("name") or "unknown"

        if name not in authors:
            authors[name] = {
                "name": name,
                "email": author.get("email") or "",
                "commits": [],
                "prs": [],
            }

        login = (c.get("author") or {}).get("login")
        if login:
            key_by_login.setdefault(login, name)

        authors[name]["commits"].append({
            "sha": (c.get("sha") or "")[:8],
            # Only the subject line of the commit message.
            "message": (commit_info.get("message") or "").split("\n")[0],
            "date": (author.get("date") or "")[:10],
            "files": [f.get("filename") for f in (c.get("files") or [])],
        })

    for pr in prs:
        user = pr.get("user") or pr.get("poster") or {}
        login = user.get("login") or "unknown"
        # Reuse the commit author's entry when the login is known to be theirs.
        key = key_by_login.get(login, login)

        if key not in authors:
            authors[key] = {"name": key, "email": "", "commits": [], "prs": []}

        authors[key]["prs"].append({
            "number": pr.get("number"),
            "title": pr.get("title", ""),
            "merged_at": (pr.get("merged_at") or "")[:10],
            "additions": pr.get("additions", 0),
            "deletions": pr.get("deletions", 0),
        })

    return authors
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Parses ``--days`` and ``--output``, resolves the repository from the git
    remote, fetches commits and merged pull requests for the window, and
    emits one JSON report to stdout (``--output -``, the default) or to the
    given file. Progress messages go to stderr so stdout stays pure JSON.
    """
    import argparse

    def log(message: str) -> None:
        # Progress output is kept off stdout.
        print(message, file=sys.stderr)

    cli = argparse.ArgumentParser(description="拉取 Gitea 仓库过去 N 天的数据")
    cli.add_argument("--days", type=int, default=7, help="统计天数(默认7天)")
    cli.add_argument("--output", default="-", help="输出 JSON 文件路径(默认输出到 stdout)")
    options = cli.parse_args()

    base_url, owner, repo = get_remote_info()
    since = datetime.now(timezone.utc) - timedelta(days=options.days)

    log(f"仓库:{base_url}/{owner}/{repo}")
    log(f"统计范围:{since.strftime('%Y-%m-%d')} 至今")

    log("正在拉取 commits...")
    commits = fetch_commits(base_url, owner, repo, since)
    log(f" 共 {len(commits)} 条")

    log("正在拉取已合并 PR...")
    prs = fetch_merged_prs(base_url, owner, repo, since)
    log(f" 共 {len(prs)} 条")

    meta = {
        "base_url": base_url,
        "owner": owner,
        "repo": repo,
        "since": since.strftime("%Y-%m-%d"),
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M"),
    }
    by_author = group_by_author(commits, prs)
    report = {
        "meta": meta,
        "summary": {
            "total_commits": len(commits),
            "total_prs": len(prs),
            "contributors": len(by_author),
        },
        "by_author": by_author,
        "all_prs": prs,
    }

    payload = json.dumps(report, ensure_ascii=False, indent=2)

    if options.output == "-":
        print(payload)
        return

    with open(options.output, "w", encoding="utf-8") as handle:
        handle.write(payload)
    log(f"数据已写入 {options.output}")
|
||
|
||
|
||
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|