ieslab_skills/skills/weekly-report/fetch_gitea_data.py
admin c734958fa6 替换 gh CLI 为 Gitea REST API,新增 fetch_gitea_data.py
- 移除对 gh CLI 的依赖,改用 Gitea /api/v1 接口
- fetch_gitea_data.py:自动解析 git remote 获取实例地址和仓库路径,支持分页,输出结构化 JSON
- 私有仓库通过 GITEA_TOKEN 环境变量鉴权,公开仓库无需配置
- 更新 SKILL.md 对应执行步骤

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 14:07:43 +08:00

219 lines
7.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
从 Gitea API 拉取过去 N 天的 commits 和 merged PR 数据。
自动从 git remote origin 解析 Gitea 实例地址和仓库路径。
可选环境变量:
GITEA_TOKEN - 私有仓库访问令牌(公开仓库无需配置)
"""
import json
import os
import subprocess
import sys
import urllib.request
import urllib.error
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse
def get_remote_info() -> tuple[str, str, str]:
    """Resolve the Gitea base URL, owner and repository from ``origin``.

    Runs ``git remote get-url origin`` in the current working directory and
    parses the result. Three remote styles are understood:

    * ``http(s)://[user[:pass]@]host[:port][/prefix]/owner/repo[.git]`` -
      scheme, host, port and any sub-path prefix are kept; embedded
      credentials are dropped so they never reach the API base URL or the
      generated report.
    * ``ssh://[user@]host[:port]/owner/repo[.git]``
    * scp-like ``[user@]host:owner/repo[.git]``

    For the two SSH styles the SSH port is discarded and the web API is
    assumed to be served over HTTPS on the same host.

    Returns:
        A ``(base_url, owner, repo)`` tuple, e.g.
        ``("https://gitea.example.com", "team", "project")``.

    Exits the process with status 1 (after printing a message to stderr)
    when git cannot be run, there is no ``origin`` remote, or the remote
    URL does not contain a host plus an ``owner/repo`` path.
    """
    try:
        remote = subprocess.check_output(
            ["git", "remote", "get-url", "origin"],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: not a repository / no origin remote.
        # OSError: the git executable itself could not be started.
        print("错误:当前目录不是 git 仓库,或没有 origin remote", file=sys.stderr)
        sys.exit(1)

    # Drop a trailing slash and the optional ".git" suffix.
    location = remote.rstrip("/")
    if location.endswith(".git"):
        location = location[:-4]

    if "://" in location:
        parsed = urlparse(location)
        path = parsed.path
        if parsed.scheme in ("http", "https"):
            scheme = parsed.scheme
            # Strip "user:password@" so credentials are not propagated.
            host = parsed.netloc.rpartition("@")[2]
            keep_prefix = True
        else:
            # ssh:// (or git://): the port belongs to that protocol, not to
            # the web server, so only the bare host name is reused.
            scheme = "https"
            host = parsed.hostname or ""
            if ":" in host:  # IPv6 literal needs brackets inside a URL
                host = f"[{host}]"
            keep_prefix = False
    else:
        # scp-like syntax: [user@]host:owner/repo
        authority, _, path = location.partition(":")
        scheme = "https"
        host = authority.rpartition("@")[2]
        keep_prefix = False

    segments = [segment for segment in path.split("/") if segment]
    if not host or len(segments) < 2:
        print(f"错误:无法从 remote URL 解析 owner/repo:{remote}", file=sys.stderr)
        sys.exit(1)

    base_url = f"{scheme}://{host}"
    # A repository path is always "<owner>/<repo>"; anything before it in an
    # HTTP(S) remote is the sub-path the instance is served under.
    if keep_prefix and len(segments) > 2:
        base_url += "/" + "/".join(segments[:-2])
    owner, repo = segments[-2], segments[-1]
    return base_url, owner, repo
def gitea_get(base_url: str, path: str) -> object:
    """Perform a GET against the Gitea REST API and return the decoded JSON.

    The request goes to ``{base_url}/api/v1{path}`` with a 15 second timeout.
    When the ``GITEA_TOKEN`` environment variable is set and non-empty it is
    sent as an ``Authorization: token ...`` header.

    On an HTTP error status or a connection failure a message is printed to
    stderr and the process exits with status 1.
    """
    endpoint = f"{base_url}/api/v1{path}"
    request = urllib.request.Request(
        endpoint, headers={"Content-Type": "application/json"}
    )

    access_token = os.environ.get("GITEA_TOKEN", "")
    if access_token:
        request.add_header("Authorization", f"token {access_token}")

    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            payload = response.read()
    except urllib.error.HTTPError as err:
        # HTTPError must be handled before URLError (it is a subclass).
        if err.code == 401:
            message = "错误API 返回 401私有仓库需要设置 GITEA_TOKEN 环境变量"
        else:
            message = f"错误API 请求失败 {endpoint} -> HTTP {err.code}"
        print(message, file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as err:
        print(f"错误:无法连接到 Gitea 实例 {base_url} - {err.reason}", file=sys.stderr)
        sys.exit(1)

    return json.loads(payload.decode())
def fetch_commits(
    base_url: str,
    owner: str,
    repo: str,
    since: datetime,
    branch: str = "main",
) -> list[dict]:
    """Fetch every commit on ``branch`` made at or after ``since``.

    Pages through ``/repos/{owner}/{repo}/commits`` 50 entries at a time via
    ``gitea_get`` until an empty or short page is returned.

    Args:
        base_url: Root URL of the Gitea instance.
        owner: Repository owner (user or organisation).
        repo: Repository name.
        since: Lower bound for the commits. Timezone-aware values are
            converted to UTC before being sent; naive values are sent
            unchanged and labelled as UTC.
        branch: Branch name or SHA to list from. Defaults to ``"main"``.

    Returns:
        The commit objects exactly as returned by the API, in API order.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote, urlencode

    # The trailing "Z" claims UTC, so make sure an aware datetime really is.
    if since.tzinfo is not None:
        since = since.astimezone(timezone.utc)
    since_str = since.strftime("%Y-%m-%dT%H:%M:%SZ")

    page_size = 50
    endpoint = f"/repos/{quote(owner, safe='')}/{quote(repo, safe='')}/commits"

    commits = []
    page = 1
    while True:
        # NOTE(review): relies on the server honouring the `since` filter -
        # confirm the deployed Gitea version supports it.
        query = urlencode(
            {"sha": branch, "limit": page_size, "page": page, "since": since_str}
        )
        batch = gitea_get(base_url, f"{endpoint}?{query}")
        if not batch:
            break
        commits.extend(batch)
        if len(batch) < page_size:
            # A short page means there is nothing further to fetch.
            break
        page += 1
    return commits
def _parse_timestamp(value: object) -> "datetime | None":
    """Parse an ISO-8601 timestamp string; return None for anything else."""
    if not isinstance(value, str) or not value:
        # Covers null fields and the boolean `merged` flag.
        return None
    try:
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None


def fetch_merged_prs(base_url: str, owner: str, repo: str, since: datetime) -> list[dict]:
    """Fetch closed pull requests that were merged at or after ``since``.

    Closed PRs are requested most-recently-updated first, 50 per page, via
    ``gitea_get``. A PR is kept when its ``merged_at`` timestamp parses and
    is not earlier than ``since``; closed-but-unmerged PRs are skipped.

    Paging stops early once the last PR on a page was last updated before
    ``since``: a merge cannot be later than the PR's last update, so with
    this ordering no later page can hold a PR merged inside the window.

    Args:
        base_url: Root URL of the Gitea instance.
        owner: Repository owner (user or organisation).
        repo: Repository name.
        since: Lower bound for the merge time. It is compared against the
            parsed API timestamps, so it should be timezone-aware.

    Returns:
        The matching PR objects exactly as returned by the API.
    """
    page_size = 50
    merged_prs = []
    page = 1
    while True:
        batch = gitea_get(
            base_url,
            f"/repos/{owner}/{repo}/pulls"
            f"?state=closed&sort=recentupdate&limit={page_size}&page={page}",
        )
        if not batch:
            break

        for pr in batch:
            merged_dt = _parse_timestamp(pr.get("merged_at") or pr.get("merged"))
            if merged_dt is not None and merged_dt >= since:
                merged_prs.append(pr)

        # Early exit keyed on the field the result set is sorted by.
        last_updated = _parse_timestamp(batch[-1].get("updated_at"))
        if last_updated is not None and last_updated < since:
            break
        if len(batch) < page_size:
            break
        page += 1
    return merged_prs
def group_by_author(commits: list[dict], prs: list[dict]) -> dict:
    """Group commits and pull requests by author.

    Commits are keyed by the git author name (``commit.author.name``) and
    pull requests by the login of ``user`` (falling back to ``poster``);
    entries without a usable name are collected under ``"unknown"``.

    Fields that are missing *or* JSON ``null`` are treated the same way, so
    a commit with ``"files": null`` or a PR with ``"user": null`` does not
    raise.

    Args:
        commits: Commit objects as returned by the Gitea commits API.
        prs: Pull-request objects as returned by the Gitea pulls API.

    Returns:
        A dict mapping author key to
        ``{"name", "email", "commits": [...], "prs": [...]}``. Each commit
        entry has ``sha`` (first 8 chars), ``message`` (first line),
        ``date`` (``YYYY-MM-DD``) and ``files``; each PR entry has
        ``number``, ``title``, ``merged_at`` (``YYYY-MM-DD``),
        ``additions`` and ``deletions``.
    """
    authors: dict = {}

    def entry_for(name: str, email: str = "") -> dict:
        # Create the author's record on first sight, then reuse it.
        return authors.setdefault(
            name, {"name": name, "email": email, "commits": [], "prs": []}
        )

    for c in commits:
        # `or {}` / `or ""` also covers keys that are present but null.
        commit_info = c.get("commit") or {}
        author = commit_info.get("author") or {}
        name = author.get("name") or "unknown"
        email = author.get("email") or ""
        entry_for(name, email)["commits"].append({
            "sha": (c.get("sha") or "")[:8],
            "message": (commit_info.get("message") or "").split("\n")[0],
            "date": (author.get("date") or "")[:10],
            "files": [f.get("filename") for f in (c.get("files") or [])],
        })

    for pr in prs:
        user = pr.get("user") or pr.get("poster") or {}
        name = user.get("login") or "unknown"
        entry_for(name)["prs"].append({
            "number": pr.get("number"),
            "title": pr.get("title") or "",
            "merged_at": (pr.get("merged_at") or "")[:10],
            "additions": pr.get("additions") or 0,
            "deletions": pr.get("deletions") or 0,
        })

    return authors
def main():
    """Command-line entry point.

    Parses ``--days`` and ``--output``, resolves the repository from the
    git remote, fetches commits and merged pull requests for the window,
    and emits a JSON report to stdout (``--output -``, the default) or to
    the given file. Progress messages go to stderr.
    """
    import argparse

    cli = argparse.ArgumentParser(description="拉取 Gitea 仓库过去 N 天的数据")
    cli.add_argument("--days", type=int, default=7, help="统计天数默认7天")
    cli.add_argument("--output", default="-", help="输出 JSON 文件路径(默认输出到 stdout")
    args = cli.parse_args()

    base_url, owner, repo = get_remote_info()
    window = timedelta(days=args.days)
    since = datetime.now(timezone.utc) - window

    def log(text):
        # Progress goes to stderr so stdout carries only the JSON payload.
        print(text, file=sys.stderr)

    log(f"仓库:{base_url}/{owner}/{repo}")
    log(f"统计范围:{since.strftime('%Y-%m-%d')} 至今")

    log("正在拉取 commits...")
    commits = fetch_commits(base_url, owner, repo, since)
    log(f"{len(commits)}")

    log("正在拉取已合并 PR...")
    prs = fetch_merged_prs(base_url, owner, repo, since)
    log(f"{len(prs)}")

    meta = {
        "base_url": base_url,
        "owner": owner,
        "repo": repo,
        "since": since.strftime("%Y-%m-%d"),
        "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M"),
    }
    grouped = group_by_author(commits, prs)
    report = {
        "meta": meta,
        "summary": {
            "total_commits": len(commits),
            "total_prs": len(prs),
            "contributors": len(grouped),
        },
        "by_author": grouped,
        "all_prs": prs,
    }
    serialized = json.dumps(report, ensure_ascii=False, indent=2)

    if args.output != "-":
        with open(args.output, "w", encoding="utf-8") as handle:
            handle.write(serialized)
        log(f"数据已写入 {args.output}")
        return
    print(serialized)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()