diff --git a/scripts/pypi_riscv64_check.py b/scripts/pypi_riscv64_check.py new file mode 100644 index 0000000..fd92977 --- /dev/null +++ b/scripts/pypi_riscv64_check.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +""" +pypi_riscv64_check.py + +Fetches the top N PyPI packages by download count, filters down to those +shipping platform-specific (binary) wheels, and flags which ones already +provide riscv64 wheels in their latest release on PyPI. + +Data source: hugovk/top-pypi-packages (updated monthly, via GitHub raw) +Wheel info: PyPI JSON API https://pypi.org/pypi/{package}/json + +Usage: + python pypi_riscv64_check.py # default: top 30 + python pypi_riscv64_check.py --top 50 # top 50 binary-wheel packages + python pypi_riscv64_check.py --exclude-riscv64 # hide packages that already have it +""" + +import argparse +import json +import time +import urllib.request + +TOP_PACKAGES_URL = ( + "https://raw.githubusercontent.com/hugovk/top-pypi-packages/" + "main/top-pypi-packages.min.json" +) +PYPI_JSON_URL = "https://pypi.org/pypi/{package}/json" +REQUEST_DELAY = 0.5 # seconds between PyPI API calls (be polite) + + +def fetch_json(url: str) -> dict | list: + req = urllib.request.Request( + url, headers={"User-Agent": "pypi-riscv64-checker/1.0"} + ) + with urllib.request.urlopen(req, timeout=15) as r: + return json.loads(r.read()) + + +def analyse_package(name: str, download_count: int) -> dict | None: + """ + Returns a result dict if the package ships at least one binary wheel, + otherwise None (pure-Python or fetch error). + + A binary wheel is any .whl file whose filename does NOT end with + 'none-any.whl' (the platform-independent tag). + """ + try: + info = fetch_json(PYPI_JSON_URL.format(package=name)) + except Exception as exc: + print(f" [WARN] Could not fetch {name}: {exc}") + return None + + wheel_files = [ + u["filename"] + for u in info.get("urls", []) + if u["packagetype"] == "bdist_wheel" + ] + + binary_wheels = [f for f in wheel_files if not f.endswith("none-any.whl")] + if not binary_wheels: + return None # pure-Python or no wheels at all + + riscv64_wheels = [f for f in wheel_files if "riscv64" in f.lower()] + + return { + "project": name, + "download_count": download_count, + "binary_wheel_count": len(binary_wheels), + "has_riscv64": len(riscv64_wheels) > 0, + "riscv64_wheel_count": len(riscv64_wheels), + } + + +def fmt_count(n: int) -> str: + """Format large numbers with M/B suffixes.""" + if n >= 1_000_000_000: + return f"{n / 1_000_000_000:.2f}B" + if n >= 1_000_000: + return f"{n / 1_000_000:.1f}M" + return str(n) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--top", type=int, default=30, + help="Number of binary-wheel packages to collect (default: 30)" + ) + parser.add_argument( + "--exclude-riscv64", action="store_true", + help="Exclude packages that already have riscv64 wheels" + ) + args = parser.parse_args() + + print("Fetching top-packages dataset …") + dataset = fetch_json(TOP_PACKAGES_URL) + all_packages = dataset["rows"] + last_update = dataset.get("last_update", "unknown") + print(f" Dataset last updated: {last_update}") + print(f" Scanning for top {args.top} binary-wheel packages …\n") + + results = [] + scanned = 0 + + for pkg in all_packages: + if len(results) >= args.top: + break + + name = pkg["project"] + dl = pkg["download_count"] + result = analyse_package(name, dl) + scanned += 1 + + if result is None: + continue + + if args.exclude_riscv64 and result["has_riscv64"]: + print(f" SKIP (riscv64 exists): {name}") + continue + + results.append(result) + riscv_tag = "✓ riscv64" if result["has_riscv64"] else "✗ NO riscv64" + print(f" [{len(results):2d}] {name:<40s} {riscv_tag}") + time.sleep(REQUEST_DELAY) + + # --- Print table --- + print() + print(f"{'Rank':<5} {'Package':<35} {'Downloads':>12} {'Binary Wheels':>14} {'riscv64':>10}") + print("-" * 80) + for i, r in enumerate(results, 1): + riscv_col = f"✓ ({r['riscv64_wheel_count']})" if r["has_riscv64"] else "✗" + print( + f"{i:<5} {r['project']:<35} " + f"{fmt_count(r['download_count']):>12} " + f"{r['binary_wheel_count']:>14} " + f"{riscv_col:>10}" + ) + print() + + no_riscv = [r["project"] for r in results if not r["has_riscv64"]] + print(f"Packages WITHOUT riscv64 wheels ({len(no_riscv)}/{len(results)}):") + for name in no_riscv: + print(f" - {name}") + + print(f"\nScanned {scanned} packages total.") + + +if __name__ == "__main__": + main()