Show number of dependent packages

* With flag "-show" and a list of package names,
  call crates.io api to count dependent packages.
  * The packages are NOT fetched.
  * Optional dependent packages are not counted.
  * Indirect non-dev dependent packages are counted.
  * Finally all non-dev dependent packages are also
    included in the report.
  * The report is a table like this:

  build_deps[k] = # of non-dev-dependent packages of pkg[k]
  dev_deps[k] = # of all dependent packages of pkg[k]
  all_build_deps[k] = # of non-dev-dependent ... of pkg[1] to pkg[k]
  all_dev_deps[k] = # of all dependent ... of pkg[1] to pkg[k]
    k pkg         build_deps   dev_deps all_build_deps   all_dev_deps
    1 unicode-xid          0          0              0              0
    2 proc-macro2          1          3              0              1
    3 syn                  2        107              0            104
    4 quote                2         18              0            107
    5 remain               4         18              0            107

  Packages are sorted topologically by the non-dev-dependencies.
  The column all_build_deps should be 0 if the topological sort
  did not find cyclic dependency.

* Simplify find_dl_path with a crates.io api call to get max_version.
* Handle urllib.error.HTTPError exception
* Formatted by pyformat.

Test: get_rust_pkg.py -h
Test: get_rust_pkg.py -show syn remain quote
Test: get_rust_pkg.py -v -show remain syn quote

Change-Id: Ib4ece8dd391558fb5fd866b153f30cd2ef59f6b3
This commit is contained in:
Chih-Hung Hsieh
2020-05-04 16:36:33 -07:00
parent a0ba9fab45
commit 783dfd4c64

View File

@@ -22,10 +22,15 @@ Usage: get_rust_pkg.py -v -o tmp syn
Get the latest version of package syn, say 1.0.17, Get the latest version of package syn, say 1.0.17,
and untar it into tmp/syn-1.0.17. and untar it into tmp/syn-1.0.17.
This script will abort if the target directory exists. Usage: get_rust_pkg.py -show bindgen cxx
Count dependent packages of bindgen and cxx.
When downloading a package, its target directory should not exist,
or the download will be skipped.
""" """
import argparse import argparse
import functools
import json import json
import os import os
import re import re
@@ -48,35 +53,42 @@ def parse_args():
"""Parse main arguments.""" """Parse main arguments."""
parser = argparse.ArgumentParser("get_rust_pkg") parser = argparse.ArgumentParser("get_rust_pkg")
parser.add_argument( parser.add_argument(
"-v", action="store_true", default=False, "-v", action="store_true", default=False, help="echo executed commands")
help="echo executed commands")
parser.add_argument( parser.add_argument(
"-o", metavar="out_dir", default=".", "-o", metavar="out_dir", default=".", help="output directory")
help="output directory")
parser.add_argument( parser.add_argument(
dest="pkgs", metavar="pkg_name", nargs="+", "-show",
action="store_true",
default=False,
help="show all default dependent packages, using crates.io api")
parser.add_argument(
dest="pkgs",
metavar="pkg_name",
nargs="+",
help="name of Rust package to be fetched from crates.io") help="name of Rust package to be fetched from crates.io")
return parser.parse_args() return parser.parse_args()
def set2list(a_set):
  """Render a set as a sorted, space-separated string with a leading space.

  Returns "" for an empty (or falsy) set, so the result can be appended
  directly after a count in a report line.
  """
  if not a_set:
    return ""
  return " " + " ".join(sorted(a_set))
def echo(args, msg): def echo(args, msg):
if args.v: if args.v:
print("INFO: {}".format(msg)) print("INFO: {}".format(msg))
def pkg_base_name(args, name): def echo_all_deps(args, kind, deps):
"""Remove version string of name.""" if args.v and deps:
base = name print("INFO: now {} in {}:{}".format(len(deps), kind, set2list(deps)))
version = ""
match = PKG_VERSION_MATCHER.match(name)
def pkg_base_name(pkg):
match = PKG_VERSION_MATCHER.match(pkg)
if match is not None: if match is not None:
base = match.group(1) return (match.group(1), match.group(2))
version = match.group(2)
if version:
echo(args, "package base name: {} version: {}".format(base, version))
else: else:
echo(args, "package base name: {}".format(base)) return (pkg, "")
return base, version
def get_version_numbers(version): def get_version_numbers(version):
@@ -93,40 +105,29 @@ def is_newer_version(args, prev_version, prev_id, check_version, check_id):
(get_version_numbers(prev_version), prev_id)) (get_version_numbers(prev_version), prev_id))
def find_dl_path(args, name): def get_max_version(pkg):
"""Ask crates.io for the latest version download path.""" """Ask crates.io for a pkg's latest version."""
base_name, version = pkg_base_name(args, name) url = "https://crates.io/api/v1/crates/" + pkg
url = "https://crates.io/api/v1/crates/{}/versions".format(base_name)
echo(args, "get versions at {}".format(url))
with urllib.request.urlopen(url) as request: with urllib.request.urlopen(url) as request:
data = json.loads(request.read().decode()) data = json.loads(request.read().decode())
# version with the largest id number is assumed to be the latest return data["crate"]["max_version"]
last_id = 0
dl_path = ""
found_version = "" def find_dl_path(args, name):
for v in data["versions"]: """Ask crates.io for the latest version download path."""
# Return the given version if it is found. base_name, version = pkg_base_name(name)
if version == v["num"]: if not version:
dl_path = v["dl_path"] version = get_max_version(name)
found_version = version url = "https://crates.io/api/v1/crates/{}/{}".format(base_name, version)
break echo(args, "try to get dl_path from {}".format(url))
if version: # must find user specified version with urllib.request.urlopen(url) as request:
continue data = json.loads(request.read().decode())
# Skip yanked version. if "version" not in data or "dl_path" not in data["version"]:
if v["yanked"]: print("ERROR: cannot find version {} of package {}".format(
echo(args, "skip yanked version {}".format(v["num"])) version, base_name))
continue
# Remember the newest version.
if is_newer_version(args, found_version, last_id, v["num"], int(v["id"])):
last_id = int(v["id"])
found_version = v["num"]
dl_path = v["dl_path"]
if not dl_path:
print("ERROR: cannot find version {} of package {}"
.format(version, base_name))
return None return None
echo(args, "found download path for version {}".format(found_version)) echo(args, "found download path for version {}".format(version))
return dl_path return data["version"]["dl_path"]
def fetch_pkg(args, dl_path): def fetch_pkg(args, dl_path):
@@ -160,14 +161,258 @@ def fetch_pkg(args, dl_path):
return True return True
def get_crate_dependencies(args, pkg):
  """Fetch pkg's direct dependencies from the crates.io API.

  Returns a tuple (ok, build_deps, dev_deps):
    ok: False (with deps None) if the HTTP request failed.
    build_deps: crate ids of non-dev, non-optional dependencies.
    dev_deps: crate ids of all non-optional dependencies (a superset
      of build_deps).
  """
  echo(args, "Ask crates.io for {} ...".format(pkg))
  try:
    url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format(
        pkg, get_max_version(pkg))
    with urllib.request.urlopen(url) as request:
      data = json.loads(request.read().decode())
  except urllib.error.HTTPError:
    print("ERROR: failed to find {}".format(pkg))
    return False, None, None
  build_deps = set()
  dev_deps = set()
  for crate in data["dependencies"]:
    # Optional dependencies are skipped; some packages declare many
    # optional features.
    if crate["optional"]:
      continue
    # dev_deps collects every kind, so it is a superset of build_deps.
    dev_deps.add(crate["crate_id"])
    if crate["kind"] != "dev":
      build_deps.add(crate["crate_id"])
  return True, build_deps, dev_deps
def compare_pkg_deps(pkg1, pkg2):
  """Order two (name, build_deps, dev_deps) tuples for dependency sorting.

  Build (non-dev) dependencies take precedence: pkg1 may build-depend on
  pkg2 while pkg2 only dev-depends on pkg1, which is not a real build
  cycle because pkg2 can be built before pkg1 but tested after it.
  Packages with no dependency relation are ordered by the sizes of their
  dependency sets and then by name.
  """
  name1, build1, dev1 = pkg1
  name2, build2, dev2 = pkg2
  if name1 in build2:
    return -1  # pkg2 build-depends on pkg1
  if name2 in build1:
    return 1  # pkg1 build-depends on pkg2
  if name1 in dev2:
    return -1  # pkg2 dev-depends on pkg1
  if name2 in dev1:
    return 1  # pkg1 dev-depends on pkg2
  key1 = (len(build1), len(dev1), name1)
  key2 = (len(build2), len(dev2), name2)
  if key1 == key2:
    return 0
  return -1 if key1 < key2 else 1
def sort_found_pkgs(tuples):
  """Topologically sort (name, build_deps, dev_deps) tuples by build_deps.

  Dependencies on packages outside the input list are "external" and
  ignored by the sort. On a dependency cycle (or bug) the leftover
  packages are reported and appended to the result anyway.
  """
  known = set(map(lambda t: t[0], tuples))
  # Pair each tuple with a shrinking working copy of its build_deps,
  # restricted to the known (non-external) packages.
  pending = [(build & known, (name, build, dev))
             for (name, build, dev) in tuples]
  ordered = []
  # Brute-force topological sort: tuples whose working build_deps are
  # empty go before the rest.
  while pending:
    ready = [t for t in pending if not t[0]]
    blocked = [t for t in pending if t[0]]
    pending = []
    if ready:
      group = sorted((t[1] for t in ready),
                     key=functools.cmp_to_key(compare_pkg_deps))
      ordered.extend(group)
      # Names just emitted no longer block the remaining tuples.
      done = set(map(lambda t: t[0], group))
      pending = [(deps - done, info) for (deps, info) in blocked]
    else:
      # A cycle in build_deps (or a bug). E.g. with optional deps
      # included, futures-util <-> futures would form such a cycle.
      print("ERROR: leftover in topological sort: {}".format(
          list(map(lambda t: t[1][1], blocked))))
      # Still sort the leftovers so the final report includes them.
      rest = sorted((t[1] for t in blocked),
                    key=functools.cmp_to_key(compare_pkg_deps))
      ordered.extend(rest)
  return ordered
def show_all_dependencies(args, found_pkgs):
  """Topologically sort found_pkgs and print a dependency-count table."""
  found_pkgs = sort_found_pkgs(found_pkgs)
  # Package-name column is sized to the longest name (at least 1).
  width = max([1] + [len(pkg) for (pkg, _, _) in found_pkgs])
  name_format = "{:" + str(width) + "s}"
  print("\n##### Summary of all dependent package counts #####")
  print("build_deps[k] = # of non-dev-dependent packages of pkg[k]")
  print("dev_deps[k] = # of all dependent packages of pkg[k]")
  print(
      "all_build_deps[k] = # of non-dev-dependent packages of pkg[1] to pkg[k]")
  print("all_dev_deps[k] = # of all dependent packages of pkg[1] to pkg[k]")
  print(("{:>4s} " + name_format + " {:>10s} {:>10s} {:>14s} {:>14s}").format(
      "k", "pkg", "build_deps", "dev_deps", "all_build_deps", "all_dev_deps"))
  all_pkgs = set()
  all_build_deps = set()
  all_dev_deps = set()
  for k, (pkg, build_deps, dev_deps) in enumerate(found_pkgs, start=1):
    all_pkgs.add(pkg)
    # Cumulative sets exclude packages already listed in the table.
    all_build_deps = (all_build_deps | build_deps) - all_pkgs
    all_dev_deps = (all_dev_deps | dev_deps) - all_pkgs
    print(("{:4d} " + name_format + " {:10d} {:10d} {:14d} {:14d}").format(
        k, pkg, len(build_deps), len(dev_deps), len(all_build_deps),
        len(all_dev_deps)))
    echo_all_deps(args, "all_build_deps", all_build_deps)
    echo_all_deps(args, "all_dev_deps", all_dev_deps)
  print("\nNOTE: from all {} package(s):{}".format(
      len(all_pkgs), set2list(all_pkgs)))
  print("NOTE: found {:3d} other non-dev-dependent package(s):{}".format(
      len(all_build_deps), set2list(all_build_deps)))
  print("NOTE: found {:3d} other dependent package(s):{}".format(
      len(all_dev_deps), set2list(all_dev_deps)))
def crates_io_find_pkgs(args, pkgs, found_pkgs):
  """Query crates.io for each pkg's direct deps; append hits to found_pkgs.

  Returns False if any lookup failed, True otherwise. Packages are
  processed in sorted order for deterministic output.
  """
  success = True
  for pkg in sorted(pkgs):
    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
    if ok:
      found_pkgs.append((pkg, build_deps, dev_deps))
    else:
      success = False
  return success
def add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg):
  """Add reachable non-dev dependencies to all_deps[pkg]'s dependencies.

  Recursively expands pkg's build_deps (and, for core packages, dev_deps)
  with everything transitively reachable, caching results in all_deps.
  Returns pkg's expanded build_deps set (empty on a failed lookup).
  """
  if pkg not in all_deps:
    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
    if not ok:
      return set()
    all_deps[pkg] = (pkg, build_deps, dev_deps)
  else:
    (_, build_deps, dev_deps) = all_deps[pkg]
  if pkg in visited:
    return build_deps
  visited.add(pkg)
  # Iterate over a sorted snapshot; build_deps is rebound (not mutated)
  # inside the loop so the iteration set stays fixed.
  for dep in sorted(build_deps):
    # If dep was visited earlier as a non-core package and is promoted
    # into core_pkgs now, its dev_deps must be (re)expanded too.
    revisit_dev_deps = False
    if pkg in core_pkgs and dep not in core_pkgs:
      core_pkgs.add(dep)
      revisit_dev_deps = True
    reached = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, dep)
    build_deps = build_deps | reached
    if revisit_dev_deps:
      (_, dep_build_deps, dep_dev_deps) = all_deps[dep]
      for dev_dep in sorted(dep_dev_deps):
        reached = add_non_dev_dependencies(args, all_deps, core_pkgs, visited,
                                           dev_dep)
        dep_dev_deps = dep_dev_deps | reached
      all_deps[dep] = (dep, dep_build_deps, dep_dev_deps)
  # Only core packages have their dev_deps expanded transitively.
  if pkg in core_pkgs:
    for dep in sorted(dev_deps):
      reached = add_non_dev_dependencies(args, all_deps, core_pkgs, visited,
                                         dep)
      dev_deps = dev_deps | reached
  all_deps[pkg] = (pkg, build_deps, dev_deps)
  return build_deps
def add_indirect_build_deps(args, found_pkgs):
  """Expand found_pkgs with all indirect non-dev deps; return new list."""
  all_deps = {t[0]: t for t in found_pkgs}
  core_pkgs = {t[0] for t in found_pkgs}
  dev_pkgs = set()
  dump_pkgs(args, "BEFORE", all_deps, core_pkgs, dev_pkgs)
  visited = set()
  # sorted() snapshots core_pkgs; the set may grow during expansion.
  for pkg in sorted(core_pkgs):
    add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg)
  dev_pkgs = visited - core_pkgs
  dump_pkgs(args, "AFTER", all_deps, core_pkgs, dev_pkgs)
  # Return the (possibly grown) core set, ordered by package name.
  return sorted((all_deps[pkg] for pkg in core_pkgs), key=lambda t: t[0])
def echo_dump_found_pkgs(args, msg, all_deps, pkgs):
  """In verbose mode, dump each pkg's normal and dev dependency sets."""
  if not args.v or not pkgs:
    return
  echo(args, msg)
  for pkg in sorted(pkgs):
    (_, build_deps, dev_deps) = all_deps[pkg]
    for (kind, deps) in (("normal", build_deps), ("dev", dev_deps)):
      line = " {} has {} " + kind + " deps:{}"
      echo(args, line.format(pkg, len(deps), set2list(deps)))
def dump_pkgs(args, msg, all_deps, core_pkgs, dev_pkgs):
  """Verbose dump of core packages and the extra dev-only packages."""
  for (suffix, group) in ((" core_pkgs:", core_pkgs),
                          (" other_dev_pkgs:", dev_pkgs)):
    echo_dump_found_pkgs(args, msg + suffix, all_deps, group)
def show_dependencies(args):
  """Calls crates.io api to find dependent packages; returns True on success.

  Strips version suffixes from args.pkgs, looks up each package's direct
  dependencies, expands indirect non-dev dependencies, and prints the
  summary table. Returns False if no valid names remain or every lookup
  failed.
  """
  all_pkgs = set(map(lambda p: pkg_base_name(p)[0], args.pkgs))
  if "" in all_pkgs:
    # TODO(chh): detect and report ill formed names in args.pkgs
    print("WARNING: skip some ill formatted pkg arguments.")
    # BUG FIX: set.remove() returns None, so its result must not be
    # assigned back; `all_pkgs = all_pkgs.remove("")` wiped out every
    # valid name and made this function always fail after the warning.
    all_pkgs.remove("")
  if not all_pkgs:
    print("ERROR: no valid pkg names to show dependencies.")
    return False
  pkgs = sorted(all_pkgs)
  found_pkgs = []
  success = crates_io_find_pkgs(args, pkgs, found_pkgs)
  if not found_pkgs:
    return False
  # All normal (non-dev) dependent packages will be added into found_pkgs.
  found_pkgs = add_indirect_build_deps(args, found_pkgs)
  show_all_dependencies(args, found_pkgs)
  return success
def main(): def main():
args = parse_args() args = parse_args()
packages = list(dict.fromkeys(args.pkgs)) if args.show:
echo(args, "to fetch packags = {}".format(packages)) # only show dependencies, not to fetch any package
if not show_dependencies(args):
sys.exit(2)
return
echo(args, "to fetch packages = {}".format(args.pkgs))
errors = [] errors = []
for pkg in packages: for pkg in args.pkgs:
echo(args, "trying to fetch package {}".format(pkg)) echo(args, "trying to fetch package {}".format(pkg))
if not fetch_pkg(args, find_dl_path(args, pkg)): try:
if not fetch_pkg(args, find_dl_path(args, pkg)):
errors.append(pkg)
except urllib.error.HTTPError:
errors.append(pkg) errors.append(pkg)
if errors: if errors:
for pkg in errors: for pkg in errors: