Show number of dependent packages

* With flag "-show" and a list of package names,
  call crates.io api to count dependent packages.
  * The packages are NOT fetched.
  * Optional dependent packages are not counted.
  * Indirect non-dev dependent packages are counted.
  * Finally all non-dev dependent packages are also
    included in the report.
  * The report is a table like this:

  build_deps[k] = # of non-dev-dependent packages of pkg[k]
  dev_deps[k] = # of all dependent packages of pkg[k]
  all_build_deps[k] = # of non-dev-dependent packages of pkg[1] to pkg[k]
  all_dev_deps[k] = # of all dependent packages of pkg[1] to pkg[k]
    k pkg         build_deps   dev_deps all_build_deps   all_dev_deps
    1 unicode-xid          0          0              0              0
    2 proc-macro2          1          3              0              1
    3 syn                  2        107              0            104
    4 quote                2         18              0            107
    5 remain               4         18              0            107

  Packages are sorted topologically by their non-dev dependencies.
  The all_build_deps column should be all zeros if the topological
  sort found no dependency cycle (a sketch of the per-package
  counting query follows below).
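
  For reference, a minimal sketch of the per-package query behind these
  counts (the crates.io endpoint and JSON fields match what this change
  uses; the helper name count_deps is only for illustration):

    import json
    import urllib.request

    def count_deps(pkg, version):
      """Count non-optional dependencies of pkg at the given version."""
      url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format(
          pkg, version)
      with urllib.request.urlopen(url) as request:
        data = json.loads(request.read().decode())
      build_deps = set()  # non-dev dependencies
      dev_deps = set()    # all non-optional dependencies, a superset
      for crate in data["dependencies"]:
        if not crate["optional"]:
          dev_deps.add(crate["crate_id"])
          if crate["kind"] != "dev":
            build_deps.add(crate["crate_id"])
      return len(build_deps), len(dev_deps)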

* Simplify find_dl_path with a crates.io API call to get max_version
  (see the sketch after this list).
* Handle the urllib.error.HTTPError exception.
* Formatted by pyformat.
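
  A minimal sketch of the simplified lookup, mirroring the new
  get_max_version helper in this change, with the urllib.error.HTTPError
  handling shown around the call site:

    import json
    import urllib.error
    import urllib.request

    def get_max_version(pkg):
      """Ask crates.io for the latest (max) version of a package."""
      url = "https://crates.io/api/v1/crates/" + pkg
      with urllib.request.urlopen(url) as request:
        data = json.loads(request.read().decode())
      return data["crate"]["max_version"]

    try:
      print(get_max_version("syn"))
    except urllib.error.HTTPError as err:
      print("ERROR: crates.io lookup failed: {}".format(err))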

Test: get_rust_pkg.py -h
Test: get_rust_pkg.py -show syn remain quote
Test: get_rust_pkg.py -v -show remain syn quote

Change-Id: Ib4ece8dd391558fb5fd866b153f30cd2ef59f6b3
Author: Chih-Hung Hsieh
Date: 2020-05-04 16:36:33 -07:00
parent a0ba9fab45
commit 783dfd4c64


@@ -22,10 +22,15 @@ Usage: get_rust_pkg.py -v -o tmp syn
Get the latest version of package syn, say 1.0.17,
and untar it into tmp/syn-1.0.17.
This script will abort if the target directory exists.
Usage: get_rust_pkg.py -show bindgen cxx
Count dependent packages of bindgen and cxx.
When downloading a package, its target directory should not exist,
or the download will be skipped.
"""
import argparse
import functools
import json
import os
import re
@@ -48,35 +53,42 @@ def parse_args():
"""Parse main arguments."""
parser = argparse.ArgumentParser("get_rust_pkg")
parser.add_argument(
"-v", action="store_true", default=False,
help="echo executed commands")
"-v", action="store_true", default=False, help="echo executed commands")
parser.add_argument(
"-o", metavar="out_dir", default=".",
help="output directory")
"-o", metavar="out_dir", default=".", help="output directory")
parser.add_argument(
dest="pkgs", metavar="pkg_name", nargs="+",
"-show",
action="store_true",
default=False,
help="show all default dependent packages, using crates.io api")
parser.add_argument(
dest="pkgs",
metavar="pkg_name",
nargs="+",
help="name of Rust package to be fetched from crates.io")
return parser.parse_args()
def set2list(a_set):
return (" " + " ".join(sorted(a_set))) if a_set else ""
def echo(args, msg):
if args.v:
print("INFO: {}".format(msg))
def pkg_base_name(args, name):
"""Remove version string of name."""
base = name
version = ""
match = PKG_VERSION_MATCHER.match(name)
def echo_all_deps(args, kind, deps):
if args.v and deps:
print("INFO: now {} in {}:{}".format(len(deps), kind, set2list(deps)))
def pkg_base_name(pkg):
match = PKG_VERSION_MATCHER.match(pkg)
if match is not None:
base = match.group(1)
version = match.group(2)
if version:
echo(args, "package base name: {} version: {}".format(base, version))
return (match.group(1), match.group(2))
else:
echo(args, "package base name: {}".format(base))
return base, version
return (pkg, "")
def get_version_numbers(version):
@@ -93,40 +105,29 @@ def is_newer_version(args, prev_version, prev_id, check_version, check_id):
          (get_version_numbers(prev_version), prev_id))


def get_max_version(pkg):
  """Ask crates.io for a pkg's latest version."""
  url = "https://crates.io/api/v1/crates/" + pkg
  with urllib.request.urlopen(url) as request:
    data = json.loads(request.read().decode())
  return data["crate"]["max_version"]


def find_dl_path(args, name):
  """Ask crates.io for the latest version download path."""
  base_name, version = pkg_base_name(name)
  if not version:
    version = get_max_version(name)
  url = "https://crates.io/api/v1/crates/{}/{}".format(base_name, version)
  echo(args, "try to get dl_path from {}".format(url))
  with urllib.request.urlopen(url) as request:
    data = json.loads(request.read().decode())
    if "version" not in data or "dl_path" not in data["version"]:
      print("ERROR: cannot find version {} of package {}".format(
          version, base_name))
      return None
    echo(args, "found download path for version {}".format(version))
    return data["version"]["dl_path"]

def fetch_pkg(args, dl_path):
@@ -160,14 +161,258 @@ def fetch_pkg(args, dl_path):
  return True


def get_crate_dependencies(args, pkg):
  """Ask crates.io for pkg's dependencies."""
  echo(args, "Ask crates.io for {} ...".format(pkg))
  try:
    url = "https://crates.io/api/v1/crates/{}/{}/dependencies".format(
        pkg, get_max_version(pkg))
    with urllib.request.urlopen(url) as request:
      data = json.loads(request.read().decode())
  except urllib.error.HTTPError:
    print("ERROR: failed to find {}".format(pkg))
    return False, None, None
  build_deps = set()
  dev_deps = set()
  for crate in data["dependencies"]:
    if not crate["optional"]:  # some package has a lot of optional features
      # dev_deps is a super set of build_deps
      dev_deps.add(crate["crate_id"])
      if crate["kind"] != "dev":
        build_deps.add(crate["crate_id"])
  return True, build_deps, dev_deps


def compare_pkg_deps(pkg1, pkg2):
  """Compare dependency order of pkg1 and pkg2."""
  base1, build_deps1, dev_deps1 = pkg1
  base2, build_deps2, dev_deps2 = pkg2
  # Some pkg1 can be build-dependent (non-dev-dependent) on pkg2,
  # when pkg2 is only test-dependent (dev-dependent) on pkg1.
  # This is not really a build dependency cycle, because pkg2
  # can be built before pkg1, but tested after pkg1.
  # So the dependency order is based on build_deps first, and then dev_deps.
  if base1 in build_deps2:
    return -1  # pkg2 needs base1
  if base2 in build_deps1:
    return 1  # pkg1 needs base2
  if base1 in dev_deps2:
    return -1  # pkg2 needs base1
  if base2 in dev_deps1:
    return 1  # pkg1 needs base2
  # If there is no dependency between pkg1 and pkg2,
  # order them by the size of build_deps or dev_deps, or the name.
  count1 = (len(build_deps1), len(dev_deps1), base1)
  count2 = (len(build_deps2), len(dev_deps2), base2)
  if count1 != count2:
    return -1 if count1 < count2 else 1
  return 0


def sort_found_pkgs(tuples):
  """A topological sort of tuples based on build_deps."""
  # tuples is a list of (base_name, build_deps, dev_deps)
  # Use build_deps as the dependency relation in a topological sort.
  # The new_tuples list is used in topological sort. It is the input tuples
  # prefixed with a changing build_deps during the sorting process.
  # Collect all package base names.
  # Dependent packages not found in all_base_names will be treated as
  # "external" and ignored in topological sort.
  all_base_names = set(map(lambda t: t[0], tuples))
  new_tuples = []
  all_names = set()
  for (base_name, build_deps, dev_deps) in tuples:
    new_tuples.append((build_deps, (base_name, build_deps, dev_deps)))
    all_names = all_names.union(build_deps)
  external_names = all_names.difference(all_base_names)
  new_tuples = list(
      map(lambda t: (t[0].difference(external_names), t[1]), new_tuples))
  sorted_tuples = []
  # A brute force topological sort;
  # tuples with empty build_deps are put before the others.
  while new_tuples:
    first_group = list(filter(lambda t: not t[0], new_tuples))
    other_group = list(filter(lambda t: t[0], new_tuples))
    new_tuples = []
    if first_group:
      # Remove the extra build_deps in first_group,
      # then sort it, and add its tuples to the sorted_tuples list.
      first_group = list(map(lambda t: t[1], first_group))
      first_group.sort(key=functools.cmp_to_key(compare_pkg_deps))
      sorted_tuples.extend(first_group)
      # Copy other_group to new_tuples but remove names in the first_group.
      base_names = set(map(lambda t: t[0], first_group))
      new_tuples = list(
          map(lambda t: (t[0].difference(base_names), t[1]), other_group))
    else:
      # There is a bug, or a cycle in the build_deps.
      # If we include all optional dependent packages into build_deps,
      # we will see one cycle: futures-util has an optional dependent
      # on futures, which has a normal dependent on futures-util.
      print("ERROR: leftover in topological sort: {}".format(
          list(map(lambda t: t[1][1], other_group))))
      # Anyway, sort the other_group to include them into final report.
      other_group = list(map(lambda t: t[1], other_group))
      other_group.sort(key=functools.cmp_to_key(compare_pkg_deps))
      sorted_tuples.extend(other_group)
  return sorted_tuples


def show_all_dependencies(args, found_pkgs):
  """Topological sort found_pkgs and report number of dependent packages."""
  found_pkgs = sort_found_pkgs(found_pkgs)
  max_pkg_length = 1
  for (pkg, _, _) in found_pkgs:
    max_pkg_length = max(max_pkg_length, len(pkg))
  name_format = "{:" + str(max_pkg_length) + "s}"
  print("\n##### Summary of all dependent package counts #####")
  print("build_deps[k] = # of non-dev-dependent packages of pkg[k]")
  print("dev_deps[k] = # of all dependent packages of pkg[k]")
  print(
      "all_build_deps[k] = # of non-dev-dependent packages of pkg[1] to pkg[k]")
  print("all_dev_deps[k] = # of all dependent packages of pkg[1] to pkg[k]")
  print(("{:>4s} " + name_format + " {:>10s} {:>10s} {:>14s} {:>14s}").format(
      "k", "pkg", "build_deps", "dev_deps", "all_build_deps", "all_dev_deps"))
  all_pkgs = set()
  all_build_deps = set()
  all_dev_deps = set()
  k = 0
  for (pkg, build_deps, dev_deps) in found_pkgs:
    all_pkgs.add(pkg)
    all_build_deps = all_build_deps.union(build_deps).difference(all_pkgs)
    all_dev_deps = all_dev_deps.union(dev_deps).difference(all_pkgs)
    k += 1
    print(("{:4d} " + name_format + " {:10d} {:10d} {:14d} {:14d}").format(
        k, pkg, len(build_deps), len(dev_deps), len(all_build_deps),
        len(all_dev_deps)))
    echo_all_deps(args, "all_build_deps", all_build_deps)
    echo_all_deps(args, "all_dev_deps", all_dev_deps)
  print("\nNOTE: from all {} package(s):{}".format(
      len(all_pkgs), set2list(all_pkgs)))
  print("NOTE: found {:3d} other non-dev-dependent package(s):{}".format(
      len(all_build_deps), set2list(all_build_deps)))
  print("NOTE: found {:3d} other dependent package(s):{}".format(
      len(all_dev_deps), set2list(all_dev_deps)))


def crates_io_find_pkgs(args, pkgs, found_pkgs):
  """Call crates.io api to find direct dependent packages."""
  success = True
  for pkg in sorted(pkgs):
    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
    if not ok:
      success = False
    else:
      found_pkgs.append((pkg, build_deps, dev_deps))
  return success


def add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg):
  """Add reachable non-dev dependencies to all_deps[pkg]'s dependencies."""
  if pkg not in all_deps:
    ok, build_deps, dev_deps = get_crate_dependencies(args, pkg)
    if not ok:
      return set()
    all_deps[pkg] = (pkg, build_deps, dev_deps)
  else:
    (_, build_deps, dev_deps) = all_deps[pkg]
  if pkg in visited:
    return build_deps
  visited.add(pkg)
  for p in sorted(build_deps):
    # If p was visited before as a non-core package and now added
    # into core_pkgs, its dev_deps should be revisited.
    revisit_dev_deps = False
    if pkg in core_pkgs and p not in core_pkgs:
      core_pkgs.add(p)
      revisit_dev_deps = True
    deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p)
    build_deps = build_deps.union(deps)
    if revisit_dev_deps:
      (_, p_build_deps, p_dev_deps) = all_deps[p]
      for q in sorted(p_dev_deps):
        deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, q)
        p_dev_deps = p_dev_deps.union(deps)
      all_deps[p] = (p, p_build_deps, p_dev_deps)
  if pkg in core_pkgs:
    for p in sorted(dev_deps):
      deps = add_non_dev_dependencies(args, all_deps, core_pkgs, visited, p)
      dev_deps = dev_deps.union(deps)
  all_deps[pkg] = (pkg, build_deps, dev_deps)
  return build_deps


def add_indirect_build_deps(args, found_pkgs):
  """Add all indirect dependencies and return a new found_pkgs."""
  all_deps = dict(map(lambda t: (t[0], t), found_pkgs))
  core_pkgs = set(map(lambda t: t[0], found_pkgs))
  dev_pkgs = set()
  dump_pkgs(args, "BEFORE", all_deps, core_pkgs, dev_pkgs)
  visited = set()
  for pkg in sorted(core_pkgs):
    add_non_dev_dependencies(args, all_deps, core_pkgs, visited, pkg)
  dev_pkgs = visited.difference(core_pkgs)
  dump_pkgs(args, "AFTER", all_deps, core_pkgs, dev_pkgs)
  found_pkgs = list(map(lambda p: all_deps[p], core_pkgs))
  found_pkgs.sort(key=lambda t: t[0])
  return found_pkgs


def echo_dump_found_pkgs(args, msg, all_deps, pkgs):
  if not args.v or not pkgs:
    return
  echo(args, msg)
  for pkg in sorted(pkgs):
    (_, build_deps, dev_deps) = all_deps[pkg]
    for (name, deps) in [("normal", build_deps), ("dev", dev_deps)]:
      pattern = " {} has {} " + name + " deps:{}"
      echo(args, pattern.format(pkg, len(deps), set2list(deps)))


def dump_pkgs(args, msg, all_deps, core_pkgs, dev_pkgs):
  echo_dump_found_pkgs(args, msg + " core_pkgs:", all_deps, core_pkgs)
  echo_dump_found_pkgs(args, msg + " other_dev_pkgs:", all_deps, dev_pkgs)


def show_dependencies(args):
  """Calls crates.io api to find dependent packages; returns True on success."""
  all_pkgs = set(map(lambda p: pkg_base_name(p)[0], args.pkgs))
  if "" in all_pkgs:
    # TODO(chh): detect and report ill formed names in args.pkgs
    print("WARNING: skip some ill formatted pkg arguments.")
    all_pkgs.remove("")
  if not all_pkgs:
    print("ERROR: no valid pkg names to show dependencies.")
    return False
  pkgs = sorted(all_pkgs)
  found_pkgs = []
  success = crates_io_find_pkgs(args, pkgs, found_pkgs)
  if not found_pkgs:
    return False
  # All normal (non-dev) dependent packages will be added into found_pkgs.
  found_pkgs = add_indirect_build_deps(args, found_pkgs)
  show_all_dependencies(args, found_pkgs)
  return success


def main():
  args = parse_args()
  if args.show:
    # only show dependencies, not to fetch any package
    if not show_dependencies(args):
      sys.exit(2)
    return
  echo(args, "to fetch packages = {}".format(args.pkgs))
  errors = []
  for pkg in args.pkgs:
    echo(args, "trying to fetch package {}".format(pkg))
    try:
      if not fetch_pkg(args, find_dl_path(args, pkg)):
        errors.append(pkg)
    except urllib.error.HTTPError:
      errors.append(pkg)
  if errors:
    for pkg in errors: