#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2021 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This script cleans up the vendor directory."""
import argparse
import collections
import copy
import functools
import hashlib
import itertools
import json
import logging
import os
from pathlib import Path
import re
import shutil
import subprocess
import sys
import textwrap
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
sys.path.append(str(Path(__file__).resolve().parent / "scripts"))
import rust_crates
rust_crates.die_if_running_as_root()
rust_crates.run_inside_chroot()
rust_crates.install_tomli_and_reexec_if_unavailable()
import tomli
import tomli_w
# E.g. matches `crate` or `crate-1.2.3+blah`; groups are (name, version).
_PATCH_VERSION_REGEX = re.compile(r"^(.*?)(?:-(\d+\.\d+\.\d+(?:\+.*)?)?)?$")
_METALLURGY_CARGO_TOML_HEADER = """# Copyright 2023 The ChromiumOS Authors.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# !! Autogenerated by `vendor.py`; please don't edit. !!
[package]
name = "metallurgy-crates"
version = "0.0.1"
[workspace]
[dependencies]
"""
_FAKE_NO_STD_CRATES = (
# num-* uses std feature to toggle std in dependencies.
# This is the subset of num-* that requires std to function.
"num",
"num-bigint",
"num-rational",
# svd2rust depends on env_logger, which relies on std, and has the
# following line in the Cargo.toml:
# log = { version = "~0.4", features = ["std"] }
"svd2rust",
)
_NO_STD_FEATURES = ("no-std", "no_std")
_STD_FEATURES = ("std", "use_std")
# We only care about crates we're actually going to use and that's usually
# limited to ones with cfg(linux). For running `cargo metadata`, limit results
# to only these platforms.
ALL_SUPPORTED_PLATFORMS = (
# Main targets.
"x86_64-cros-linux-gnu",
"armv7a-cros-linux-gnueabihf",
"aarch64-cros-linux-gnu",
# As far as we care, this is the same as x86_64-cros-linux-gnu.
# "x86_64-pc-linux-gnu",
# Baremetal targets.
"thumbv6m-none-eabi",
"thumbv7m-none-eabi",
"thumbv7em-none-eabihf",
"i686-unknown-uefi",
"x86_64-unknown-uefi",
)
# A series of crates which are to be made empty by having no (non-comment)
# contents in their `lib.rs`, rather than by inserting a compilation error.
NOP_EMPTY_CRATES = frozenset({"windows"})
EMPTY_CRATE_BODY = """\
compile_error!("This crate cannot be built for this configuration.");
"""
NOP_EMPTY_CRATE_BODY = "// " + EMPTY_CRATE_BODY
_ALL_MODES = {"alchemy", "std", "no_std"}
# This ends up being a JSON-y map that provides data for
# https://bazelbuild.github.io/rules_rust/crate_universe.html#crateannotation
BazelAnnotation = Dict[str, Any]
def _rerun_checksums(package_path):
"""Re-run checksums for given package.
Writes resulting checksums to $package_path/.cargo-checksum.json.
"""
hashes = dict()
checksum_path = os.path.join(package_path, ".cargo-checksum.json")
if not Path(checksum_path).is_file():
return False
with open(checksum_path, "r") as fread:
contents = json.load(fread)
for root, _, files in os.walk(package_path, topdown=True):
for f in files:
# Don't checksum an existing checksum file
if f == ".cargo-checksum.json":
continue
file_path = os.path.join(root, f)
with open(file_path, "rb") as frb:
m = hashlib.sha256()
m.update(frb.read())
d = m.hexdigest()
# Key is relative to the package path so strip from beginning
key = os.path.relpath(file_path, package_path)
hashes[key] = d
if hashes:
logging.info(
"%s regenerated %d hashes", package_path, len(hashes.keys())
)
contents["files"] = hashes
with open(checksum_path, "w") as fwrite:
json.dump(contents, fwrite, sort_keys=True)
return True
def _remove_OWNERS_checksum(root):
"""Delete all OWNERS files from the checksum file.
Args:
root: Root directory for the vendored crate.
Returns:
True if OWNERS was found and cleaned up. Otherwise False.
"""
checksum_path = os.path.join(root, ".cargo-checksum.json")
if not Path(checksum_path).is_file():
return False
with open(checksum_path, "r") as fread:
contents = json.load(fread)
del_keys = []
for cfile in contents["files"]:
if "OWNERS" in cfile:
del_keys.append(cfile)
for key in del_keys:
del contents["files"][key]
if del_keys:
logging.info("%s deleted: %s", root, del_keys)
with open(checksum_path, "w") as fwrite:
json.dump(contents, fwrite, sort_keys=True)
return bool(del_keys)
def cleanup_owners(vendor_path):
"""Remove owners checksums from the vendor directory.
We currently do not check in the OWNERS files from vendored crates because
they interfere with the find-owners functionality in gerrit. This cleanup
simply finds all instances of "OWNERS" in the checksum files within and
removes them.
Args:
vendor_path: Absolute path to vendor directory.
"""
deps_cleaned = []
for root, dirs, _ in os.walk(vendor_path):
for d in dirs:
removed = _remove_OWNERS_checksum(os.path.join(root, d))
if removed:
deps_cleaned.append(d)
if deps_cleaned:
logging.info("Cleanup owners:\n%s", "\n".join(deps_cleaned))
def apply_single_patch(patch, workdir):
"""Apply a single patch and return whether it was successful.
Returns:
True if successful. False otherwise.
"""
proc = subprocess.run(
[
"patch",
"-p1",
"--no-backup-if-mismatch",
"-i",
patch,
],
cwd=workdir,
)
return proc.returncode == 0
def apply_patch_script(script, workdir):
"""Run the given patch script, returning whether it exited cleanly.
Returns:
True if successful. False otherwise.
"""
return subprocess.run([script], cwd=workdir).returncode == 0
def determine_vendor_crates(vendor_path):
"""Returns a map of {crate_name: [directory]} at the given vendor_path."""
result = collections.defaultdict(list)
crate_version_re = re.compile(r"-\d+\.\d+\.\d+(?:[+-]|$)")
for crate_name_plus_ver in os.listdir(vendor_path):
version = crate_version_re.search(crate_name_plus_ver)
assert version, crate_name_plus_ver
name = crate_name_plus_ver[: version.start()]
result[name].append(crate_name_plus_ver)
for crate_list in result.values():
crate_list.sort()
return result
def apply_patches(patches_path, vendor_path):
"""Finds patches and applies them to sub-folders in the vendored crates.
Args:
patches_path: Path to folder with patches. Expect all patches to be one
level down (matching the crate name).
vendor_path: Root path to vendored crates directory.
"""
checksums_for = {}
# Don't bother running if patches directory is empty
if not Path(patches_path).is_dir():
return
patches_failed = False
vendor_crate_map = determine_vendor_crates(vendor_path)
# Look for all patches and apply them
for d in os.listdir(patches_path):
dir_path = os.path.join(patches_path, d)
# We don't process patches in root dir
if not os.path.isdir(dir_path):
continue
# We accept one of two forms here:
# - direct targets (these are named `${crate_name}-${version}`)
# - simply the crate name (which applies to all versions of the
# crate)
direct_target = os.path.join(vendor_path, d)
if os.path.isdir(direct_target):
patch_targets = [d]
elif d in vendor_crate_map:
patch_targets = vendor_crate_map[d]
else:
raise RuntimeError(f"Unknown crate in {vendor_path}: {d}")
for patch in os.listdir(dir_path):
file_path = os.path.join(dir_path, patch)
# Skip if not a patch file
if not os.path.isfile(file_path):
continue
if patch.endswith(".patch"):
apply = apply_single_patch
elif os.access(file_path, os.X_OK):
apply = apply_patch_script
else:
# Unrecognized. Skip it.
continue
for target_name in patch_targets:
checksums_for[target_name] = True
target = os.path.join(vendor_path, target_name)
logging.info("Applying %s to %s", file_path, target)
if not apply(file_path, target):
logging.error("Failed to apply %s to %s", file_path, target)
patches_failed = True
# Do this late, so we can report all of the failing patches in one
# invocation.
if patches_failed:
raise ValueError("Patches failed; please see above logs")
# Re-run checksums for all modified packages since we applied patches.
for key in checksums_for.keys():
_rerun_checksums(os.path.join(vendor_path, key))
def generate_patches_manifest(
patch_dirs: Dict[str, Path],
) -> Dict[str, List[BazelAnnotation]]:
"""Returns a dictionary containing json configuration of the patch file."""
patches = collections.defaultdict(list)
for label, patch_dir in patch_dirs.items():
for d in sorted(patch_dir.iterdir()):
if d.is_dir():
crate, version = _PATCH_VERSION_REGEX.match(d.name).groups()
patch_files = [
p.relative_to(patch_dir) for p in d.glob("*.patch")
]
# Some directories instead have shell scripts to remove the
# executable bit from files. We don't care about these ones,
# since we're not vendoring with bazel, which won't let you
# execute them anyway.
if patch_files:
# `glob` has no ordering guarantees, so sort to ensure the output
# is independent of dirent ordering.
patch_files.sort()
patch = dict(
version=version or "*",
# The default bazel patch tool doesn't support fuzz (approximate
# matching), which our patches rely on.
patch_tool="patch",
patch_args=["-p1"],
patches=[
label.format(patch=patch) for patch in patch_files
],
)
patches[crate].append(patch)
return patches
def get_workspace_cargo_toml(working_dir):
"""Returns all Cargo.toml files under working_dir."""
return [working_dir / "projects" / "Cargo.toml"]
def run_cargo_vendor(working_dir):
"""Runs cargo vendor.
Args:
working_dir: Directory to run inside. This should be the directory where
Cargo.toml is kept.
"""
# `cargo vendor` may update dependencies (which may update metadata).
load_all_package_metadata.cache_clear()
# Cargo will refuse to revendor into versioned directories, which leads to
# repeated `./vendor.py` invocations trying to apply patches to
# already-patched sources. Remove the existing vendor directory to avoid
# this.
vendor_dir = working_dir / "vendor"
if vendor_dir.exists():
shutil.rmtree(vendor_dir)
cargo_cmdline = [
"cargo",
"vendor",
"--versioned-dirs",
"-v",
"--manifest-path=projects/Cargo.toml",
"--",
"vendor",
]
subprocess.check_call(cargo_cmdline, cwd=working_dir)
def load_single_metadata(working_dir, filter_platform):
"""Load metadata for all projects under a given directory.
Args:
working_dir: Base directory to run from.
filter_platform: Filter packages to ones configured for this platform.
"""
cmd = [
"cargo",
"metadata",
"--format-version=1",
# Use `--quiet` here, since cargo may warn about dependencies which
# don't strictly make sense (e.g., some third-party packages depend on
# things like bindgen-cli, which doesn't have a library target).
#
# We don't care. :)
"--quiet",
"--manifest-path=projects/Cargo.toml",
]
# Conditionally add platform filter
if filter_platform:
cmd += ("--filter-platform", filter_platform)
output = subprocess.check_output(cmd, cwd=working_dir)
return json.loads(output)
# Calls to this are somewhat expensive, and repeated a fair few times
# throughout `./vendor.py`. Measuring locally, having a cache here speeds this
# script up by 1.4x.
@functools.lru_cache()
def load_all_package_metadata(working_dir, platforms=ALL_SUPPORTED_PLATFORMS):
"""Loads and merges metadata for all platforms in `platforms`.
This drops a lot of data from `cargo metadata`. Some of this metadata is
hard to merge, other bits of it just aren't worth keeping at the moment.
"""
assert platforms, f"`platforms` should have things; has {platforms}"
found_package_ids = set()
results = []
for platform in platforms:
metadata = load_single_metadata(working_dir, platform)["packages"]
for package in metadata:
package_id = package["id"]
if package_id in found_package_ids:
continue
found_package_ids.add(package_id)
results.append(
{
"id": package["id"],
"license": package["license"],
"license_file": package["license_file"],
"name": package["name"],
"version": package["version"],
}
)
return results
class LicenseManager:
"""Manage consolidating licenses for all packages."""
# These are all the licenses we support. Keys are what is seen in metadata
# and values are what is expected by ebuilds.
SUPPORTED_LICENSES = {
"0BSD": "0BSD",
"Apache-2.0": "Apache-2.0",
"BSD-2-Clause": "BSD-2",
"BSD-3-Clause": "BSD-3",
"ISC": "ISC",
"MIT": "MIT",
"MPL-2.0": "MPL-2.0",
"unicode": "unicode",
"Unicode-3.0": "unicode",
"Zlib": "ZLIB",
}
# Prefer to take attribution licenses in this order. All these require that
# we actually use the license file found in the package so they MUST have
# a license file set.
PREFERRED_ATTRIB_LICENSE_ORDER = ["MIT", "BSD-3", "ISC"]
# If Apache license is found, always prefer it (simplifies attribution)
APACHE_LICENSE = "Apache-2.0"
# Regex for license files found in the vendored directories. Search for
# these files with re.IGNORECASE.
#
# These will be searched in order with the earlier entries being preferred.
LICENSE_NAMES_REGEX = [
r"^license-mit$",
r"^copyright$",
r"^licen[cs]e.*$",
]
# Some crates have their license file in other crates. This usually occurs
# because multiple crates are published from the same git repository and the
# license isn't updated in each sub-crate. In these cases, we can just
# ignore these packages.
MAP_LICENSE_TO_OTHER = {
"blazesym-c": "blazesym",
"failure_derive": "failure",
"grpcio-compiler": "grpcio",
"grpcio-sys": "grpcio",
"mocktopus_macros": "mocktopus",
"protobuf-codegen": "protobuf",
"protobuf-parse": "protobuf",
"protobuf-support": "protobuf",
"rustyline-derive": "rustyline",
}
# Map a package to a specific license and license file. Only use this if
# a package doesn't have an easily discoverable license or exports its
# license in a weird way. Prefer to patch the project with a license and
# upstream the patch instead.
STATIC_LICENSE_MAP = {
# "package name": ("license name", "license file relative location")
# Patches for adding these are upstream, but applying them doesn't
# affect `cargo metadata`. This is presumably because it can't
# detect our vendor directory.
# https://gitlab.freedesktop.org/slirp/libslirp-sys/-/merge_requests/6
"libslirp-sys": ("MIT", "LICENSE"),
# https://gitlab.freedesktop.org/anholt/deqp-runner/-/merge_requests/48
"deqp-runner": ("MIT", "LICENSE"),
# https://github.com/DimaKudosh/difflib/blob/master/LICENSE
"difflib": ("MIT", "LICENSE"),
# Upstream prefers to embed license text inside README.md:
"riscv": ("ISC", "README.md"),
"riscv-rt": ("ISC", "README.md"),
"zerocopy": ("BSD-2", "LICENSE"),
"zerocopy-derive": ("BSD-2", "LICENSE"),
}
def __init__(self, working_dir, vendor_dir):
self.working_dir = working_dir
self.vendor_dir = vendor_dir
def _find_license_in_dir(self, search_dir):
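"""Yields paths of license-like files found directly inside `search_dir`."""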
for p in os.listdir(search_dir):
# Ignore anything that's not a file
if not os.path.isfile(os.path.join(search_dir, p)):
continue
# Now check if the name matches any of the regexes
# We'll return the first matching file.
for regex in self.LICENSE_NAMES_REGEX:
if re.search(regex, p, re.IGNORECASE):
yield os.path.join(search_dir, p)
break
def _guess_license_type(self, license_file):
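"""Guesses the license type from the file name or contents; "" if unknown."""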
if "-MIT" in license_file:
return "MIT"
elif "-APACHE" in license_file:
return "APACHE"
elif "-BSD" in license_file:
return "BSD-3"
with open(license_file, "r") as f:
lines = f.read()
if "MIT" in lines:
return "MIT"
elif "Apache" in lines:
return "APACHE"
elif "BSD 3-Clause" in lines:
return "BSD-3"
return ""
def generate_license(
self,
skip_license_check,
print_map_to_file,
license_shorthand_file,
destroyed_crates,
):
"""Generate single massive license file from metadata."""
metadata = load_all_package_metadata(self.working_dir)
special_unicode_license = "(MIT OR Apache-2.0) AND Unicode-DFS-2016"
special_whatwg_license = "(Apache-2.0 OR MIT) AND BSD-3-Clause"
bad_licenses = {}
# Keep the license map ordered so the generated output is consistent.
license_map = {}
skip_license_check = skip_license_check or []
has_unicode_license = False
has_whatwg_license = False
for package in metadata:
# Skip the synthesized Cargo.toml packages that exist solely to
# list dependencies.
if "path+file:///" in package["id"]:
continue
pkg_name = package["name"]
pkg_version = package["version"]
if pkg_name in skip_license_check:
logging.info(
"Skipped license check on %s. Reason: Skipped from command line",
pkg_name,
)
continue
# Skip the license check for packages we have destroyed.
if (pkg_name, pkg_version) in destroyed_crates:
continue
if pkg_name in self.MAP_LICENSE_TO_OTHER:
logging.info(
"Skipped license check on %s. Reason: License already in %s",
pkg_name,
self.MAP_LICENSE_TO_OTHER[pkg_name],
)
continue
# Check if we have a static license map for this package. Use the
# static values if we have it already set.
if pkg_name in self.STATIC_LICENSE_MAP:
license, license_file = self.STATIC_LICENSE_MAP[pkg_name]
license_map[pkg_name] = {
"license": license,
"license_file": license_file,
}
continue
license_files = []
# use `or ''` instead of get's default, since `package` may have a
# None value for 'license'.
license = package.get("license") or ""
# We ignore the metadata for license file because most crates don't
# have it set. Just scan the source for licenses.
license_files = list(
self._find_license_in_dir(
os.path.join(self.vendor_dir, f"{pkg_name}-{pkg_version}")
)
)
# FIXME(b/240953811): The code later in this loop is only
# structured to handle ORs, not ANDs. Fortunately, this license in
# particular is `AND`ed between a super common license (Apache) and
# a more obscure one (unicode). This hack is specifically intended
# for the `unicode-ident` crate, though no crate name check is
# made, since it's OK if other crates happen to have this license.
if license == special_unicode_license:
has_unicode_license = True
# We'll check later to be sure MIT or Apache-2.0 is represented
# properly.
for x in license_files:
if os.path.basename(x) == "LICENSE-UNICODE":
license_file = x
break
else:
raise ValueError(
"No LICENSE-UNICODE found in " f"{license_files}"
)
license_map[pkg_name] = {
"license": license,
"license_file": license_file,
}
continue
# FIXME(b/240953811): This is the same hack as above for
# `unicode-ident`, except this time it handles the license of the
# `encoding_rs` crate.
if license == special_whatwg_license:
has_whatwg_license = True
# We'll check later to be sure MIT or Apache-2.0 is represented
# properly.
for x in license_files:
if os.path.basename(x) == "LICENSE-WHATWG":
license_file = x
break
else:
raise ValueError(
"No LICENSE-WHATWG found in " f"{license_files}"
)
license_map[pkg_name] = {
"license": license,
"license_file": license_file,
}
continue
# If there are multiple licenses, they are delimited with "OR" or "/"
delim = " OR " if " OR " in license else "/"
found = [x.strip() for x in license.split(delim)]
# Filter licenses to ones we support
licenses_or = [
self.SUPPORTED_LICENSES[f]
for f in found
if f in self.SUPPORTED_LICENSES
]
# If apache license is found, always prefer it because it simplifies
# license attribution (we can use existing Apache notice)
if self.APACHE_LICENSE in licenses_or:
license_map[pkg_name] = {"license": self.APACHE_LICENSE}
# Handle single license that has at least one license file
# We pick the first license file and the license
elif len(licenses_or) == 1:
if license_files:
l = licenses_or[0]
lf = license_files[0]
license_map[pkg_name] = {
"license": l,
"license_file": os.path.relpath(lf, self.working_dir),
}
else:
bad_licenses[pkg_name] = "{} missing license file".format(
licenses_or[0]
)
# Handle multiple licenses
elif len(licenses_or) > 1:
# Check preferred licenses in order
license_found = False
for l in self.PREFERRED_ATTRIB_LICENSE_ORDER:
if l not in licenses_or:
continue
for f in license_files:
if self._guess_license_type(f) == l:
license_found = True
license_map[pkg_name] = {
"license": l,
"license_file": os.path.relpath(
f, self.working_dir
),
}
break
# Break out of loop if license is found
if license_found:
break
else:
bad_licenses[pkg_name] = license
# If we had any bad licenses, we need to abort
if bad_licenses:
for k in bad_licenses.keys():
logging.error(
"%s had no acceptable licenses: %s", k, bad_licenses[k]
)
raise Exception("Bad licenses in vendored packages.")
# Write license map to file
if print_map_to_file:
with open(
os.path.join(self.working_dir, print_map_to_file), "w"
) as lfile:
json.dump(license_map, lfile, sort_keys=True)
# Raise an error for missing license files unless we have a valid
# reason to ignore them.
raise_missing_license = False
for name, v in license_map.items():
if (
"license_file" not in v
and v.get("license", "") != self.APACHE_LICENSE
):
raise_missing_license = True
logging.error(
" %s: Missing license file. Fix or add to ignorelist.",
name,
)
if raise_missing_license:
raise Exception(
"Unhandled missing license file. "
"Make sure all are accounted for before continuing."
)
has_license_types = {x["license"] for x in license_map.values()}
if has_unicode_license:
# Replace this license with the actual SPDX license we plan to use.
has_license_types.remove(special_unicode_license)
has_license_types.add("unicode")
if self.APACHE_LICENSE not in has_license_types:
raise ValueError(
"Need the apache license; currently have: "
f"{sorted(has_license_types)}"
)
if has_whatwg_license:
# Replace this license with the actual SPDX license we plan to use.
has_license_types.remove(special_whatwg_license)
has_license_types.add("BSD-3")
if self.APACHE_LICENSE not in has_license_types:
raise ValueError(
"Need the apache license; currently have: "
f"{sorted(has_license_types)}"
)
sorted_licenses = sorted(has_license_types)
logging.info("The following licenses are in use: %s", sorted_licenses)
header = textwrap.dedent(
"""\
# File to describe the licenses used by this registry.
# Used so it's easy to automatically verify ebuilds are updated.
# Each line is a license. Lines starting with # are comments.
"""
)
with open(license_shorthand_file, "w", encoding="utf-8") as f:
f.write(header)
f.write("\n".join(sorted_licenses))
# Ensure there's a newline at the end to appease `cros format`.
f.write("\n")
def clean_source_related_lines_in_place(cargo_toml):
"""Removes all [[bin]] (and similar) sections in `cargo_toml`."""
cargo_toml.pop("bench", None)
cargo_toml.pop("bin", None)
cargo_toml.pop("examples", None)
cargo_toml.pop("test", None)
lib = cargo_toml.get("lib")
if lib:
lib.pop("path", None)
package = cargo_toml.get("package")
if package:
package.pop("build", None)
package.pop("default-run", None)
package.pop("include", None)
def clean_features_in_place(cargo_toml):
"""Removes all side-effects of features in `cargo_toml`."""
features = cargo_toml.get("features")
if not features:
return
for name in features:
features[name] = []
def remove_all_dependencies_in_place(cargo_toml):
"""Removes all `target.*.dependencies` from `cargo_toml`."""
cargo_toml.pop("build-dependencies", None)
cargo_toml.pop("dependencies", None)
cargo_toml.pop("dev-dependencies", None)
target = cargo_toml.get("target")
if not target:
return
empty_keys = []
for key, values in target.items():
values.pop("build-dependencies", None)
values.pop("dependencies", None)
values.pop("dev-dependencies", None)
if not values:
empty_keys.append(key)
if len(empty_keys) == len(target):
del cargo_toml["target"]
else:
for key in empty_keys:
del target[key]
class CrateDestroyer:
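"""Empties out crates that shouldn't be built, leaving stub packages behind."""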
def __init__(self, working_dir, vendor_dir):
self.working_dir = working_dir
self.vendor_dir = vendor_dir
def _modify_cargo_toml(self, pkg_path):
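"""Rewrites an emptied crate's Cargo.toml so it still parses but does nothing."""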
with open(os.path.join(pkg_path, "Cargo.toml"), "rb") as cargo:
contents = tomli.load(cargo)
package = contents["package"]
# Change the description and license, and delete the license_file key.
package["description"] = "Empty crate that should not build."
package["license"] = "Apache-2.0"
package.pop("license_file", None)
# If there's no build.rs but we specify `links = "foo"`, Cargo gets
# upset.
package.pop("links", None)
# Some packages have cfg-specific dependencies. Remove them here; we
# don't care about the dependencies of an empty package.
#
# This is a load-bearing optimization: `dev-python/toml` doesn't
# always round-trip dumps(loads(x)) correctly when `x` has keys with
# strings (b/242589711#comment3). The place this has bitten us so far
# is target dependencies, which can be harmlessly removed for now.
#
# Cleaning features in-place is also necessary, since we're removing
# dependencies, and a feature can enable features in dependencies.
# Cargo errors out on `[features] foo = "bar/baz"` if `bar` isn't a
# dependency.
clean_features_in_place(contents)
remove_all_dependencies_in_place(contents)
# Since we're removing all source files, also be sure to remove
# source-related keys.
clean_source_related_lines_in_place(contents)
with open(os.path.join(pkg_path, "Cargo.toml"), "wb") as cargo:
tomli_w.dump(contents, cargo)
def _replace_source_contents(self, package_path, compile_error):
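"""Replaces the crate's sources with a stub lib.rs, keeping Cargo.toml and checksums."""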
# First load the checksum file before starting
checksum_file = os.path.join(package_path, ".cargo-checksum.json")
with open(checksum_file, "r") as csum:
checksum_contents = json.load(csum)
# Also load the Cargo.toml file, which we need to write back.
cargo_file = os.path.join(package_path, "Cargo.toml")
with open(cargo_file, "rb") as cfile:
cargo_contents = cfile.read()
shutil.rmtree(package_path)
# Make package and src dirs and replace lib.rs
os.makedirs(os.path.join(package_path, "src"), exist_ok=True)
with open(os.path.join(package_path, "src", "lib.rs"), "w") as librs:
librs.write(
EMPTY_CRATE_BODY if compile_error else NOP_EMPTY_CRATE_BODY
)
# Restore cargo.toml
with open(cargo_file, "wb") as cfile:
cfile.write(cargo_contents)
# Restore checksum
with open(checksum_file, "w") as csum:
json.dump(checksum_contents, csum)
def destroy_unused_crates(
self, destroyed_crates_file: Path
) -> List[Tuple[str, str]]:
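"""Empties crates unused on all supported platforms, plus a few force-emptied ones.

Records the emptied crates in `destroyed_crates_file` and returns them as
(name, version) pairs.
"""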
metadata = [
(x["name"], x["version"])
for x in load_single_metadata(
self.working_dir, filter_platform=None
)["packages"]
]
used_packages = {
(x["name"], x["version"])
for x in load_all_package_metadata(self.working_dir)
}
cleaned_packages = []
# Since we're asking for _all_ metadata packages, we may see
# duplication.
for package_desc in metadata:
package_name, package_version = package_desc
if package_desc in used_packages:
# b/239449434: Due to RUSTSEC-2020-0071, we manually empty
# time-0.1. It's present in the depgraph because chrono brings it
# in by default under the `oldtime` feature. Nothing in our
# depgraph actually makes use of this.
#
# b/271837931: bindgen versions before 0.63 do not work correctly
# with newer versions of LLVM. We patch grpcio-sys to depend on
# bindgen-0.63, but vendoring happens before that, so bindgen-0.57
# is still pulled in. This causes build errors, so remove it.
#
# b/288421251: The failure crate is no longer maintained and
# has an open CVE, so we patched the only user while we wait
# for a pull request to be accepted upstream.
#
# b/321669037: shlex 0.1 is affected by RUSTSEC-2024-0006. The
# only user is bindgen 0.57, which is patched out.
#
# b/347443966: blazesym by default uses simd-adler32. We'd
# rather patch it to avoid using "custom" crypto. Unfortunately
# this happens after vendoring, so remove it.
force_destroy_crate = (
(
package_name == "time"
and package_version.startswith("0.1")
)
or (
package_name == "bindgen"
and package_version.startswith("0.57")
)
or (package_name == "failure")
or (
package_name == "shlex"
and package_version.startswith("0.1")
)
or (package_name == "simd-adler32")
)
if not force_destroy_crate:
continue
logging.info(
f"Forcibly emptying %s@%s", package_name, package_version
)
# Detect the correct package path to destroy
pkg_path = os.path.join(
self.vendor_dir,
"{}-{}".format(package_name, package_version),
)
if not os.path.isdir(pkg_path):
logging.info(
f"Crate %s not found at %s", package_name, pkg_path
)
continue
self._replace_source_contents(
pkg_path, compile_error=package_name not in NOP_EMPTY_CRATES
)
self._modify_cargo_toml(pkg_path)
_rerun_checksums(pkg_path)
cleaned_packages.append((package_name, package_version))
for pkg, ver in cleaned_packages:
logging.info("Removed unused crate %s@%s", pkg, ver)
# Write a list of crates that've been destroyed. This is used by
# `scripts/cargo-vet.py`.
file_header = "# List of destroyed crates autogenerated by vendor.py."
file_lines = [f"{pkg} {ver}" for pkg, ver in cleaned_packages]
# Ensure there's a newline at the end of the file, so `cros format`
# remains happy.
file_lines.append("")
destroyed_crates_file.write_text(
"\n".join([file_header] + file_lines), encoding="utf-8"
)
return cleaned_packages
class InProgressStamp:
"""Class that represents an 'in-progress' file.
This file helps make it more obvious when vendor.py has not completed
successfully: b/278073343. It's intended to stick around until vendor.py
terminates successfully, so this isn't phrased as a contextmanager or
similar.
"""
def __init__(self, vendor_artifacts: Path):
in_progress_stamp = vendor_artifacts / "vendor_script_in_progress"
message = "\n".join(
(
"# Stamp file that's created when vendor.py started running,",
"# and removed when vendor.py completes successfully.",
"# If this file is hanging around, vendor.py did not terminate",
"# successfully. Please try not to land a change that leaves",
"# vendor.py broken.",
)
)
in_progress_stamp.write_text(message, encoding="utf-8")
self._in_progress_stamp = in_progress_stamp
def note_successful_termination(self):
"""Called when vendor.py is about to exit successfully."""
self._in_progress_stamp.unlink()
@functools.total_ordering
class Package:
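"""A crate from `cargo metadata`, with its resolved dependency edges."""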
@classmethod
def from_metadata(cls, metadata: Dict[str, Any]) -> List["Package"]:
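"""Builds the sorted list of Packages described by `cargo metadata` output."""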
crates_by_id = {}
versions_by_name = collections.defaultdict(list)
packages = {pkg["id"]: pkg for pkg in metadata["packages"]}
resolved = {pkg["id"]: pkg for pkg in metadata["resolve"]["nodes"]}
assert packages.keys() == resolved.keys()
for pkg_id, pkg in packages.items():
crate = Package(pkg_id, pkg, resolved[pkg_id])
versions_by_name[crate.name].append(crate)
crates_by_id[crate.id] = crate
latest_crates = {
name: max(crates) for name, crates in versions_by_name.items()
}
for crate in crates_by_id.values():
crate.is_latest = crate == latest_crates[crate.name]
for pkg_id in crate.dep_ids:
dep = crates_by_id[pkg_id]
crate.deps.append(dep)
dep.reverse_deps.append(crate)
return sorted(crates_by_id.values())
def __init__(
self, pkg_id: str, package: Dict[str, Any], resolved: Dict[str, Any]
):
self.name = package["name"]
self.version = package["version"]
source = package["source"]
self.id = pkg_id
self.is_external = source is not None and source.startswith("registry+")
self.dep_ids = [dep["pkg"] for dep in resolved["deps"]]
self.deps: List[Package] = []
self.reverse_deps: List[Package] = []
self.is_latest = True
# Sometimes features can enable a feature in a transitive crate.
# Ignore them and set them manually in annotations.json.
default_features = package["features"].get("default", [])
self.default_features = {f for f in default_features if "/" not in f}
self.features = set(resolved["features"])
# Inline default features here. Undo this when outputting features.
if "default" in self.features:
self.features.discard("default")
self.features.update(self.default_features)
def find_feature(
options: List[str], features: List[str]
) -> Optional[str]:
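"""Returns the first entry of `options` present in `features`, else None."""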
for option in options:
if option in features:
return option
return None
enable_std_flag = find_feature(_STD_FEATURES, package["features"])
disable_std_flag = find_feature(_NO_STD_FEATURES, package["features"])
supports_no_std = (
enable_std_flag
or disable_std_flag
or find_feature(_NO_STD_FEATURES, package["categories"])
or find_feature(_NO_STD_FEATURES, package["keywords"])
)
if supports_no_std and self.name not in _FAKE_NO_STD_CRATES:
self.no_std_features = set(self.features)
if disable_std_flag:
self.no_std_features.add(disable_std_flag)
elif enable_std_flag:
self.no_std_features.discard(enable_std_flag)
else:
self.no_std_features = None
def format(self, features: Set[str]) -> str:
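"""Renders this crate as a Cargo.toml dependency entry with the given features.

Returns "" for crates that don't need to be listed.
"""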
# Only bother outputting 3pp crates that we directly depend on.
if not self.is_external or all(
dep.is_external for dep in self.reverse_deps
):
return ""
default_features = self.default_features.issubset(features)
if default_features:
features = features.difference(self.default_features)
lines = []
if self.is_latest:
name = self.name
else:
name = (
self.name
+ "-"
+ self.version.replace(".", "-").replace("+", "-")
)
# This is used for us to see "What do I need to remove to be able to
# put everything on the same version?". There's not much value to
# showing this for the latest versions, especially since they will be
# a much longer list.
for rdep in self.reverse_deps:
lines.append(f"# Used by {rdep.name}-{rdep.version}")
if name == self.name and default_features and not features:
lines.append(f'{name} = "{self.version}"')
else:
default_features = (
"" if default_features else ', "default-features" = false'
)
features = (
f", features = {sorted(features)}" if self.features else ""
)
crate = "" if name == self.name else f'package = "{self.name}", '
lines.append(
f'{name} = {{ {crate}"version" = "{self.version}"{default_features}{features} }}'
)
return "\n".join(lines) + "\n"
def _key(self):
return (
self.name,
# TODO: strings can't compare against ints; implement proper semver
# ordering if this ever becomes a problem: https://semver.org/
[int(x) if x.isdigit() else x for x in self.version.split(".")],
)
def __repr__(self):
return f"{self.name} {self.version}"
def __eq__(self, other: "Package"):
return self._key() == other._key()
def __lt__(self, other: "Package"):
return self._key() < other._key()
def merge_annotations(a: Any, b: Any):
"""Merges two BazelAnnotations together, if possible."""
if a is None:
return b
if b is None:
return a
if isinstance(a, list) and isinstance(b, list):
return a + b
if isinstance(a, dict) and isinstance(b, dict):
result = {}
for key in set(itertools.chain(a.keys(), b.keys())):
result[key] = merge_annotations(a.get(key), b.get(key))
return result
# Merging strings may be doable in some cases (e.g., rustflags), but not
# all (e.g., patch program names). Also catch merging of different types.
raise TypeError(
f"Can't merge values {a} and {b} of types {type(a)} and {type(b)}"
)
def merge_annotation_maps(
map_a: Dict[str, List[BazelAnnotation]],
map_b: Dict[str, List[BazelAnnotation]],
mode: str,
) -> Dict[str, List[BazelAnnotation]]:
"""Merges two bazel annotation maps into one."""
# {crate_name: {crate_version: BazelAnnotation}}
annotations_by_version = collections.defaultdict(
lambda: collections.defaultdict(dict)
)
for crate_name, annotations in itertools.chain(
map_a.items(), map_b.items()
):
crate_annotations = annotations_by_version[crate_name]
for a in annotations:
a = copy.deepcopy(a)
modes = set(a.pop("modes", _ALL_MODES))
if not modes.issubset(_ALL_MODES):
raise ValueError(
f"Invalid modes {modes}. Modes must be a subset of {_ALL_MODES}"
)
if mode in modes:
version = a.get("version", "*")
existing = crate_annotations[version]
# `merge_annotations` can't merge the version field; handle that
# manually.
existing.pop("version", None)
crate_annotations[version] = merge_annotations(a, existing)
existing["version"] = version
return {
crate_name: [v for k, v in sorted(annotations.items())]
for crate_name, annotations in annotations_by_version.items()
}
def generate_annotations_file(
cargo_dir: Path,
mode,
all_annotations: Sequence[Dict[str, List[BazelAnnotation]]],
):
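"""Merges the given annotation maps for `mode` and writes annotations.json.

Annotations whose version doesn't appear in Cargo.lock are dropped.
"""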
merged_annotations = {}
for annotation in all_annotations:
merged_annotations = merge_annotation_maps(
merged_annotations, annotation, mode=mode
)
package_versions = collections.defaultdict(set)
with (cargo_dir / "Cargo.lock").open("rb") as f:
for package in tomli.load(f)["package"]:
package_versions[package["name"]].add(package["version"])
for package, versions in package_versions.items():
# All of these versions are treated as "*" and are always valid
versions.update(["*", "", None])
filtered_annotations = collections.defaultdict(list)
for package, annotations in merged_annotations.items():
for annotation in annotations:
if annotation.get("version") in package_versions[package]:
filtered_annotations[package].append(annotation)
with (cargo_dir / "annotations.json").open("w", encoding="utf-8") as f:
json.dump(filtered_annotations, f, indent=2, sort_keys=True)
f.write("\n")
def generate_metallurgy_crates(
projects_dir: Path,
vendor_artifacts_dir: Path,
bazel_artifacts_dir: Path,
available_patches: Dict[str, List[BazelAnnotation]],
destroyed_crates: Set[Tuple[str, str]],
):
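"""Regenerates the metallurgy Cargo.toml and annotation files from the vendored crates."""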
logging.info(
"Ensuring metallurgy crates are in sync with non-metallurgy crates..."
)
with (bazel_artifacts_dir / "annotations.toml").open("rb") as f:
annotations = tomli.load(f)
generate_annotations_file(
cargo_dir=vendor_artifacts_dir / "alchemy",
mode="alchemy",
all_annotations=[available_patches, annotations],
)
metadata = load_single_metadata(projects_dir.parent, filter_platform=None)
crates = Package.from_metadata(metadata)
for subdir, std in [("std", True), ("no_std", False)]:
logging.info("Syncing %s crates", subdir)
cargo_dir = vendor_artifacts_dir / subdir
metallurgy_crates = cargo_dir / "Cargo.toml"
with metallurgy_crates.open("w", encoding="utf-8") as f:
f.write(_METALLURGY_CARGO_TOML_HEADER)
for crate in crates:
if (crate.name, crate.version) in destroyed_crates:
continue
features = crate.features if std else crate.no_std_features
if features is not None:
f.write(crate.format(features))
# Copy from Cargo.lock to ensure we start with the same deps, and then do a
# minimal update (--workspace does a minimal update).
orig_lockfile = projects_dir / "Cargo.lock"
new_lockfile = cargo_dir / "Cargo.lock"
shutil.copyfile(orig_lockfile, new_lockfile)
subprocess.run(
["cargo", "update", "--workspace"], cwd=cargo_dir, check=True
)
generate_annotations_file(
cargo_dir=cargo_dir,
mode=subdir,
all_annotations=[available_patches, annotations],
)
logging.info("Crates in sync.")
def main():
logging.basicConfig(
format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
"%(message)s",
level=logging.INFO,
)
parser = argparse.ArgumentParser(description="Vendor packages properly")
parser.add_argument(
"--skip-license-check",
"-s",
help="Skip the license check on a specific package",
action="append",
)
parser.add_argument("--license-map", help="Write license map to this file")
parser.add_argument(
"--skip-version-check",
action="store_true",
help="Don't exit if our repo isn't up-to-date.",
)
parser.add_argument(
"--skip-cargo-vendor",
action="store_true",
help="Don't run cargo-vendor. Please don't upload changes that skip "
"this check. This flag is for local development use only.",
)
parser.add_argument(
"--skip-metallurgy",
action="store_true",
help="Skip metallurgy file generation. Please don't upload changes "
"that skip this step. This flag is for local development use only.",
)
args = parser.parse_args()
current_path = Path(__file__).parent.absolute()
if not args.skip_version_check:
rust_crates.exit_if_head_is_not_up_to_date(
current_path, disable_check_flag="--skip-version-check"
)
patches = os.path.join(current_path, "patches")
vendor = os.path.join(current_path, "vendor")
vendor_artifacts = current_path / "vendor_artifacts"
license_shorthand_file = os.path.join(vendor_artifacts, "licenses_used.txt")
destroyed_crates_file = vendor_artifacts / "destroyed_crates.txt"
in_progress_stamp = InProgressStamp(vendor_artifacts)
# First, actually run cargo vendor
if args.skip_cargo_vendor:
# Since this is for dev only, this is fine.
destroyed_crates = set()
else:
run_cargo_vendor(current_path)
# Order matters here:
# - Apply patches (also re-calculates checksums)
# - Cleanup any owners files (otherwise, git check-in or checksums are
# unhappy)
# - Destroy unused crates
apply_patches(patches, vendor)
cleanup_owners(vendor)
destroyer = CrateDestroyer(current_path, vendor)
destroyed_crates = destroyer.destroy_unused_crates(
destroyed_crates_file
)
destroyed_crates = set(destroyed_crates)
if not args.skip_metallurgy:
patches_manifest = generate_patches_manifest(
{
"@@//third_party/rust_crates/patches:{patch}": Path(patches),
"@@//bazel/rust/alchemy_crates/patches:{patch}": (
vendor_artifacts / "alchemy/patches"
),
}
)
generate_metallurgy_crates(
current_path / "projects",
vendor_artifacts,
current_path / "bazel_files",
patches_manifest,
destroyed_crates,
)
if not args.skip_cargo_vendor:
# Combine license file and check for any bad licenses
lm = LicenseManager(current_path, vendor)
lm.generate_license(
args.skip_license_check,
args.license_map,
license_shorthand_file,
destroyed_crates,
)
if args.skip_cargo_vendor or args.skip_metallurgy:
what_skipped = (
"cargo-vendor" if args.skip_cargo_vendor else "metallurgy checks"
)
logging.warning(
f"Skipped %s. This is for local dev only.", what_skipped
)
# Don't remove `in_progress_stamp`; it should hopefully serve as an
# extra reminder to rerun this without the skip flags.
return
in_progress_stamp.note_successful_termination()
if __name__ == "__main__":
main()