// blob: 42d9b00f004aeb76573805be67d808d32cb794b0
use std::path::Path;
use std::process::{Command, Stdio};
use crate::ci::CiEnv;
/// Borrowed git-related settings consumed by the helpers in this file.
#[derive(Debug)]
pub struct GitConfig<'cfg> {
    /// Name of the nightly branch.
    pub nightly_branch: &'cfg str,
    /// E-mail used as the `--author` filter when looking for upstream merge commits.
    pub git_merge_commit_email: &'cfg str,
}
/// Runs `cmd` to completion and returns its captured standard output as a `String`.
///
/// The child's standard error is set to be inherited from the current process.
///
/// # Errors
///
/// Returns a descriptive message when the command cannot be run, when it exits with a
/// non-success status, or when the captured bytes are not valid UTF-8.
pub fn output_result(cmd: &mut Command) -> Result<String, String> {
    // Shared UTF-8 decoding; a decoding failure is reported via its `Debug` rendering.
    let decode = |bytes: Vec<u8>| String::from_utf8(bytes).map_err(|err| format!("{err:?}"));

    let spawned = cmd.stderr(Stdio::inherit()).output();
    let captured = spawned.map_err(|e| format!("failed to run command: {cmd:?}: {e}"))?;

    if captured.status.success() {
        return decode(captured.stdout);
    }

    // Decode first: a decoding failure takes precedence over the status report below.
    let stderr_text = decode(captured.stderr)?;
    Err(format!(
        "command did not execute successfully: {:?}\nexpected success, got: {}\n{}",
        cmd, captured.status, stderr_text
    ))
}
/// Outcome of checking whether a set of paths carries modifications that are not upstream.
#[derive(Debug, Clone, PartialEq)]
pub enum PathFreshness {
    /// The paths have no local modifications; artifacts should be downloaded from the
    /// `upstream` commit.
    LastModifiedUpstream { upstream: String },
    /// Some of the paths were modified locally, where "local" essentially means
    /// "not upstream".
    /// `upstream` is the latest upstream merge commit that modified the set of paths.
    HasLocalModifications { upstream: String },
    /// No upstream commit could be found.
    /// This is not expected in most reasonable setups, but it cannot be ruled out.
    MissingUpstream,
}
/// Determines whether `target_paths` were last modified upstream or carry local
/// modifications.
///
/// The answer can be used to decide between downloading artifacts from CI and building
/// them locally.
///
/// The function assumes that at least one upstream bors merge commit is present in the
/// local git history.
///
/// `target_paths` must be a non-empty slice of paths (git `pathspec`s) relative to `git_dir`
/// whose modification would invalidate the artifact.
/// A pathspec may also be a negative match, i.e. `:!foo`, which matches changes outside
/// the `foo` directory.
/// See <https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec>
/// for how git `pathspec` works.
///
/// Behavior differs between CI and non-CI environments.
///
/// - Outside CI, we want to know whether `target_paths` were modified in a local commit on
///   top of the latest upstream commit available in local git history.
///   If they were not, we look for the most recent upstream commit (assumed to be made by
///   bors) that modified `target_paths`.
///   Simply taking the latest master commit is avoided on purpose: the result would change
///   after every rebase onto master even when `target_paths` were untouched upstream, and CI
///   artifacts would be redownloaded unnecessarily.
///
/// - In CI, a shallow clone of depth 2 is used, i.e. only a single parent commit (the most
///   recent bors merge commit) is fetched and the full history is unavailable. Only two
///   situations need to be told apart:
///     1) The current PR modified `target_paths`.
///        A build is typically necessary in that case.
///     2) The current PR did not modify `target_paths`.
///        The latest upstream commit is used, because there is no need to avoid
///        redownloading on CI.
///
/// # Panics
///
/// Panics if `target_paths` is empty, if any path (after stripping a leading `:!`) is not
/// relative, or if no upstream commit is found while running on CI.
///
/// # Errors
///
/// Propagates the error message of any failed git lookup.
pub fn check_path_modifications(
    git_dir: &Path,
    config: &GitConfig<'_>,
    target_paths: &[&str],
    ci_env: CiEnv,
) -> Result<PathFreshness, String> {
    assert!(!target_paths.is_empty());
    for path in target_paths {
        assert!(Path::new(path.trim_start_matches(":!")).is_relative());
    }

    let running_on_ci = matches!(ci_env, CiEnv::GitHubActions);
    let upstream_sha = if running_on_ci {
        // PR CI and try/auto CI have slightly different histories.
        // PR CI:
        //   <merge commit made by GitHub>
        //   1-N PR commits
        //   upstream merge commit made by bors
        //
        // try/auto CI:
        //   <**non-upstream** merge commit made by bors>
        //   1-N PR commits
        //   upstream merge commit made by bors
        //
        // In both cases HEAD should be a merge commit, so modifications of `target_paths`
        // in HEAD mean that the PR touched them. Otherwise the only available upstream
        // commit can be used for downloading artifacts.
        //
        // HEAD itself is never an upstream commit, so it is not considered.
        // Failing to find an upstream commit on CI means something is seriously wrong.
        get_closest_upstream_commit(Some(git_dir), config, ci_env)?
            .expect("No upstream commit was found on CI")
    } else {
        // Outside CI, prefer the most recent upstream commit that modified the paths, so
        // that the upstream reference does not change more often than necessary.
        let last_touching =
            get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?;
        // If there is no such commit, fall back to the latest upstream commit.
        let candidate = if let Some(sha) = last_touching {
            Some(sha)
        } else {
            get_closest_upstream_commit(Some(git_dir), config, ci_env)?
        };
        match candidate {
            Some(sha) => sha,
            None => return Ok(PathFreshness::MissingUpstream),
        }
    };

    // Locally we want to know whether anything changed since the latest upstream commit.
    // That should be equivalent to asking whether anything changed since the latest
    // upstream commit *that modified `target_paths`*, which saves one extra git invocation.
    let modified_locally = has_changed_since(git_dir, &upstream_sha, target_paths);
    let freshness = if modified_locally {
        PathFreshness::HasLocalModifications { upstream: upstream_sha }
    } else {
        PathFreshness::LastModifiedUpstream { upstream: upstream_sha }
    };
    Ok(freshness)
}
/// Reports whether any of `paths` differ from the `base` commit.
///
/// Runs `git diff-index --quiet <base> -- <paths>` inside `git_dir` and returns `true`
/// whenever that command does not exit successfully.
///
/// # Panics
///
/// Panics if the `git diff-index` process cannot be run.
pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
    run_git_diff_index(Some(git_dir), |diff_index| {
        let status = diff_index
            .arg("--quiet")
            .arg(base)
            .arg("--")
            .args(paths)
            .status()
            .expect("cannot run git diff-index");
        // Exit code 0 means no changes; exit code 1 means changes were detected.
        !status.success()
    })
}
/// Finds the latest upstream commit that modified `target_paths`.
///
/// Returns `Ok(None)` when git reports no matching commit.
///
/// # Errors
///
/// Propagates the error message of a failed git invocation.
fn get_latest_upstream_commit_that_modified_files(
    git_dir: &Path,
    git_config: &GitConfig<'_>,
    target_paths: &[&str],
) -> Result<Option<String>, String> {
    // In theory a single
    // `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
    // would find the latest upstream commit that modified `<paths>`.
    // That breaks down in a subtree sync branch containing merge commits whose first parent
    // is the subtree history and whose second parent is the rustc history: `--first-parent`
    // would only walk the subtree history and never reach a rustc commit.
    // Hence the two-step approach: first find the most recent upstream commit by *date*
    // (which works even in a subtree sync branch), then search for modified paths starting
    // from that commit.
    //
    // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
    let start = match get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)? {
        Some(sha) => sha,
        None => "HEAD".to_string(),
    };

    let mut rev_list = Command::new("git");
    rev_list
        .current_dir(git_dir)
        .arg("rev-list")
        .arg("--first-parent")
        .arg("-n1")
        .arg(&start)
        .arg("--author")
        .arg(git_config.git_merge_commit_email);
    if !target_paths.is_empty() {
        rev_list.arg("--").args(target_paths);
    }

    let raw = output_result(&mut rev_list)?;
    let sha = raw.trim();
    Ok(if sha.is_empty() { None } else { Some(sha.to_owned()) })
}
/// Finds the chronologically most recent commit in the local history that should also
/// exist upstream. Upstream commits are recognized by the e-mail of their author.
///
/// When running in CI, the first parent of `HEAD` is returned directly.
///
/// Returns `Ok(None)` when git reports no matching commit.
///
/// # Errors
///
/// Propagates the error message of a failed git invocation.
pub fn get_closest_upstream_commit(
    git_dir: Option<&Path>,
    config: &GitConfig<'_>,
    env: CiEnv,
) -> Result<Option<String>, String> {
    match env {
        CiEnv::GitHubActions => {
            // On CI the tip should always be a non-upstream merge commit whose first parent
            // is the most recently merged upstream commit, so that parent is the answer.
            return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
        }
        CiEnv::None => {}
    }

    let author_filter = format!("--author={}", config.git_merge_commit_email);

    let mut rev_list = Command::new("git");
    if let Some(dir) = git_dir {
        rev_list.current_dir(dir);
    }
    // `--first-parent` is deliberately not used: outside CI we may sit on a subtree merge
    // whose *second* parent is the main rustc history. `--first-parent` would then descend
    // into the subtree history, which may contain old, irrelevant bors commits.
    // With `--author-date-order`, git walks all parents and yields the chronologically most
    // recent bors commit.
    // This assumes that no subtree uses bors anymore and that their old bors commits are
    // much older than recent rustc bors commits!
    rev_list
        .arg("rev-list")
        .arg("--author-date-order")
        .arg(&author_filter)
        .arg("-n1")
        .arg("HEAD");

    let sha = output_result(&mut rev_list)?.trim().to_owned();
    Ok(Some(sha).filter(|found| !found.is_empty()))
}
/// Resolves `commit_ref` via `git rev-parse` and returns the trimmed output.
///
/// The command runs in `git_dir` when one is given.
///
/// # Errors
///
/// Propagates the error message of a failed git invocation.
fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
    let mut rev_parse = Command::new("git");
    if let Some(dir) = git_dir {
        rev_parse.current_dir(dir);
    }
    rev_parse.arg("rev-parse").arg(commit_ref);

    output_result(&mut rev_parse).map(|sha| sha.trim().to_owned())
}
/// Lists the files modified in the current branch relative to the closest upstream commit.
/// Committed changes, uncommitted changes and changes that are not even staged all count.
///
/// `extensions` filters the result by file extension; when it is empty, every file is
/// returned. Removed files are never included.
///
/// # Errors
///
/// Returns an error when no upstream commit is found or when a git invocation fails.
///
/// # Panics
///
/// Panics if a line of the `git diff-index --name-status` output contains no whitespace
/// separator.
pub fn get_git_modified_files(
    config: &GitConfig<'_>,
    git_dir: Option<&Path>,
    extensions: &[&str],
) -> Result<Vec<String>, String> {
    let merge_base = get_closest_upstream_commit(git_dir, config, CiEnv::None)?
        .ok_or_else(|| "No upstream commit was found".to_string())?;

    let listing = run_git_diff_index(git_dir, |cmd| {
        output_result(cmd.args(["--name-status", merge_base.trim()]))
    })?;

    let accept_any_extension = extensions.is_empty();
    let files = listing
        .lines()
        .filter_map(|line| {
            let (status, name) = line.trim().split_once(char::is_whitespace).unwrap();
            if status == "D" {
                return None;
            }
            // With an empty filter everything passes (including extension-less paths);
            // otherwise the path needs an extension that is listed in `extensions`.
            let wanted = accept_any_extension
                || Path::new(name)
                    .extension()
                    .map_or(false, |ext| extensions.contains(&ext.to_str().unwrap()));
            if wanted { Some(name.to_owned()) } else { None }
        })
        .collect();
    Ok(files)
}
/// Hands `func` a command prepared to run `git diff-index`, after refreshing the git index.
///
/// `diff-index` does not update the index by itself and may therefore report outdated
/// information, so `git update-index --refresh -q` is run first. Both commands run in
/// `git_dir` when one is given. The value returned by `func` is passed through unchanged.
fn run_git_diff_index<F, T>(git_dir: Option<&Path>, func: F) -> T
where
    F: FnOnce(&mut Command) -> T,
{
    // Builds `git <subcommand>` rooted at `git_dir` (if any).
    let new_git = |subcommand: &str| {
        let mut command = Command::new("git");
        if let Some(dir) = git_dir {
            command.current_dir(dir);
        }
        command.arg(subcommand);
        command
    };

    // The result is deliberately discarded: the refresh fails when some files are modified.
    let mut refresh = new_git("update-index");
    refresh.args(["--refresh", "-q"]);
    let _ = output_result(&mut refresh);

    let mut diff_index = new_git("diff-index");
    func(&mut diff_index)
}
/// Lists the files that have not been added to git yet.
///
/// Runs `git ls-files --others --exclude-standard` (in `git_dir` when one is given) and
/// returns one trimmed entry per output line. On success the result is always `Some`.
///
/// # Errors
///
/// Propagates the error message of a failed git invocation.
pub fn get_git_untracked_files(git_dir: Option<&Path>) -> Result<Option<Vec<String>>, String> {
    let mut ls_files = Command::new("git");
    if let Some(dir) = git_dir {
        ls_files.current_dir(dir);
    }
    ls_files.args(["ls-files", "--others", "--exclude-standard"]);

    let listing = output_result(&mut ls_files)?;
    let untracked: Vec<String> = listing.lines().map(|line| line.trim().to_owned()).collect();
    Ok(Some(untracked))
}