Change the premerge checks to only check the affected projects

The current setup is configuring the "affected projects" as well as their dependencies, and run `ninja all` followed by `ninja check-all`. This is quite a pessimization for leaf project which don't need to build and run the tests for their dependencies. For example a patch affecting only MLIR shouldn't need to build and run LLVM and clang tests. This patch changes this by running checks only for the affected project. For example a patch touching `mlir` is affecting `mlir` and `flang`. However `flang` depends on `clang`. So the list of projects to configure is `mlir;flang;clang;llvm;`, but we want to test only mlir and flang ; we'll run only `ninja check-mlir check-flang`. In practice in this example running `ninja all` builds 5658 targets and `ninja check-all` after that adds 716 more targets. On the other hands `ninja check-flang check-mlir` results in 3997 targets total. Concretely the contract with premerge_checks.py is changed so that the expected argument for the --projects flag is only the list of affected project, dependencies are automatically added.
2021-09-17 20:31:15 +00:00 · 2021-09-17 20:31:15 +00:00 · 6e624c30f9
commit 6e624c30f9
parent eb5f0871c9
4 changed files with 117 additions and 73 deletions
--- a/scripts/choose_projects.py
+++ b/scripts/choose_projects.py
@ -60,12 +60,20 @@ class ChooseProjects:
        for user, used_list in self.dependencies.items():
            for used in used_list:
                self.usages.setdefault(used, []).append(user)
-        self.all_projects = self.config['allprojects']
+        self.all_projects = self.config['allprojects'].keys()

    def get_excluded(self, target: str) -> Set[str]:
        excluded = self.config['excludedProjects'][target]
        return set(excluded if excluded is not None else [])

+    def get_check_targets(self, affected_projects: Set[str]) -> Set[str]:
+        """Return the `check-xxx` targets to pass to ninja for the given list of projects"""
+        targets = set()
+        all_projects = self.config['allprojects']
+        for project in affected_projects:
+            targets.update(set(all_projects[project]))
+        return targets
+
    @staticmethod
    def _detect_os() -> str:
        """Detect the current operating system."""
@ -73,7 +81,7 @@ class ChooseProjects:
            return 'windows'
        return 'linux'

-    def choose_projects(self, patch: str = None) -> List[str]:
+    def choose_projects(self, patch: str = None, current_os: str = None) -> List[str]:
        """List all touched project with all projects that they depend on and also
        all projects that depend on them"""
        if self.llvm_dir is None:
@ -90,23 +98,37 @@ class ChooseProjects:
            logging.warning('There were changes that could not be mapped to a project.'
                            'Building all projects instead!')
            return self.FALLBACK_PROJECTS
-        return self.extend_projects(changed_projects)
+        return self.extend_projects(changed_projects, current_os)

-    def extend_projects(self, projects: Set[str]) -> List[str]:
+    def extend_projects(self, projects: Set[str], current_os : str = None) -> List[str]:
        logging.info(f'projects: {projects}')
+        if not current_os:
+            current_os = self._detect_os()
+        excluded_projects = self.get_excluded(current_os)
        affected_projects = self.get_affected_projects(projects)
        logging.info(f'with affected projects: {affected_projects}')
-        affected_projects = self.add_dependencies(affected_projects)
-        logging.info(f'with dependencies: {affected_projects}')
-        to_exclude = affected_projects.intersection(self.get_excluded(self._detect_os()))
-        if len(to_exclude) != 0:
+        to_exclude = affected_projects.intersection(excluded_projects)
+        if len(to_exclude):
+            logging.warning(f'{to_exclude} projects are excluded on {current_os}')
        affected_projects = affected_projects - to_exclude
-            logging.warning(f'{to_exclude} projects are excluded')
-            logging.info(f'without excluded: {affected_projects}')
-        return sorted(affected_projects)
+        all_dependencies = set()
+        for project in affected_projects:
+            dependencies = self.get_dependencies(affected_projects)
+            logging.debug(f'> project {project} with dependencies: {dependencies}')
+            to_exclude = dependencies.intersection(excluded_projects)
+            if len(to_exclude) != 0:
+                logging.warning(f'Excluding project {project} because of excluded dependencies {to_exclude}')
+                affected_projects = affected_projects - project
+            else:
+                all_dependencies.update(dependencies)
+        logging.info(f'full dependencies: {all_dependencies}')
+        return sorted(affected_projects), sorted(all_dependencies)

    def run(self):
-        print(';'.join(self.choose_projects()))
+        affected_projects, dependencies = self.choose_projects()
+        print("Affected:", ';'.join(affected_projects))
+        print("Dependencies:", ';'.join(dependencies))
+        print("Check targets:", ';'.join(self.get_check_targets(affected_projects)))
        return 0

    def match_projects_dirs(self) -> bool:
@ -116,7 +138,7 @@ class ChooseProjects:
        subdirs = os.listdir(self.llvm_dir)
        for project in self.all_projects:
            if project not in subdirs:
-                logging.error('Project no found in LLVM root folder: {}'.format(project))
+                logging.error('Project not found in LLVM root folder: {}'.format(project))
                return False
        return True

@ -135,6 +157,7 @@ class ChooseProjects:

    def get_changed_projects(self, changed_files: Set[str]) -> Tuple[Set[str], bool]:
        """Get list of projects affected by the change."""
+        logging.info("Get list of projects affected by the change.")
        changed_projects = set()
        unmapped_changes = False
        for changed_file in changed_files:
@ -167,21 +190,24 @@ class ChooseProjects:
        logging.info(f'added {affected_projects - changed_projects} projects as they are affected')
        return affected_projects

-    def add_dependencies(self, projects: Set[str]) -> Set[str]:
-        """Return projects and their dependencies.
+    def get_dependencies(self, projects: Set[str]) -> Set[str]:
+        """Return transitive dependencies for a given project.

-        All all dependencies to `projects` so that they can be built.
+        These are the required dependencies for given `projects` so that they can be built.
        """
-        result = set(projects)
-        last_len = -1
-        while len(result) != last_len:
-            last_len = len(result)
-            changes = set()
-            for project in result:
+        all_dependencies = set()
+        # Recursive call to add a project and all its dependencies to `all_dependencies`.
+        def add_dependencies_for_project(project: str):
+            if project in all_dependencies:
+                return
            if project in self.dependencies:
-                    changes.update(self.dependencies[project])
-            result.update(changes)
-        return result
+                for dependent in self.dependencies[project]:
+                    if dependent not in projects:
+                        all_dependencies.add(dependent)
+                        add_dependencies_for_project(dependent)
+        for project in projects:
+            add_dependencies_for_project(project)
+        return all_dependencies

    def get_all_enabled_projects(self) -> List[str]:
        """Get list of all not-excluded projects for current platform."""
@ -191,7 +217,7 @@ class ChooseProjects:
 if __name__ == "__main__":
    logging.basicConfig(filename='choose_projects.log', level=logging.INFO)
    parser = argparse.ArgumentParser(
-        description='Compute the projects affected by a change.')
+        description='Compute the projects affected by a change. A patch file is expected on stdin.')
    parser.add_argument('llvmdir', default='.')
    args = parser.parse_args()
    chooser = ChooseProjects(args.llvmdir)
--- a/scripts/llvm-dependencies.yaml
+++ b/scripts/llvm-dependencies.yaml
@ -46,25 +46,27 @@ dependencies:

 # List of all projects in the LLVM monorepository. This list is taken from
 # llvm/CMakeLists.txt in "set(LLVM_ALL_PROJECTS ..."
+# The value for all project is the list of targets to tests when this project
+# is affected by a patch.
 allprojects:
-  - clang
-  - clang-tools-extra
-  - compiler-rt
-  - cross-project-tests
-  - flang
-  - libc
-  - libclc
-  - libcxx
-  - libcxxabi
-  - libunwind
-  - lld
-  - lldb
-  - mlir
-  - openmp
-  - parallel-libs
-  - polly
-  - pstl
-  - llvm
+  clang: ["check-clang"]
+  clang-tools-extra: ["check-clang-tools"]
+  compiler-rt: ["check-all"]  # check-compiler-rt seems to exist only in standalone builds.
+  cross-project-tests: ["check-cross-project"]
+  flang: ["check-flang"]
+  libc: ["check-libc"]
+  libclc: ["check-all"] # There does not seem to be a more specific target.
+  libcxx: ["check-cxx"]
+  libcxxabi: ["check-cxxabi"]
+  libunwind: ["check-unwind"]
+  lld: ["check-lld"]
+  lldb: ["check-all"] # FIXME: `check-lldb` may not include every lldb tests?
+  mlir: ["check-mlir"]
+  openmp: ["check-all"] # There does not seem to be a more specific target.
+  parallel-libs: ["check-all"]
+  polly: ["check-polly-tests"]
+  pstl: ["check-all"] # There does not seem to be a more specific target.
+  llvm: ["check-llvm"]

 # projects excluded from automatic configuration as they could not be built
 excludedProjects:
--- a/scripts/pipeline_premerge.py
+++ b/scripts/pipeline_premerge.py
@ -50,32 +50,21 @@ if __name__ == '__main__':
        if e.startswith('ph_'):
            env[e] = os.getenv(e)
    repo = git.Repo('.')
+    steps = []
    # List all affected projects.
    patch = repo.git.diff("HEAD~1")
    cp = ChooseProjects('.')
-    modified_files = cp.get_changed_files(patch)
-    modified_projects, unmapped_changes = cp.get_changed_projects(modified_files)
-    if unmapped_changes:
-        logging.warning('There were changes that could not be mapped to a project. Checking everything')
-        modified_projects = set(cp.all_projects)
-    logging.info(f'modified projects: {modified_projects}')
-    # Add projects that depend on modified.
-    affected_projects = cp.get_affected_projects(modified_projects)
-    steps = []
-    projects = cp.add_dependencies(affected_projects)
-    logging.info(f'projects with dependencies: {projects}')
-    # Add generic Linux checks.
-    excluded_linux = cp.get_excluded('linux')
-    logging.info(f'excluded for linux: {excluded_linux}')
-    linux_projects = projects - excluded_linux
+
+    linux_projects, dependencies = cp.choose_projects(patch = patch, current_os = "linux")
+    logging.info(f'linux_projects: {linux_projects} (dependencies: {dependencies}')
    if len(linux_projects) > 0:
        steps.extend(generic_linux(';'.join(sorted(linux_projects)), True))
-    # Add generic Windows steps.
-    excluded_windows = cp.get_excluded('windows')
-    logging.info(f'excluded for windows: {excluded_windows}')
-    windows_projects = projects - excluded_windows
+
+    windows_projects, dependencies = cp.choose_projects(patch = patch, current_os = "windows")
+    logging.info(f'windows_projects: {windows_projects} (dependencies: {dependencies}')
    if len(windows_projects) > 0:
        steps.extend(generic_windows(';'.join(sorted(windows_projects))))
+
    # Add custom checks.
    if os.getenv('ph_skip_generated') is None:
        e = os.environ.copy()
@ -83,6 +72,7 @@ if __name__ == '__main__':
        e["BUILDKITE_COMMIT"] = repo.head.commit.hexsha
        for gen in steps_generators:
            steps.extend(from_shell_output(gen, env=e))
+    modified_files = cp.get_changed_files(patch)
    steps.extend(bazel(modified_files))

    if phid is None:
--- a/scripts/premerge_checks.py
+++ b/scripts/premerge_checks.py
@ -33,6 +33,8 @@ from buildkite_utils import upload_file, annotate, strip_emojis
 from exec_utils import watch_shell, if_not_matches, tee
 from phabtalk.phabtalk import Report, PhabTalk, Step

+from choose_projects import ChooseProjects
+

 def ninja_all_report(step: Step, _: Report):
    step.reproduce_commands.append('ninja all')
@ -54,6 +56,16 @@ def ninja_check_all_report(step: Step, _: Report):
    logging.debug(f'ninja check-all: returned {rc}')
    step.set_status_from_exit_code(rc)

+def ninja_check_projects_report(step: Step, _: Report, checks: str):
+    print('Full log will be available in Artifacts "ninja-check.log"', flush=True)
+    step.reproduce_commands.append(f'ninja {checks}')
+    rc = watch_shell(
+        sys.stdout.buffer.write,
+        sys.stderr.buffer.write,
+        f'ninja {checks}', cwd=build_dir)
+    logging.debug(f'ninja {checks}: returned {rc}')
+    step.set_status_from_exit_code(rc)
+

 def run_step(name: str, report: Report, thunk: Callable[[Step, Report], None]) -> Step:
    start = time.time()
@ -76,7 +88,7 @@ def run_step(name: str, report: Report, thunk: Callable[[Step, Report], None]) -

 def cmake_report(projects: str, step: Step, _: Report):
    global build_dir
-    cmake_result, build_dir, cmake_artifacts, commands = run_cmake.run(projects, os.getcwd())
+    cmake_result, build_dir, cmake_artifacts, commands = run_cmake.run(projects, repo_path=os.getcwd())
    for file in cmake_artifacts:
        if os.path.exists(file):
            shutil.copy2(file, artifacts_dir)
@ -94,12 +106,14 @@ def as_dict(obj):
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Runs premerge checks8')
    parser.add_argument('--log-level', type=str, default='WARNING')
+    parser.add_argument('--build-and-test-all', action='store_true',
+                        help="Run `ninja all` and `ninja check-all`, instead of "
+                        "the default of running `ninja check-{project}`.")
    parser.add_argument('--check-clang-format', action='store_true')
    parser.add_argument('--check-clang-tidy', action='store_true')
    parser.add_argument('--projects', type=str, default='detect',
-                        help="Projects to select, either a list or projects like 'clang;libc', or "
-                             "'detect' to automatically infer proejcts from the diff, or "
-                             "'default' to add all enabled projects")
+                        help="Projects to test as a list of projects like 'clang;libc'."
+                        " Dependent projects are automatically added to the CMake invocation.")
    args = parser.parse_args()
    logging.basicConfig(level=args.log_level, format='%(levelname)-7s %(message)s')

@ -116,12 +130,24 @@ if __name__ == '__main__':
    report.name = step_key
    report.success = True

-    cmake = run_step('cmake', report, lambda s, r: cmake_report(args.projects, s, r))
+    projects = set(args.projects.split(";"))
+    cp = ChooseProjects(None)
+    dependencies = cp.get_dependencies(projects)
+    logging.info(f"Dependencies: {dependencies}")
+    enabled_projects = ";".join(dependencies.union(projects))
+
+    cmake = run_step('cmake', report, lambda s, r: cmake_report(enabled_projects, s, r))
    commands_in_build = True
    if cmake.success:
+        if args.build_and_test_all:
            ninja_all = run_step('ninja all', report, ninja_all_report)
            if ninja_all.success:
                run_step('ninja check-all', report, ninja_check_all_report)
+        else:
+            checks = " ".join(cp.get_check_targets(projects))
+            logging.info(f"Running checks: {checks}")
+            report_lambda: Callable[Step, Report] = lambda s, r: ninja_check_projects_report(s, r, checks)
+            run_step(f"ninja {checks}", report, report_lambda)
        if args.check_clang_tidy:
            if commands_in_build:
                s = Step('')