From cc0058c17ee83d4dd889ea119f195664eb6aa8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=BChnel?= Date: Fri, 13 Mar 2020 13:50:15 +0100 Subject: [PATCH] Improved sync script to consider individual diffs. --- scripts/phab2github/phab2github.py | 154 ++++++++++++++++++++++------ scripts/phab2github/phab_wrapper.py | 38 +++++-- 2 files changed, 152 insertions(+), 40 deletions(-) diff --git a/scripts/phab2github/phab2github.py b/scripts/phab2github/phab2github.py index 5fc37a6..12316b5 100644 --- a/scripts/phab2github/phab2github.py +++ b/scripts/phab2github/phab2github.py @@ -14,7 +14,7 @@ # limitations under the License. import os -from typing import Optional +from typing import Optional, Union import git import logging from phab_wrapper import PhabWrapper, Revision @@ -27,9 +27,16 @@ import sys LLVM_GITHUB_URL = 'ssh://git@github.com/llvm/llvm-project' MY_GITHUB_URL = 'ssh://git@github.com/christiankuehnel/llvm-project' MY_REPO = 'christiankuehnel/llvm-project' + + _LOGGER = logging.getLogger() +class ApplyPatchException(Exception): + """A patch could not be applied.""" + pass + + class Phab2Github: def __init__(self, workdir: str): @@ -38,34 +45,44 @@ class Phab2Github: self.repo = None # type: Optional[git.Repo] self.phab_wrapper = PhabWrapper() self.github = self._create_github() - self.github_repo = self.github.get_repo(MY_REPO) # type: github.Repository + self.github_repo = self.github.get_repo(MY_REPO) # type: github.Repository def sync(self): + """Sync Phabricator to Github.""" _LOGGER.info('Starting sync...') self._refresh_master() + self._delete_phab_branches() revisions = self.phab_wrapper.get_revisions() + pull_requests = {p.title: p for p in self.github_repo.get_pulls(state='open')} for revision in revisions: - try: - self.create_branch(revision) - self.apply_patch(revision.latest_diff, - self.phab_wrapper.get_raw_patch(revision.latest_diff)) - self.repo.git.push('--force', 'origin', revision.branch_name) - # TODO: check if pull request already exists - self.github_repo.create_pull(title=revision.title, - body=revision.summary, - head=revision.branch_name, - base='master') - except Exception as e: - _LOGGER.exception(e) + self.create_branches_for_revision(revision) + if self._has_branch(revision): + if self._branches_identical(revision.branch_name, 'origin/{}'.format(revision.branch_name)): + _LOGGER.info('Branch {} is identical to upstream. Not pushing.'.format(revision.branch_name)) + else: + _LOGGER.info('Pushing branch {} to github...'.format(revision.branch_name)) + # TODO: do we sill need to force-push? + self.repo.git.push('--force', 'origin', revision.branch_name) + if revision.pr_title in pull_requests: + _LOGGER.info('Pull request already exists: {}'.format(pull_requests[revision.pr_title].html_url)) + else: + _LOGGER.info('Creating pull-request for branch {}...'.format(revision.branch_name)) + pr = self.github_repo.create_pull(title=revision.pr_title, + body=revision.pr_summary, + head=revision.branch_name, + base='master') + _LOGGER.info(pr.html_url) + _LOGGER.info('Sync completed.') def _refresh_master(self): + """Clone/update local git repo.""" if not os.path.exists(self.workdir): os.mkdir(self.workdir) if os.path.exists(self.llvm_dir): # TODO: in case of errors: delete and clone - _LOGGER.info('pulling origin...') + _LOGGER.info('pulling origin and upstream...') self.repo = git.Repo(self.llvm_dir) - self.repo.git.fetch() + self.repo.git.fetch('--all') self.repo.git.checkout('master') self.repo.git.pull('upstream', 'master') self.repo.git.push('origin', 'master') @@ -77,35 +94,80 @@ class Phab2Github: self.repo.remotes.upstream.fetch() _LOGGER.info('refresh of master branch completed') - def create_branch(self, revision: Revision): + def create_branches_for_revision(self, revision: Revision): + """Create branches for a Revision and it's Diffs. + + Apply changes from Phabricator to these branches. + """ name = revision.branch_name - if name in self.repo.heads: - self.repo.git.checkout('master') - self.repo.git.branch('-D', name) - base_hash = revision.latest_diff.base_hash + # TODO: only look at diffs that were not yet applied + # TODO: can diffs be modified on Phabricator and keep their ID? + for diff in revision.sorted_diffs: + if self._has_branch(diff): + continue + self.create_branch_for_diff(diff) + patch = self.phab_wrapper.get_raw_patch(diff) + try: + self.apply_patch(diff, patch) + except ApplyPatchException as e: + # TODO: retry on master if this fails + _LOGGER.error('Could not apply patch for Diff {}. Deleting branch'.format(diff.id)) + _LOGGER.exception(e) + self.repo.heads['master'].checkout() + self.repo.delete_head(diff.branch_name) + + diffs = [d for d in revision.sorted_diffs if self._has_branch(d)] + if len(diffs) == 0: + # TODO: handle error + _LOGGER.error('Could not create branch for Revision D{}'.format(revision.id)) + return + new_branch = self.repo.create_head(revision.branch_name, diffs[0].branch_name) + new_branch.checkout() + + for diff in diffs[1:]: + _LOGGER.info('Applying Diff {} onto branch {}'.format(diff.branch_name, revision.branch_name)) + patch = self._create_patch(revision.branch_name, diff.branch_name) + if len(patch) == 0: + _LOGGER.warning('Diff {} is identical to last one.'.format(diff.id)) + else: + try: + self.apply_patch(diff, patch) + except ApplyPatchException: + _LOGGER.error('Applying patch failed, but should not:') + _LOGGER.error(patch) + raise + + _LOGGER.info('Created branch for Revision D{}'.format(revision.id)) + + def create_branch_for_diff(self, diff: "Diff"): + """Create a branch for diff.""" + base_hash = diff.base_hash if base_hash is None: base_hash = 'upstream/master' - _LOGGER.info('creating branch {} based one {}...'.format(name, base_hash)) + _LOGGER.info('creating branch {} based on {}...'.format(diff.branch_name, base_hash)) try: - new_branch = self.repo.create_head(name, base_hash) + new_branch = self.repo.create_head(diff.branch_name, base_hash) except ValueError: # commit hash not found, try again with master _LOGGER.warning('commit hash {} not found in upstream repository. ' - 'Trying master instead...'.format(name, base_hash)) + 'Trying master instead...'.format(diff.branch_name, base_hash)) base_hash = 'upstream/master' - new_branch = self.repo.create_head(name, base_hash) + new_branch = self.repo.create_head(diff.branch_name, base_hash) self.repo.head.reference = new_branch self.repo.head.reset(index=True, working_tree=True) def apply_patch(self, diff: "Diff", raw_patch: str): - proc = subprocess.run('git apply --ignore-whitespace --whitespace=fix -', input=raw_patch, shell=True, text=True, cwd=self.repo.working_dir, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + """Apply a patch to the working copy.""" + proc = subprocess.run('git apply --ignore-whitespace --whitespace=fix -', input=raw_patch, shell=True, + text=True, cwd=self.repo.working_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if proc.returncode != 0: - raise Exception('Applying patch failed:\n{}'.format(proc.stdout + proc.stderr)) + raise ApplyPatchException('Applying patch failed:\n{}'.format(proc.stdout + proc.stderr)) self.repo.git.add('-A') - self.repo.index.commit(message='applying diff Phabricator Revision {}\n\ndiff: {}'.format(diff.revision, diff.id)) + self.repo.index.commit(message='applying Diff {} for Revision D{}\n\n Diff: {}'.format( + diff.id, diff.revision.id, diff.id)) - def _create_github(self) -> github.Github: + @staticmethod + def _create_github() -> github.Github: """Create instance of Github client. Reads access token from a file. @@ -114,9 +176,39 @@ class Phab2Github: token = json.load(json_file)['token'] return github.Github(token) + def _has_branch(self, item: Union["Diff", "Revision"]) -> bool: + """Check if the Diff/Revision has a local branch.""" + return item.branch_name in self.repo.heads + + def _delete_phab_branches(self): + """Delete all branches sarting with 'phab-'.""" + _LOGGER.info('Deleting local Phabricator-relates branches...') + self.repo.git.checkout('master') + for branch in [b for b in self.repo.heads if b.name.startswith('phab-')]: + _LOGGER.info('Deleding branch {}'.format(branch)) + self.repo.git.branch('-D', branch.name) + + def _create_patch(self, base: str, changes: str) -> str: + """Create patch from two commits.""" + proc = subprocess.run('git diff {} {}'.format(base, changes), shell=True, text=True, + cwd=self.repo.working_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if proc.returncode != 0: + raise ApplyPatchException('Applying patch failed:\n{}'.format(proc.stdout + proc.stderr)) + return proc.stdout + + def _branches_identical(self, left, right) -> bool: + """Check if two branches are identical.""" + try: + patch = self.repo.git.diff(left, right) + except git.GitCommandError: + return False + if len(patch) == 0: + return True + return False + if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) + logging.basicConfig(level=logging.INFO) rootdir = os.path.dirname(os.path.abspath(__file__)) tmpdir = os.path.join(rootdir, 'tmp') p2g = Phab2Github(tmpdir) diff --git a/scripts/phab2github/phab_wrapper.py b/scripts/phab2github/phab_wrapper.py index 1802a28..7f6d0ba 100644 --- a/scripts/phab2github/phab_wrapper.py +++ b/scripts/phab2github/phab_wrapper.py @@ -20,6 +20,9 @@ from phabricator import Phabricator from typing import List, Optional, Dict import datetime + +_BASE_URL = 'https://reviews.llvm.org' + _LOGGER = logging.getLogger() @@ -29,6 +32,7 @@ class Revision: def __init__(self, revision_dict: Dict): self.revision_dict = revision_dict self.diffs = [] # type : "Diff" + self.phab_url = '{}/D{}'.format(_BASE_URL, self.id) # type: str @property def id(self) -> str: @@ -44,11 +48,7 @@ class Revision: def __str__(self): return 'Revision {}: {} - ({})'.format(self.id, self.status, - ','.join([str(d.id) for d in self.diffs])) - @property - def latest_diff(self): - # TODO: make sure self.diffs is sorted - return self.diffs[-1] + ','.join([str(d.id) for d in self.diffs])) @property def branch_name(self) -> str: @@ -58,16 +58,30 @@ class Revision: def title(self) -> str: return self.revision_dict['fields']['title'] + @property + def pr_title(self) -> str: + return 'D{}: {}'.format(self.id, self.title) + @property def summary(self) -> str: return self.revision_dict['fields']['summary'] + @property + def pr_summary(self) -> str: + return '{}\n\n{}'.format(self.summary, self.phab_url) + + @property + def sorted_diffs(self) -> List["Diff"]: + return sorted(self.diffs, key=lambda d: d.id) + + class Diff: """A Phabricator diff.""" def __init__(self, diff_dict: Dict, revision: Revision): self.diff_dict = diff_dict self.revision = revision + # TODO: check in git repo instead of using a flag @property def id(self) -> str: @@ -87,6 +101,10 @@ class Diff: return ref['identifier'] return None + @property + def branch_name(self) -> str: + return 'phab-diff-{}'.format(self.id) + class PhabWrapper: """ @@ -124,13 +142,14 @@ class PhabWrapper: start_date = datetime.datetime.now() - datetime.timedelta(days=3) constraints = { 'createdStart': int(start_date.timestamp()) + #'ids': [76120] } # TODO: handle > 100 responses revision_response = self.phab.differential.revision.search( constraints=constraints) - revisions = [Revision(r) for r in revision_response.response['data']] + revisions = [Revision(r, self.host) for r in revision_response.response['data']] # TODO: only taking the first 10 to speed things up - revisions = revisions[0:3] + revisions = revisions[0:10] _LOGGER.info('Got {} revisions from the server'.format(len(revisions))) for revision in revisions: # TODO: batch-query diffs for all revisions, reduce number of @@ -141,15 +160,16 @@ class PhabWrapper: def _get_diffs(self, revision: Revision): """Get diffs for a revision from Phabricator.""" - _LOGGER.info('Downloading diffs for {}...'.format(revision.id)) + _LOGGER.info('Downloading diffs for Revision D{}...'.format(revision.id)) constraints = { 'revisionPHIDs': [revision.phid] } diff_response = self.phab.differential.diff.search( constraints=constraints) revision.diffs = [Diff(d, revision) for d in diff_response.response['data']] + _LOGGER.info(', '.join([str(d.id) for d in revision.diffs])) def get_raw_patch(self, diff: Diff) -> str: """Get raw patch for diff from Phabricator.""" - _LOGGER.info('Downloading patch for {}...'.format(diff.id)) + _LOGGER.info('Downloading patch for Diff {}...'.format(diff.id)) return self.phab.differential.getrawdiff(diffID=str(diff.id)).response