Skip to content

Commit

Permalink
Merge pull request #90 from ESMCI/issue-86-detached-sync-status
Browse files Browse the repository at this point in the history
git: rework logic for determining in-sync and current ref name

Rework two aspects of the logic for git repositories; both of these can
change behavior in some cases when printing status:

1. Determining whether the local checkout is in-sync with the expected
   reference from the configuration file: Now we always convert the
   expected reference to a hash and compare that with the
   currently-checked-out hash. Previously, we sometimes did the
   comparison using names, e.g., just ensuring that you're on the right
   branch.

2. Determining the current ref name (e.g., the branch or tag currently
   checked out). Now we use a number of plumbing commands rather than
   relying on regex parsing of 'git branch -vv'. The previous regex
   parsing was fragile and hard to maintain, and was the source of a
   number of bugs. In addition, differences between git v. 1 and git
   v. 2 meant that the result was incorrect in some cases -
   particularly, in the case where we have "detached from foo" (which is
   the text that always appeared for a detached head in v 1, but in v 2
   means we are no longer at foo).

I have also overhauled the unit tests covering this functionality. Many
tests were no longer needed with the new logic and so I have removed
them. I have added some other tests covering the new functionality.

User interface changes?: Yes
- Subtle changes to status output, as described above
- One particular change is: If we're on a tracking branch,
  `checkout_externals -S -v` will show the name of the local branch
  rather than the tracked branch. (This is more accurate, because we may
  not actually be at the head of the tracking branch.)

Fixes: #86 (Status incorrectly reports in-sync when you have made
commits in detached head state - fixed due to the change in (1)).

Testing:
  test removed: many no-longer-relevant unit tests
  unit tests: pass
  system tests: pass
  manual testing: basic manual testing of checking out and running
    status, in the context of cesm and ctsm
  • Loading branch information
billsacks authored Apr 10, 2018
2 parents 3b624cf + b11ad61 commit 6923119
Show file tree
Hide file tree
Showing 4 changed files with 615 additions and 591 deletions.
224 changes: 115 additions & 109 deletions manic/repository_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import copy
import os
import re

from .global_constants import EMPTY_STR, LOCAL_PATH_INDICATOR
from .global_constants import VERBOSITY_VERBOSE
Expand Down Expand Up @@ -37,15 +36,6 @@ class GitRepository(Repository):
"""

# match XYZ of '* (HEAD detached at {XYZ}):
# e.g. * (HEAD detached at origin/feature-2)
RE_DETACHED = re.compile(
r'\* \((?:[\w]+[\s]+)?detached (?:at|from) ([\w\-./]+)\)')

# match tracking reference info, return XYZ from [XYZ]
# e.g. [origin/master]
RE_TRACKING = re.compile(r'\[([\w\-./]+)(?::[\s]+[\w\s,]+)?\]')

def __init__(self, component_name, repo):
"""
Parse repo (a <repo> XML element).
Expand Down Expand Up @@ -93,81 +83,42 @@ def _clone_repo(self, base_dir_path, repo_dir_name, verbosity):
self._git_clone(self._url, repo_dir_name, verbosity)
os.chdir(cwd)

def _current_ref_from_branch_command(self, git_output):
"""Parse output of the 'git branch -vv' command to determine the current
branch. The line starting with '*' is the current branch. It
can be one of the following head states:
1. On local branch
feature2 36418b4 [origin/feature2] Work on feature2
* feature3 36418b4 Work on feature2
master 9b75494 [origin/master] Initialize repository.
2. Detached from sha
* (HEAD detached at 36418b4) 36418b4 Work on feature2
feature2 36418b4 [origin/feature2] Work on feature2
master 9b75494 [origin/master] Initialize repository.
3. Detached from remote branch
* (HEAD detached at origin/feature2) 36418b4 Work on feature2
feature2 36418b4 [origin/feature2] Work on feature2
feature3 36418b4 Work on feature2
master 9b75494 [origin/master] Initialize repository.
4. Detached from tag
* (HEAD detached at clm4_5_18_r272) b837fc36 clm4_5_18_r272
5. On tracking branch. Note, may be may be ahead or behind remote.
* master 562bac9a [origin/master] more test junk
* master 408a8920 [origin/master: ahead 3] more junk
* master 408a8920 [origin/master: ahead 3, behind 2] more junk
* master 822d687d [origin/master: behind 3] more junk
NOTE: Parsing the output of the porcelain is probably not a
great idea, but there doesn't appear to be a single plumbing
command that will return the same info.
"""
lines = git_output.splitlines()
ref = ''
for line in lines:
if line.startswith('*'):
ref = line
break
current_ref = EMPTY_STR
if not ref:
# not a git repo? some other error? we return so the
# caller can handle.
pass
elif 'detached' in ref:
match = self.RE_DETACHED.search(ref)
try:
current_ref = match.group(1)
except BaseException:
msg = 'DEV_ERROR: regex to detect detached head state failed!'
msg += '\nref:\n{0}\ngit_output\n{1}\n'.format(ref, git_output)
fatal_error(msg)
elif '[' in ref:
match = self.RE_TRACKING.search(ref)
try:
current_ref = match.group(1)
except BaseException:
msg = 'DEV_ERROR: regex to detect tracking branch failed.'
msg += '\nref:\n{0}\ngit_output\n{1}\n'.format(ref, git_output)
fatal_error(msg)
else:
# assumed local branch
current_ref = ref.split()[1]
def _current_ref(self):
"""Determine the *name* associated with HEAD.
If we're on a branch, then returns the branch name; otherwise,
if we're on a tag, then returns the tag name; otherwise, returns
the current hash. Returns an empty string if no reference can be
determined (e.g., if we're not actually in a git repository).
"""
ref_found = False

# If we're on a branch, then use that as the current ref
branch_found, branch_name = self._git_current_branch()
if branch_found:
current_ref = branch_name
ref_found = True

if not ref_found:
# Otherwise, if we're exactly at a tag, use that as the
# current ref
tag_found, tag_name = self._git_current_tag()
if tag_found:
current_ref = tag_name
ref_found = True

if not ref_found:
# Otherwise, use current hash as the current ref
hash_found, hash_name = self._git_current_hash()
if hash_found:
current_ref = hash_name
ref_found = True

if not ref_found:
# If we still can't find a ref, return empty string. This
# can happen if we're not actually in a git repo
current_ref = ''

current_ref = current_ref.strip()
return current_ref

def _check_sync(self, stat, repo_dir_path):
Expand All @@ -194,12 +145,11 @@ def _check_sync(self, stat, repo_dir_path):
self._check_sync_logic(stat, repo_dir_path)

def _check_sync_logic(self, stat, repo_dir_path):
"""Isolate the complicated synce logic so it is not so deeply nested
and a bit easier to understand.
Sync logic - only reporting on whether we are on the ref
(branch, tag, hash) specified in the externals description.
"""Compare the underlying hashes of the currently checkout ref and the
expected ref.
Output: sets the sync_state as well as the current and
expected ref in the input status object.
"""
def compare_refs(current_ref, expected_ref):
Expand All @@ -215,8 +165,8 @@ def compare_refs(current_ref, expected_ref):
cwd = os.getcwd()
os.chdir(repo_dir_path)

git_output = self._git_branch_vv()
current_ref = self._current_ref_from_branch_command(git_output)
# get the full hash of the current commit
_, current_ref = self._git_current_hash()

if self._branch:
if self._url == LOCAL_PATH_INDICATOR:
Expand All @@ -230,22 +180,33 @@ def compare_refs(current_ref, expected_ref):
else:
expected_ref = "{0}/{1}".format(remote_name, self._branch)
elif self._hash:
# NOTE(bja, 2018-03) For comparison purposes, we could
# determine which is longer and check that the short ref
# is a substring of the long ref. But it is simpler to
# just expand both to the full sha and do an exact
# comparison.
_, expected_ref = self._git_revparse_commit(self._hash)
_, current_ref = self._git_revparse_commit(current_ref)
else:
expected_ref = self._hash
elif self._tag:
expected_ref = self._tag
else:
msg = 'In repo "{0}": none of branch, hash or tag are set'.format(
self._name)
fatal_error(msg)

# record the *names* of the current and expected branches
stat.current_version = self._current_ref()
stat.expected_version = copy.deepcopy(expected_ref)

stat.sync_state = compare_refs(current_ref, expected_ref)
if current_ref == EMPTY_STR:
stat.sync_state = ExternalStatus.UNKNOWN

stat.current_version = current_ref
stat.expected_version = expected_ref
else:
# get the underlying hash of the expected ref
revparse_status, expected_ref_hash = self._git_revparse_commit(
expected_ref)
if revparse_status:
# We failed to get the hash associated with
# expected_ref. Maybe we should assign this to some special
# status, but for now we're just calling this out-of-sync to
# remain consistent with how this worked before.
stat.sync_state = ExternalStatus.MODEL_MODIFIED
else:
# compare the underlying hashes
stat.sync_state = compare_refs(current_ref, expected_ref_hash)

os.chdir(cwd)

Expand Down Expand Up @@ -595,14 +556,58 @@ def _status_v1z_is_dirty(git_output):
#
# ----------------------------------------------------------------
@staticmethod
def _git_branch_vv():
"""Run git branch -vv to obtain verbose branch information, including
upstream tracking and hash.
def _git_current_hash():
"""Return the full hash of the currently checked-out version.
Returns a tuple, (hash_found, hash), where hash_found is a
logical specifying whether a hash was found for HEAD (False
could mean we're not in a git repository at all). (If hash_found
is False, then hash is ''.)
"""
cmd = ['git', 'branch', '--verbose', '--verbose']
git_output = execute_subprocess(cmd, output_to_caller=True)
return git_output
status, git_output = GitRepository._git_revparse_commit("HEAD")
hash_found = not status
if not hash_found:
git_output = ''
return hash_found, git_output

@staticmethod
def _git_current_branch():
"""Determines the name of the current branch.
Returns a tuple, (branch_found, branch_name), where branch_found
is a logical specifying whether a branch name was found for
HEAD. (If branch_found is False, then branch_name is ''.)
"""
cmd = ['git', 'symbolic-ref', '--short', '-q', 'HEAD']
status, git_output = execute_subprocess(cmd,
output_to_caller=True,
status_to_caller=True)
branch_found = not status
if branch_found:
git_output = git_output.strip()
else:
git_output = ''
return branch_found, git_output

@staticmethod
def _git_current_tag():
"""Determines the name tag corresponding to HEAD (if any).
Returns a tuple, (tag_found, tag_name), where tag_found is a
logical specifying whether we found a tag name corresponding to
HEAD. (If tag_found is False, then tag_name is ''.)
"""
# git describe --exact-match --tags HEAD
cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD']
status, git_output = execute_subprocess(cmd,
output_to_caller=True,
status_to_caller=True)
tag_found = not status
if tag_found:
git_output = git_output.strip()
else:
git_output = ''
return tag_found, git_output

@staticmethod
def _git_showref_tag(ref):
Expand Down Expand Up @@ -647,6 +652,7 @@ def _git_revparse_commit(ref):
'{0}^{1}'.format(ref, '{commit}'), ]
status, git_output = execute_subprocess(cmd, status_to_caller=True,
output_to_caller=True)
git_output = git_output.strip()
return status, git_output

@staticmethod
Expand Down
Loading

0 comments on commit 6923119

Please sign in to comment.