-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconflicted_ac_page_resolver.py
171 lines (143 loc) · 8.81 KB
/
conflicted_ac_page_resolver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Can be used to resolve a common type of conflict in annotation collection page files that occurs when two separate
# users both add new annotations to the same collection. The default Git text merge driver does not deal well with JSON,
# because it is not aware of the logical structure of such a file (neither the bounds of JSON objects nor, in our case,
# the annotation ID are taken into account). In our case there is also quite a lot of repetition between distinct
# annotations, further complicating a simple line-by-line comparison.
#
# Such conflicts can sometimes be resolved quite easily and interactively through the GitLab UI with simple "ours" or
# "theirs" choices, but often require manual editing of the conflicted file (also, no user documentation exists for
# either approach at the time of writing this, so it almost inevitably becomes a support task).
#
# The function below automates the manual conflict resolution task. However, BE AWARE THAT IT WILL NOT RESOLVE TRUE
# CONFLICTS, i.e. where different users have modified the SAME annotation. Instead, it writes both versions to the
# merged file and outputs a warning about the mismatch, which requires further manual investigation. Therefore,
# conflicted files should not simply be processed without first being reviewed and/or checked afterwards, and you
# should check for any warnings being printed. Also, this has not been tested with scenarios where annotations are
# being deleted at the same time (although a test does exist, see tests/test_conflicted_ac_page_resolver.py).
#
# Expected input: a single annotation collection page file (JSON) with conflict markers, as can be fetched from GitLab
# by navigating to a blocked merge request, clicking on "Resolve conflicts" and then "Edit inline"
# Output: files named "our_file", "their_file" and "merged_file", where the first two should match "our" and
# "their" version of the file as found in the relevant branches at the time the input file was fetched
#
# TODOs: 1. see below
# 2. investigate writing a custom merge driver, refs:
# https://gregmicek.com/software-coding/2020/01/13/how-to-write-a-custom-git-merge-driver/
# https://git-scm.com/docs/gitattributes#_built_in_merge_drivers
# https://www.google.com/search?q=git+custom+merge+driver
import json
from collections import deque
from datetime import datetime
# Git conflict markers. Only the seven-character prefix is matched, because the text that follows it on the marker
# line (e.g. "HEAD" or "master") is not the same in every conflicted file.
HEAD_MARKER = "<<<<<<<"
SEPARATOR = "======="
MASTER_MARKER = ">>>>>>>"

# Key below body.properties.system of an annotation; the first element of its value is parsed as an ISO timestamp
CATMA_MARKUPTIMESTAMP_UUID = "CATMA_54A5F93F-5333-3F0D-92F7-7BD5930DB9E6"


def _annotation_id(annotation):
    """Return the last 42 characters of the annotation's "id" value, which is what annotations are matched on."""
    return annotation["id"][-42:]


def _markup_timestamp(annotation):
    """Return the annotation's markup timestamp (first value stored under CATMA_MARKUPTIMESTAMP_UUID) as a datetime."""
    return datetime.fromisoformat(annotation["body"]["properties"]["system"][CATMA_MARKUPTIMESTAMP_UUID][0])


def _split_conflicted_lines(lines):
    """Reconstruct 'our' and 'their' version of a file from its lines with conflict markers.

    Lines outside of conflicts go to both versions, lines between HEAD_MARKER and SEPARATOR only to 'ours' and lines
    between SEPARATOR and MASTER_MARKER only to 'theirs'. The marker lines themselves are dropped.

    Returns:
        A 2-tuple (our_lines, their_lines) of lists of lines.

    Raises:
        ValueError: if a marker is encountered out of sequence or the last conflict is not terminated.
    """
    all_markers = (HEAD_MARKER, SEPARATOR, MASTER_MARKER)
    # which marker has to follow which
    marker_sequence = {HEAD_MARKER: SEPARATOR, SEPARATOR: MASTER_MARKER, MASTER_MARKER: HEAD_MARKER}

    our_lines = []
    their_lines = []
    next_expected_marker = HEAD_MARKER

    for idx, line in enumerate(lines):
        if line.startswith(next_expected_marker):
            next_expected_marker = marker_sequence[next_expected_marker]
            continue

        # sanity check
        if line.startswith(all_markers):
            stripped_line = line.rstrip("\n")
            raise ValueError(
                f"Expected marker '{next_expected_marker}' but encountered '{stripped_line}' on line {idx+1}"
            )

        # next_expected_marker doubles as the state: HEAD_MARKER = outside of a conflict, SEPARATOR = inside 'our'
        # chunk, MASTER_MARKER = inside 'their' chunk
        if next_expected_marker != MASTER_MARKER:
            our_lines.append(line)
        if next_expected_marker != SEPARATOR:
            their_lines.append(line)

    if next_expected_marker != HEAD_MARKER:
        raise ValueError(f"Unterminated conflict: expected marker '{next_expected_marker}' before the end of the file")

    return our_lines, their_lines


def _merge_annotations(our_annotations, their_annotations):
    """Return a new list with 'our' annotations plus 'their' new annotations, inserted according to timestamps.

    Annotations that occur on both sides (same ID) but are not equal are reported with a warning and 'their' version
    is added as well, so that both versions end up in the result for manual investigation.
    """
    our_annotation_ids = {_annotation_id(annotation) for annotation in our_annotations}
    their_new_annotations = [
        annotation for annotation in their_annotations if _annotation_id(annotation) not in our_annotation_ids
    ]

    # ensure that annotations occurring in both files are equal
    their_annotations_by_id = {}
    for their_annotation in their_annotations:
        their_annotations_by_id.setdefault(_annotation_id(their_annotation), []).append(their_annotation)

    for our_annotation in our_annotations:
        annotation_id = _annotation_id(our_annotation)
        for their_annotation in their_annotations_by_id.get(annotation_id, ()):
            if their_annotation != our_annotation:
                print(f"WARNING: Mismatch in annotations with ID {annotation_id}, check manually")
                # append their mismatched annotation to their_new_annotations so that it is written to the merged
                # file for manual investigation
                their_new_annotations.append(their_annotation)

    # insert their new annotations in the appropriate place according to timestamps: directly after the last
    # annotation that is not newer, or at the very beginning if there is no such annotation (this includes the case
    # where there are no annotations yet)
    merged_annotations = list(our_annotations)
    for their_new_annotation in their_new_annotations:
        their_timestamp = _markup_timestamp(their_new_annotation)
        insert_at = 0
        for idx in range(len(merged_annotations) - 1, -1, -1):
            if _markup_timestamp(merged_annotations[idx]) <= their_timestamp:
                insert_at = idx + 1
                break
        merged_annotations.insert(insert_at, their_new_annotation)

    return merged_annotations


def resolve(page_file):
    """Resolve the conflicts in an annotation collection page file with conflict markers.

    Writes three files to the current working directory: "our_file" and "their_file" (the two versions of the file
    as reconstructed from the conflict markers) and "merged_file" (the JSON array of 'our' annotations plus 'their'
    new annotations, written with an indent of 2). A file without any conflict markers is passed through as is.

    True conflicts (the same annotation differs between the two versions) are NOT resolved: a warning is printed and
    both versions are written to the merged file.

    Args:
        page_file: path of the conflicted file (read as UTF-8); both versions of it must be JSON arrays of annotations

    Raises:
        ValueError: if the conflict markers are out of sequence or the last conflict is not terminated
        json.JSONDecodeError: if either of the two reconstructed versions is not valid JSON
    """
    with open(page_file, encoding="utf-8", newline=None) as f:
        lines = f.readlines()

    our_file_lines, their_file_lines = _split_conflicted_lines(lines)

    # written before parsing so that both versions can be inspected even if one of them turns out to be invalid JSON
    with open("our_file", "w", encoding="utf-8", newline="\n") as our_file, \
            open("their_file", "w", encoding="utf-8", newline="\n") as their_file:
        our_file.writelines(our_file_lines)
        their_file.writelines(their_file_lines)

    # TODO: consider fetching the two files directly from GitLab given a link to the blocked merge request,
    #       then feed into the below, or just fetch the conflicted file with the conflict markers
    our_annotations = json.loads("".join(our_file_lines))
    their_annotations = json.loads("".join(their_file_lines))

    merged_annotations = _merge_annotations(our_annotations, their_annotations)

    with open("merged_file", "w", encoding="utf-8", newline="\n") as merged_file:
        json.dump(merged_annotations, merged_file, indent=2)