Merge pull request #949 from girder/annotation-large-user-records
Handle large user records in annotation elements.
manthey authored Aug 30, 2022
2 parents 7515336 + 051c88d commit 0ec0238
Showing 2 changed files with 31 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@
 - Fix iterating tiles where the overlap is larger than the tile size ([940](../../pull/940))
 - Better ignore tiff directories that aren't part of the pyramid ([943](../../pull/943))
 - Fix an issue with styling frames in ome tiffs ([945](../../pull/945))
+- Better handle large user records in annotation elements ([949](../../pull/949))
 
 ### Changes
 - Adjusted rest request logging rates for region endpoint ([948](../../pull/948))
35 changes: 30 additions & 5 deletions
@@ -36,6 +36,7 @@
 # store part of them in an associated file. This is slower, so don't do it for
 # small ones.
 MAX_ELEMENT_DOCUMENT = 10000
+MAX_ELEMENT_USER_DOCUMENT = 1000000
 
 
 class Annotationelement(Model):
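
The new threshold works alongside the existing one: an element is diverted to an attached file either when it has more than `MAX_ELEMENT_DOCUMENT` points or values, or when a `user` record pushes the pickled element past roughly 1 MB. A minimal sketch of that decision, with a hypothetical `_should_offload` helper (the real check is inline in `updateElementChunk`, below):

```python
import pickle

MAX_ELEMENT_DOCUMENT = 10000
MAX_ELEMENT_USER_DOCUMENT = 1000000


def _should_offload(element):
    # Hypothetical helper mirroring the inline check in
    # updateElementChunk: offload when there are very many
    # coordinates, or when a 'user' record makes the pickled
    # element larger than ~1 MB.
    if len(element.get('points', element.get('values', []))) > MAX_ELEMENT_DOCUMENT:
        return True
    return ('user' in element and
            len(pickle.dumps(element, protocol=4)) > MAX_ELEMENT_USER_DOCUMENT)
```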
@@ -291,6 +292,17 @@ def yieldElements(self, annotation, region=None, info=None):  # noqa
                         data.write(chunk)
                 data.seek(0)
                 element[datafile['key']] = pickle.load(data)
+                if 'userFileId' in datafile:
+                    data = io.BytesIO()
+                    chunksize = 1024 ** 2
+                    with File().open(File().load(datafile['userFileId'], force=True)) as fptr:
+                        while True:
+                            chunk = fptr.read(chunksize)
+                            if not len(chunk):
+                                break
+                            data.write(chunk)
+                    data.seek(0)
+                    element['user'] = pickle.load(data)
             if region.get('bbox') and 'bbox' in entry:
                 element['_bbox'] = entry['bbox']
                 if 'bbox' not in info:
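
The added branch mirrors the existing `fileId` path: stream the attached file into memory in 1 MiB chunks, then unpickle the buffer back onto the element. The shared pattern, as a standalone sketch that assumes only a readable file-like object:

```python
import io
import pickle


def _read_pickled_attachment(fptr, chunksize=1024 ** 2):
    # Hypothetical helper showing the read-back pattern used twice in
    # yieldElements: buffer the attachment in 1 MiB chunks, rewind,
    # and unpickle.
    data = io.BytesIO()
    while True:
        chunk = fptr.read(chunksize)
        if not len(chunk):
            break
        data.write(chunk)
    data.seek(0)
    return pickle.load(data)
```

In the diff this runs once for `datafile['fileId']` (the element data) and once for `datafile['userFileId']` (restored as `element['user']`).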
@@ -327,9 +339,11 @@ def removeWithQuery(self, query):
         attachedQuery = query.copy()
         attachedQuery['datafile'] = {'$exists': True}
         for element in self.collection.find(attachedQuery):
-            file = File().load(element['datafile']['fileId'], force=True)
-            if file:
-                File().remove(file)
+            for key in {'fileId', 'userFileId'}:
+                if key in element['datafile']:
+                    file = File().load(element['datafile'][key], force=True)
+                    if file:
+                        File().remove(file)
         self.collection.bulk_write([pymongo.DeleteMany(query)], ordered=False)
 
     def removeElements(self, annotation):
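
Deletion now has to clean up either one or two attached files per element; looping over both possible keys keeps `removeWithQuery` compatible with elements stored before this change, which only carry `fileId`. The same loop as a standalone sketch (assuming Girder's `File` model, as used in this module):

```python
from girder.models.file import File


def _remove_attached_files(element):
    # Sketch of the new cleanup: older records carry only 'fileId',
    # newer ones may also carry 'userFileId'; missing files are
    # skipped silently.
    for key in ('fileId', 'userFileId'):
        if key in element['datafile']:
            file = File().load(element['datafile'][key], force=True)
            if file:
                File().remove(file)
```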
@@ -505,10 +519,19 @@ def saveElementAsFile(self, annotation, entries):
             io.BytesIO(data), size=len(data), name='_annotationElementData',
             parentType='item', parent=item, user=None,
             mimeType='application/json', attachParent=True)
+        userdata = None
+        if 'user' in element:
+            userdata = pickle.dumps(element.pop('user'), protocol=4)
+            userFile = Upload().uploadFromFile(
+                io.BytesIO(userdata), size=len(userdata), name='_annotationElementUserData',
+                parentType='item', parent=item, user=None,
+                mimeType='application/json', attachParent=True)
         entries[0]['datafile'] = {
             'key': key,
             'fileId': elementFile['_id'],
         }
+        if userdata:
+            entries[0]['datafile']['userFileId'] = userFile['_id']
 
     def updateElementChunk(self, elements, chunk, chunkSize, annotation, now):
         """
@@ -525,8 +548,10 @@ def updateElementChunk(self, elements, chunk, chunkSize, annotation, now):
             'element': element
         } for element in elements[chunk:chunk + chunkSize]]
         prepTime = time.time() - chunkStartTime
-        if (len(entries) == 1 and len(entries[0]['element'].get(
-                'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT):
+        if (len(entries) == 1 and (len(entries[0]['element'].get(
+                'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT or (
+                'user' in entries[0]['element'] and
+                len(pickle.dumps(entries[0]['element'], protocol=4)) > MAX_ELEMENT_USER_DOCUMENT))):
             self.saveElementAsFile(annotation, entries)
         res = self.collection.insert_many(entries, ordered=False)
         for pos, entry in enumerate(entries):
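
The widened condition routes a single oversized element through `saveElementAsFile` when its pickled form, `user` record included, exceeds `MAX_ELEMENT_USER_DOCUMENT` (1 MB). A worked example with a hypothetical element:

```python
import pickle

MAX_ELEMENT_USER_DOCUMENT = 1000000

# 300k floats pickle to roughly 2.7 MB (9 bytes per float), well past
# the 1 MB threshold, so this element would be saved as a file.
element = {'type': 'point', 'center': [10, 20, 0],
           'user': {'scores': [0.5] * 300000}}
print(len(pickle.dumps(element, protocol=4)) > MAX_ELEMENT_USER_DOCUMENT)  # True
```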