diff --git a/tomotwin/embed_main.py b/tomotwin/embed_main.py
index 4b742b4..34ad39a 100644
--- a/tomotwin/embed_main.py
+++ b/tomotwin/embed_main.py
@@ -378,8 +378,6 @@
 import hashlib
 import os
 import random
-import resource
-import sys
 from typing import List
 
 import numpy as np
@@ -600,11 +598,6 @@ def run_distr(config, world_size: int):
     """
     Starts a distributed run using DistributedDataParallel
     """
     mp.set_sharing_strategy('file_system')
-    limit = resource.getrlimit(resource.RLIMIT_NOFILE)
-    if limit[0] < 65000:
-        print(
-            f"Your user limit ('ulimit -n') is too low ({limit[0]}). Please run 'ulimit -n 65000' before running tomotwin_embed.")
-        sys.exit(1)
     print(f"Found {world_size} GPU(s). Start DDP + Compiling.")
     os.environ['MASTER_ADDR'] = '127.0.0.1'
     os.environ['MASTER_PORT'] = '29' + str(random.randint(1, 500)).zfill(3)
diff --git a/tomotwin/modules/inference/embedor.py b/tomotwin/modules/inference/embedor.py
index b0b7d62..ae81957 100644
--- a/tomotwin/modules/inference/embedor.py
+++ b/tomotwin/modules/inference/embedor.py
@@ -374,7 +374,7 @@
 This Source Code Form is "Incompatible With Secondary Licenses", as
 defined by the Mozilla Public License, v. 2.0.
 """
-
+import copy
 from abc import ABC, abstractmethod
 
 import numpy as np
@@ -586,8 +586,9 @@ def embed(self, volume_data: VolumeDataset) -> np.array:
             with torch.autocast(device_type='cuda', dtype=torch.float16):
                 subvolume = self.model.forward(subvolume).type(torch.HalfTensor)
             subvolume = subvolume.data.cpu()
-            items_indicis.append(item_index.data.cpu())
-            embeddings.append(subvolume.data.cpu())
+            items_indicis.append(copy.deepcopy(item_index.data.cpu()))
+            embeddings.append(copy.deepcopy(subvolume.data.cpu()))
+            del subvolume
 
         ## Sync items
         items_indicis = torch.cat(items_indicis)  # .to(self.rank) # necessary because of nccl