diff --git a/DataCollection.py b/DataCollection.py index cf37730..abf7240 100644 --- a/DataCollection.py +++ b/DataCollection.py @@ -195,6 +195,7 @@ def getUsedTruth(self): def setBatchSize(self,bsize): if bsize > self.nsamples: + print("Batch size = ", bsize, "Sample = ", self.nsamples) raise Exception('Batch size must not be bigger than total sample size') self.__batchsize=bsize @@ -576,8 +577,8 @@ def __collectWriteInfo(successful,samplename,sampleentries,outputDir): nchilds = int(cpu_count()/2)-2 if self.nprocs <= 0 else self.nprocs #import os - #if 'nvidiagtx1080' in os.getenv('HOSTNAME'): - # nchilds=cpu_count()-5 + if 'max' in os.getenv('HOSTNAME'): + nchilds=int(cpu_count()/3)-2 if nchilds<1: nchilds=1 diff --git a/TrainData.py b/TrainData.py index 1a8da1a..14277d7 100644 --- a/TrainData.py +++ b/TrainData.py @@ -64,9 +64,9 @@ def _read_arrs_(arrwl,arrxl,arryl,arrzl,doneVal,fileprefix,tdref=None,randomSeed doneVal.value=True h5f.close() del h5f - #except Exception as d: - # raise d - #finally: + except Exception as d: + raise d + finally: if tdref: tdref.removeRamDiskFile() @@ -479,7 +479,7 @@ def readIn_abort(self): def readIn_join(self,wasasync=True,waitforStart=True): - if True: + try: if not not hasattr(self, 'readthreadids') and not waitforStart and not self.readthread and wasasync: print('\nreadIn_join:read never started\n') diff --git a/training/tokenTools.py b/training/tokenTools.py index ceff503..fd12379 100644 --- a/training/tokenTools.py +++ b/training/tokenTools.py @@ -1,5 +1,5 @@ -renewtokens=True +renewtokens=False def renew_token_process(): diff --git a/training/training_base.py b/training/training_base.py index 48d8212..0daf56c 100644 --- a/training/training_base.py +++ b/training/training_base.py @@ -47,7 +47,7 @@ def __init__( self, splittrainandtest=0.85, useweights=False, testrun=False, resumeSilently=False, - renewtokens=True, + renewtokens=False, collection_class=DataCollection, parser=None ): @@ -108,7 +108,8 @@ def __init__( self.trainedepoches=0 self.compiled=False self.checkpointcounter=0 - self.renewtokens=renewtokens + #self.renewtokens=renewtokens + self.renewtokens=False self.inputData = os.path.abspath(args.inputDataCollection) \