migration to djc2
emilbols committed Feb 17, 2020
1 parent 21c0e83 commit b951f09
Showing 13 changed files with 724 additions and 0 deletions.
69 changes: 69 additions & 0 deletions README.md
@@ -0,0 +1,69 @@
DeepJet: Repository for training and evaluation of deep neural networks for Jet identification
===============================================================================

This package depends on DeepJetCore 2.X (https://github.com/DL4Jets/DeepJetCore).

Usage
==============

After logging in, source the environment (cd into the working directory first):
```
cd <your working dir>/DeepJet
source env.sh
```


The preparation for the training consists of the following steps
====

- Define the data structure for the training. The DeepJet data structure is defined in the modules directory as the class TrainData_DF.

- Convert the ROOT files to the data structure for training using the DeepJetCore tools:
```
convertFromSource.py -i /path/to/the/root/ntuple/list_of_root_files.txt -o /output/path/that/needs/some/disk/space -c TrainData_DF
```

This step can take a while.
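The input list is a plain-text file with one ROOT ntuple path per line; a minimal sketch (the paths here are hypothetical placeholders):

```
/path/to/ntuples/ntuple_1.root
/path/to/ntuples/ntuple_2.root
/path/to/ntuples/ntuple_3.root
```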


- Prepare the training file and the model. Refer to DeepJet/Train/train_DeepFlavour.py for an example.



Training
====

Since the training can take a while, it is advised to open a screen session so that it does not die at logout.
```
ssh lxplus.cern.ch
<note the machine you are on, e.g. lxplus058>
screen
ssh lxplus7
```
Then source the environment and proceed with the training. Detach the screen session with ctrl+a d.
You can go back to the session by logging in to the machine the session is running on (e.g. lxplus058):

```
ssh lxplus.cern.ch
ssh lxplus058
screen -r
```

Please close the screen session when the training is finished.

The training is launched in the following way:
```
python train_DeepFlavour.py /path/to/the/output/of/convert/dataCollection.dc <output dir of your choice>
```


Evaluation
====

After the training has finished, the performance can be evaluated.

```
predict.py <output dir of training>/KERAS_model.h5 <output dir of training>/trainsamples.dc <dir with test sample stored as rootfiles>/filelist.txt <output directory>
```

This creates output trees with the prediction scores as well as truth information and some kinematic variables.
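The output trees can then be turned into performance figures. As an illustration (independent of the actual branch names, which are not fixed here), a working-point efficiency and mistag rate can be computed from the score arrays like this:

```python
import numpy as np

# Hypothetical discriminator scores, split by truth label:
# outputs for true b jets and for light-flavour jets.
b_scores = np.array([0.9, 0.8, 0.7, 0.2])
light_scores = np.array([0.1, 0.3, 0.6, 0.05])

cut = 0.5  # discriminator working point
b_eff = np.mean(b_scores > cut)       # fraction of b jets passing the cut
mistag = np.mean(light_scores > cut)  # fraction of light jets passing the cut

print(b_eff, mistag)  # 0.75 0.25
```

Scanning the cut over the full score range yields the usual efficiency-vs-mistag (ROC) curve.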
57 changes: 57 additions & 0 deletions Train/train_DeepFlavour.py
@@ -0,0 +1,57 @@

#import sys
#import tensorflow as tf
#sys.modules["keras"] = tf.keras

from DeepJetCore.training.training_base import training_base
from DeepJetCore.modeltools import fixLayersContaining,printLayerInfosAndWeights


#also does all the parsing
train=training_base(testrun=False)

newtraining= not train.modelSet()
#for recovering a training
if newtraining:
from models import model_deepFlavourReference

train.setModel(model_deepFlavourReference,dropoutRate=0.1,momentum=0.3)

#train.keras_model=fixLayersContaining(train.keras_model, 'regression', invert=False)

train.compileModel(learningrate=0.001,
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])


train.train_data.maxFilesOpen=1

print(train.keras_model.summary())
model,history = train.trainModel(nepochs=1,
                                 batchsize=10000,
                                 stop_patience=300,
                                 lr_factor=0.5,
                                 lr_patience=-1,
                                 lr_epsilon=0.0001,
                                 lr_cooldown=6,
                                 lr_minimum=0.0001)


print('fixing input norms...')
train.keras_model=fixLayersContaining(train.keras_model, 'input_batchnorm')
train.compileModel(learningrate=0.0001,
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

print(train.keras_model.summary())
#printLayerInfosAndWeights(train.keras_model)

model,history = train.trainModel(nepochs=65, #sweet spot from looking at the testing plots
                                 batchsize=10000,
                                 stop_patience=300,
                                 lr_factor=0.5,
                                 lr_patience=-1,
                                 lr_epsilon=0.0001,
                                 lr_cooldown=10,
                                 lr_minimum=0.00001,
                                 verbose=1,checkperiod=1)
14 changes: 14 additions & 0 deletions env.sh
@@ -0,0 +1,14 @@

#! /bin/bash

export DJSUBPACKAGE=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd -P)
export DEEPJETCORE_SUBPACKAGE=$DJSUBPACKAGE

cd $DJSUBPACKAGE
export PYTHONPATH=$DJSUBPACKAGE/modules:$PYTHONPATH
export PYTHONPATH=$DJSUBPACKAGE/modules/datastructures:$PYTHONPATH
export PATH=$DJSUBPACKAGE/scripts:$PATH

export LD_LIBRARY_PATH=$DJSUBPACKAGE/modules/compiled:$LD_LIBRARY_PATH
export PYTHONPATH=$DJSUBPACKAGE/modules/compiled:$PYTHONPATH

1 change: 1 addition & 0 deletions modules/Layers.py
@@ -0,0 +1 @@
global_layers_list = {}
3 changes: 3 additions & 0 deletions modules/Losses.py
@@ -0,0 +1,3 @@

# Define custom losses here and add them to the global_loss_list dict (important!)
global_loss_list = {}
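A self-contained sketch of the registration pattern this dict supports (the loss below is purely illustrative and not part of DeepJet; a real custom loss would operate on Keras tensors):

```python
import numpy as np

def example_mse(y_true, y_pred):
    # Illustrative numpy mean-squared-error; a real Keras loss
    # takes symbolic tensors, not plain arrays.
    return float(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))

# Register the loss under a name so it can be looked up at compile time.
global_loss_list = {}
global_loss_list['example_mse'] = example_mse
```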
3 changes: 3 additions & 0 deletions modules/Metrics.py
@@ -0,0 +1,3 @@

# Define custom metrics here and add them to the global_metrics_list dict (important!)
global_metrics_list = {}
53 changes: 53 additions & 0 deletions modules/compiled/Makefile
@@ -0,0 +1,53 @@



#
# This file might need some adjustments but should serve as a good basis
#

PYTHON_INCLUDE = `python-config --includes`
PYTHON_LIB=`python-config --libs`

ROOTSTUFF=`root-config --libs --glibs --ldflags`
ROOTCFLAGS=`root-config --cflags`

CPP_FILES := $(wildcard src/*.cpp)
OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o)))
LD_FLAGS := `root-config --cflags --glibs` -lMathMore -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers -lquicklz
CC_FLAGS := -fPIC -g -Wall `root-config --cflags`
CC_FLAGS += -I./interface -I${DEEPJETCORE}/compiled/interface
DJC_LIB = -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers


MODULES := $(wildcard src/*.C)
MODULES_OBJ_FILES := $(addprefix ./,$(notdir $(MODULES:.C=.o)))
MODULES_SHARED_LIBS := $(addprefix ./,$(notdir $(MODULES:.C=.so)))


all: $(MODULES_SHARED_LIBS) $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp))

#compile the module helpers if necessary
#../modules/libsubpackagehelpers.so:
# cd ../modules; make; cd -

%: bin/%.cpp $(OBJ_FILES)
	g++ $(CC_FLAGS) $(LD_FLAGS) $(OBJ_FILES) $< -o $@


obj/%.o: src/%.cpp
	g++ $(CC_FLAGS) -c -o $@ $<


#python modules

%.so: %.o
	g++ -o $(@) -shared -fPIC $(LINUXADD) $< $(ROOTSTUFF) $(PYTHON_LIB) -lboost_python -lboost_numpy $(DJC_LIB)

%.o: src/%.C
	g++ $(ROOTCFLAGS) -O2 $(CC_FLAGS) -I./interface $(PYTHON_INCLUDE) -fPIC -c -o $(@) $<


clean:
	rm -f obj/*.o obj/*.d *.so
	rm -f %

35 changes: 35 additions & 0 deletions modules/compiled/src/c_convert.C
@@ -0,0 +1,35 @@


#include <boost/python.hpp>
#include "boost/python/numpy.hpp"
#include "boost/python/list.hpp"
#include "boost/python/str.hpp"
#include <boost/python/exception_translator.hpp>
#include <exception>

//includes from deepjetcore
#include "helper.h"
#include "simpleArray.h"

namespace p = boost::python;
namespace np = boost::python::numpy;

/*
* Example of a python module that will be compiled.
* It can be used, e.g. to convert from fully custom input data
*/

np::ndarray readFirstFeatures(std::string infile){

    auto arr = djc::simpleArray<float>({10,3,4});
    arr.at(0,2,1) = 5.; // filling some data

    return simpleArrayToNumpy(arr);
}

BOOST_PYTHON_MODULE(c_convert) {
    Py_Initialize();
    np::initialize();
    p::def("readFirstFeatures", &readFirstFeatures);
}
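Once compiled, the module returns a numpy array to Python with the shape and content set above. For reference, the equivalent pure-numpy construction (no compiled module needed):

```python
import numpy as np

# Mirrors what readFirstFeatures builds: a (10, 3, 4) float array
# with a single entry filled, matching arr.at(0,2,1) = 5. above.
arr = np.zeros((10, 3, 4), dtype=np.float32)
arr[0, 2, 1] = 5.0

print(arr.shape, arr[0, 2, 1])  # (10, 3, 4) 5.0
```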
