-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from emilbols/djc2
migration to djc2
- Loading branch information
Showing
13 changed files
with
724 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
DeepJet: Repository for training and evaluation of deep neural networks for Jet identification | ||
=============================================================================== | ||
|
||
This package depends on DeepJetCore 2.X (https://github.com/DL4Jets/DeepJetCore). | ||
|
||
Usage | ||
============== | ||
|
||
After logging in, please source the environment (please cd to the directory first!): | ||
``` | ||
cd <your working dir>/DeepJet | ||
source env.sh | ||
``` | ||
|
||
|
||
The preparation for the training consists of the following steps | ||
==== | ||
|
||
- define the data structure for the training. The DeepJet data structure is found in the modules directory as the class TrainData_DF. | ||
|
||
- convert the root file to the data structure for training using DeepJetCore tools: | ||
``` | ||
convertFromSource.py -i /path/to/the/root/ntuple/list_of_root_files.txt -o /output/path/that/needs/some/disk/space -c TrainData_DF | ||
``` | ||
|
||
This step can take a while. | ||
|
||
|
||
- prepare the training file and the model. Please refer to DeepJet/Train/train_DeepFlavour.py | ||
|
||
|
||
|
||
Training | ||
==== | ||
|
||
Since the training can take a while, it is advised to open a screen session, such that it does not die at logout. | ||
``` | ||
ssh lxplus.cern.ch | ||
<note the machine you are on, e.g. lxplus058> | ||
screen | ||
ssh lxplus7 | ||
``` | ||
Then source the environment, and proceed with the training. Detach the screen session with Ctrl+a d. | ||
You can go back to the session by logging in to the machine the session is running on (e.g. lxplus058): | ||
|
||
``` | ||
ssh lxplus.cern.ch | ||
ssh lxplus058 | ||
screen -r | ||
``` | ||
|
||
Please close the session when the training is finished. | ||
|
||
The training is launched in the following way: | ||
``` | ||
python train_DeepFlavour.py /path/to/the/output/of/convert/dataCollection.dc <output dir of your choice> | ||
``` | ||
|
||
|
||
Evaluation | ||
==== | ||
|
||
After the training has finished, the performance can be evaluated. | ||
|
||
``` | ||
predict.py <output dir of training>/KERAS_model.h5 <output dir of training>/trainsamples.dc <dir with test sample stored as rootfiles>/filelist.txt <output directory> | ||
``` | ||
|
||
This creates output trees with the prediction scores as well as truth information and some kinematic variables. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
|
||
#import sys | ||
#import tensorflow as tf | ||
#sys.modules["keras"] = tf.keras | ||
|
||
from DeepJetCore.training.training_base import training_base | ||
from DeepJetCore.modeltools import fixLayersContaining,printLayerInfosAndWeights | ||
|
||
|
||
#also does all the parsing | ||
train=training_base(testrun=False) | ||
|
||
newtraining= not train.modelSet() | ||
#for recovering a training | ||
if newtraining: | ||
from models import model_deepFlavourReference | ||
|
||
train.setModel(model_deepFlavourReference,dropoutRate=0.1,momentum=0.3) | ||
|
||
#train.keras_model=fixLayersContaining(train.keras_model, 'regression', invert=False) | ||
|
||
train.compileModel(learningrate=0.001, | ||
loss='categorical_crossentropy', | ||
metrics=['accuracy']) | ||
|
||
|
||
train.train_data.maxFilesOpen=1 | ||
|
||
print(train.keras_model.summary()) | ||
# First training pass: a single epoch, run before the 'input_batchnorm'
# layers are passed to fixLayersContaining.
# lr_patience was previously written as `--1`, which Python parses as
# -(-1) == +1; it is now -1, the same value the 65-epoch trainModel call in
# this script uses.
# NOTE(review): presumably a negative patience disables the LR-reduction
# callback in DeepJetCore -- confirm against training_base.trainModel.
model,history = train.trainModel(nepochs=1,
                                 batchsize=10000,
                                 stop_patience=300,
                                 lr_factor=0.5,
                                 lr_patience=-1,
                                 lr_epsilon=0.0001,
                                 lr_cooldown=6,
                                 lr_minimum=0.0001)
|
||
|
||
print('fixing input norms...') | ||
train.keras_model=fixLayersContaining(train.keras_model, 'input_batchnorm') | ||
train.compileModel(learningrate=0.0001, | ||
loss='categorical_crossentropy', | ||
metrics=['accuracy']) | ||
|
||
print(train.keras_model.summary()) | ||
#printLayerInfosAndWeights(train.keras_model) | ||
|
||
model,history = train.trainModel(nepochs=65, #sweet spot from looking at the testing plots | ||
batchsize=10000, | ||
stop_patience=300, | ||
lr_factor=0.5, | ||
lr_patience=-1, | ||
lr_epsilon=0.0001, | ||
lr_cooldown=10, | ||
lr_minimum=0.00001, | ||
verbose=1,checkperiod=1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
#! /bin/bash | ||
|
||
# Physical (symlink-resolved, via `pwd -P`) directory that contains this
# script. BASH_SOURCE is used so the path is correct when the file is sourced.
export DJSUBPACKAGE="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd -P )"
export DEEPJETCORE_SUBPACKAGE="$DJSUBPACKAGE"

# All expansions are quoted so a checkout path containing spaces or glob
# characters is not word-split.
cd "$DJSUBPACKAGE"
export PYTHONPATH="$DJSUBPACKAGE/modules:$PYTHONPATH"
export PYTHONPATH="$DJSUBPACKAGE/modules/datastructures:$PYTHONPATH"
export PATH="$DJSUBPACKAGE/scripts:$PATH"

# Compiled modules are searched both as shared libraries and as Python modules.
export LD_LIBRARY_PATH="$DJSUBPACKAGE/modules/compiled:$LD_LIBRARY_PATH"
export PYTHONPATH="$DJSUBPACKAGE/modules/compiled:$PYTHONPATH"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
global_layers_list = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
|
||
# Define custom losses here and add them to the global_loss_list dict (important!) | ||
global_loss_list = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
|
||
# Define custom metrics here and add them to the global_metrics_list dict (important!) | ||
global_metrics_list = {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
|
||
|
||
|
||
# | ||
# This file might need some adjustments but should serve as a good basis | ||
# | ||
|
||
PYTHON_INCLUDE = `python-config --includes` | ||
PYTHON_LIB=`python-config --libs` | ||
|
||
ROOTSTUFF=`root-config --libs --glibs --ldflags` | ||
ROOTCFLAGS=`root-config --cflags` | ||
|
||
CPP_FILES := $(wildcard src/*.cpp) | ||
OBJ_FILES := $(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o))) | ||
LD_FLAGS := `root-config --cflags --glibs` -lMathMore -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers -lquicklz | ||
CC_FLAGS := -fPIC -g -Wall `root-config --cflags` | ||
CC_FLAGS += -I./interface -I${DEEPJETCORE}/compiled/interface | ||
DJC_LIB = -L${DEEPJETCORE}/compiled -ldeepjetcorehelpers | ||
|
||
|
||
MODULES := $(wildcard src/*.C) | ||
MODULES_OBJ_FILES := $(addprefix ./,$(notdir $(MODULES:.C=.o))) | ||
MODULES_SHARED_LIBS := $(addprefix ./,$(notdir $(MODULES:.C=.so))) | ||
|
||
|
||
all: $(MODULES_SHARED_LIBS) $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp)) | ||
|
||
#compile the module helpers if necessary | ||
#../modules/libsubpackagehelpers.so: | ||
# cd ../modules; make; cd - | ||
|
||
# Build ./<name> from bin/<name>.cpp plus every object in $(OBJ_FILES).
# $(LD_FLAGS) is placed after the sources/objects: the linker processes
# libraries and objects in command-line order, so libraries named before the
# objects that need them may be skipped (static libs, or --as-needed).
%: bin/%.cpp $(OBJ_FILES)
	g++ $(CC_FLAGS) $(OBJ_FILES) $< -o $@ $(LD_FLAGS)
|
||
|
||
obj/%.o: src/%.cpp | ||
g++ $(CC_FLAGS) -c -o $@ $< | ||
|
||
|
||
#python modules | ||
|
||
%.so: %.o | ||
g++ -o $(@) -shared -fPIC $(LINUXADD) $< $(ROOTSTUFF) $(PYTHON_LIB) -lboost_python -lboost_numpy $(DJC_LIB) | ||
|
||
%.o: src/%.C | ||
g++ $(ROOTCFLAGS) -O2 $(CC_FLAGS) -I./interface $(PYTHON_INCLUDE) -fPIC -c -o $(@) $< | ||
|
||
|
||
# Remove build products: objects/dependency files under obj/, the shared
# module libraries, and the executables built from bin/*.cpp.
# The executables are listed with the same patsubst expression the `all`
# target uses; a bare `%` in a recipe is a literal file name, not a pattern.
.PHONY: clean
clean:
	rm -f obj/*.o obj/*.d *.so
	rm -f $(patsubst bin/%.cpp, %, $(wildcard bin/*.cpp))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
|
||
|
||
#include <boost/python.hpp> | ||
#include "boost/python/numpy.hpp" | ||
#include "boost/python/list.hpp" | ||
#include "boost/python/str.hpp" | ||
#include <boost/python/exception_translator.hpp> | ||
#include <exception> | ||
|
||
//includes from deepjetcore | ||
#include "helper.h" | ||
#include "simpleArray.h" | ||
|
||
namespace p = boost::python; | ||
namespace np = boost::python::numpy; | ||
|
||
/* | ||
* Example of a python module that will be compiled. | ||
* It can be used, e.g. to convert from fully custom input data | ||
*/ | ||
|
||
/*
 * Demonstration converter exported to Python.
 *
 * Creates a float simpleArray constructed from {10,3,4} (presumably its
 * shape -- confirm against simpleArray.h), writes one sample value at
 * index (0,2,1) and returns the array converted through simpleArrayToNumpy.
 *
 * The `infile` argument is accepted but not read by this example.
 */
np::ndarray readFirstFeatures(std::string infile){
    djc::simpleArray<float> features({10,3,4});

    // fill a single entry so the result is not all default values
    features.at(0,2,1) = 5.;

    return simpleArrayToNumpy(features);
}
|
||
/*
 * Module definition for the Python extension `c_convert`.
 * Calls Py_Initialize() and np::initialize() before registering
 * readFirstFeatures under the same name.
 */
BOOST_PYTHON_MODULE(c_convert) {
    Py_Initialize();
    np::initialize();
    // Qualified through the file's `p` alias (boost::python): this file has
    // no using-directive of its own, so an unqualified `def` would only
    // resolve if one of the included headers happened to leak one.
    p::def("readFirstFeatures", &readFirstFeatures);
}
|
Oops, something went wrong.