Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FedLearner Framework and Core Dependency RA-TLS Configuration #1097

Merged
merged 7 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions fedlearner-sgx-dev.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ ENV DEBIAN_FRONTEND=noninteractive
ENV INSTALL_PREFIX=/usr/local
ENV LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:${INSTALL_PREFIX}/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
ENV PATH=${INSTALL_PREFIX}/bin:${LD_LIBRARY_PATH}:${PATH}
# For Gramine RA-TLS
ENV PYTHONDONTWRITEBYTECODE=1

# Add steps here to set up common dependencies
RUN apt-get update \
Expand Down Expand Up @@ -60,8 +62,8 @@ RUN apt-get install -y libcurl4-openssl-dev libprotobuf-c-dev python3-protobuf p
RUN apt-get install -y libgmp-dev libmpfr-dev libmpc-dev libisl-dev nasm protobuf-compiler

RUN ln -s /usr/bin/python3 /usr/bin/python \
&& pip3 install --upgrade pip \
&& pip3 install toml meson pyelftools
&& pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \
gejielun marked this conversation as resolved.
Show resolved Hide resolved
&& pip3 install --no-compile toml meson pyelftools -i https://mirrors.aliyun.com/pypi/simple/

RUN git clone https://github.com/analytics-zoo/gramine ${GRAMINEDIR} \
&& cd ${GRAMINEDIR} \
Expand Down Expand Up @@ -101,17 +103,17 @@ ENV GRPC_VERSION=v1.38.1

RUN git clone --recurse-submodules -b ${GRPC_VERSION} https://github.com/grpc/grpc ${GRPC_PATH}

RUN pip3 install --upgrade pip \
&& pip3 install -r ${GRPC_PATH}/requirements.txt
RUN pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \
&& pip3 install --no-compile -r ${GRPC_PATH}/requirements.txt -i https://mirrors.aliyun.com/pypi/simple/

# Tensorflow dependencies
ENV BAZEL_VERSION=3.1.0
ENV TF_VERSION=v2.4.2
ENV TF_BUILD_PATH=/tf/src
ENV TF_BUILD_OUTPUT=/tf/output

RUN pip3 install --upgrade pip \
&& pip3 install numpy keras_preprocessing
RUN pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \
&& pip3 install --no-compile numpy keras_preprocessing -i https://mirrors.aliyun.com/pypi/simple/

RUN wget "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel_${BAZEL_VERSION}-linux-x86_64.deb" \
&& dpkg -i bazel_*.deb
Expand All @@ -127,7 +129,7 @@ RUN apt-get install -y libmysqlclient-dev
COPY sgx/grpc/common ${GRPC_PATH}
COPY sgx/grpc/v1.38.1 ${GRPC_PATH}

RUN pip3 install 'cython==0.29.36'
RUN pip3 install --no-compile 'cython==0.29.36' -i https://mirrors.aliyun.com/pypi/simple/
RUN ${GRPC_PATH}/build_python.sh

# Build tensorflow
Expand All @@ -150,17 +152,17 @@ RUN if [ -f ${FEDLEARNER_PATH}/docker/hadoop-mt-2.7.0.tar.gz ]; then mkdir -p /o
# For meituan hadoop auth
RUN apt-get install -y libkrb5-dev openjdk-8-jdk

RUN pip3 install --upgrade pip \
&& pip3 install -r ${FEDLEARNER_PATH}/requirements.txt -i https://mirrors.aliyun.com/pypi/simple/
RUN pip3 install --no-compile --upgrade pip -i https://mirrors.aliyun.com/pypi/simple/ \
&& pip3 install --no-compile -r ${FEDLEARNER_PATH}/requirements.txt -i https://mirrors.aliyun.com/pypi/simple/

RUN cd ${FEDLEARNER_PATH} \
&& make protobuf \
&& python3 setup.py bdist_wheel \
&& pip3 install ./dist/*.whl
&& pip3 install --no-compile ./dist/*.whl

# Re-install tensorflow, uninstall tensorflow_io, mock it
RUN pip3 uninstall -y tensorflow tensorflow-io \
&& pip3 install ${TF_BUILD_OUTPUT}/*.whl
&& pip3 install --no-compile ${TF_BUILD_OUTPUT}/*.whl

# Re-install fedlearner plugin
RUN cd ${FEDLEARNER_PATH} \
Expand All @@ -170,7 +172,7 @@ RUN cd ${FEDLEARNER_PATH} \

# Re-install grpcio
RUN pip3 uninstall -y grpcio \
&& pip3 install ${GRPC_PATH}/dist/grpcio*.whl
&& pip3 install --no-compile ${GRPC_PATH}/dist/grpcio*.whl

# For debug
RUN apt-get install -y strace gdb ctags vim
Expand All @@ -182,6 +184,11 @@ COPY sgx/configs /
RUN echo "enabled=0" > /etc/default/apport
RUN echo "exit 0" > /usr/sbin/policy-rc.d

# For gramine ra-tls
RUN dpkg --remove --force-depends libgtk2.0-0 \
&& pip3 uninstall -y numpy keras_preprocessing protobuf \
&& pip3 install --no-compile numpy keras_preprocessing protobuf -i https://mirrors.aliyun.com/pypi/simple/

# Clean tmp files
RUN apt-get clean all \
&& rm -rf /var/lib/apt/lists/* \
Expand Down
54 changes: 27 additions & 27 deletions fedlearner/trainer/trainer_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,33 @@ def _run_master(role,
cluster_server = ClusterServer(cluster_spec, "master",
server_port=args.server_port)

# use Meituan hadoop
# first: convert the Meituan HDFS path to a local storage path; if the PSI result file already exists locally, use the local file
# second: if the PSI result file does not exist locally, download it from Meituan HDFS to local storage
if args.using_mt_hadoop:
data_path = args.data_path
if data_path:
local_data_path = get_local_temp_path(data_path)
if not exists(local_data_path):
data_path = mt_hadoop_download(data_path)
else:
data_path = local_data_path
args.data_path = data_path

checkpoint_path = args.checkpoint_path
if checkpoint_path:
args.checkpoint_path = get_local_temp_path(checkpoint_path)

load_checkpoint_path = args.load_checkpoint_path
if load_checkpoint_path:
args.load_checkpoint_path = get_local_temp_path(load_checkpoint_path)
if not exists(args.load_checkpoint_path):
mt_hadoop_download(load_checkpoint_path)

export_path = args.export_path
if export_path:
args.export_path = get_local_temp_path(export_path)

checkpoint_filename_with_path = _get_checkpoint_filename_with_path(args)
data_visitor = _create_data_visitor(args)
master_factory = LeaderTrainerMaster \
Expand Down Expand Up @@ -479,33 +506,6 @@ def train(role,
if not isinstance(role, str) or role.lower() not in (LEADER, FOLLOER):
raise ValueError("--role must set one of %s or %s"%(LEADER, FOLLOER))

# use Meituan hadoop
# first: convert the Meituan HDFS path to a local storage path; if the PSI result file already exists locally, use the local file
# second: if the PSI result file does not exist locally, download it from Meituan HDFS to local storage
if args.using_mt_hadoop:
data_path = args.data_path
if data_path:
local_data_path = get_local_temp_path(data_path)
if not exists(local_data_path):
data_path = mt_hadoop_download(data_path)
else:
data_path = local_data_path
args.data_path = data_path

checkpoint_path = args.checkpoint_path
if checkpoint_path:
args.checkpoint_path = get_local_temp_path(checkpoint_path)

load_checkpoint_path = args.load_checkpoint_path
if load_checkpoint_path:
args.load_checkpoint_path = get_local_temp_path(load_checkpoint_path)
if not exists(args.load_checkpoint_path):
mt_hadoop_download(load_checkpoint_path)

export_path = args.export_path
if export_path:
args.export_path = get_local_temp_path(export_path)

if args.loglevel:
fl_logging.set_level(args.loglevel)

Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

def get_version():
base = "1.5"
day = time.strftime('%Y%m%d', time.localtime())
return '%s-dev%s'%(base, day)
return '%s-dev' % base
gejielun marked this conversation as resolved.
Show resolved Hide resolved

setup(
name='fedlearner',
Expand Down
Loading
Loading