huggingface · IlyasMoutawwakil · Sep 16, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/docker/Dockerfile.intel b/docker/Dockerfile.intel
@@ -27,8 +27,6 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
     libpng-dev \
     python3 \
     python3-pip \
-    python3-dev \
-    libnuma-dev \
     && rm -rf /var/lib/apt/lists/*"
 RUN /usr/sbin/update-ccache-symlinks
 RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
@@ -46,7 +44,7 @@ RUN python3 -m pip install --no-cache-dir \
     -f https://download.pytorch.org/whl/torch_stable.html && \
     python3 -m pip install intel-extension-for-pytorch==$IPEX_VERSION && \
     python3 -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ && \
-    python3 -m pip install --no-cache-dir  numa
+    python3 -m pip install --no-cache-dir  py-libnuma
 
 ARG KMP_BLOCKTIME=1
 ENV KMP_BLOCKTIME=${KMP_BLOCKTIME}

diff --git a/optimum/intel/utils/import_utils.py b/optimum/intel/utils/import_utils.py
@@ -144,7 +144,7 @@
 
 if _numa_available:
     try:
-        importlib_metadata.version("numa")
+        importlib_metadata.version("py-libnuma")
     except importlib_metadata.PackageNotFoundError:
         _numa_available = False
 

diff --git a/optimum/intel/utils/modeling_utils.py b/optimum/intel/utils/modeling_utils.py
@@ -192,7 +192,7 @@ def bind_cores_for_best_perf():
         raise OSError("bind_cores_for_best_perf: OS not supported, this function can only be run on Linux systems.")
     if not is_numa_available():
         logger.error("'numa' module not found")
-        raise ImportError("'numa' module not found, install with 'pip install numa'")
+        raise ImportError("'numa' module not found, install with 'pip install py-libnuma'")
     import numa
 
     local_size = get_int_from_env(
@@ -201,7 +201,7 @@ def bind_cores_for_best_perf():
     rank_id = get_int_from_env(
         ["LOCAL_RANK", "MPI_LOCALRANKID", "OMPI_COMM_WORLD_LOCAL_RANK", "MV2_COMM_WORLD_LOCAL_RANK"], 0
     )
-    nodes = numa.get_max_node() + 1
+    nodes = numa.info.get_max_node() + 1
     rank_per_node = math.ceil(local_size / nodes)
     num_cpus_per_nodes = int(psutil.cpu_count(logical=False) / nodes)
     node_id = int(rank_id / rank_per_node)
@@ -212,17 +212,17 @@ def bind_cores_for_best_perf():
     else:
         num_cpus_per_rank = int(os.getenv("OMP_NUM_THREADS"))
         logger.info(f"OMP_NUM_THREADS already set to  {num_cpus_per_rank}")
-    if len(numa.get_membind()) == nodes:
+    if len(numa.memory.get_membind_nodes()) == nodes:
         # if numa memory binding is not set, set it to the node where the rank is running
-        numa.set_membind([node_id])
+        numa.memory.set_membind_nodes((node_id))
 
     torch.set_num_threads(num_cpus_per_rank)
 
-    if len(numa.get_affinity(0)) == psutil.cpu_count(logical=True):
+    if len(numa.schedule.get_affinitive_cpus(0)) == psutil.cpu_count(logical=True):
         # if numa affinity is unset (default value is set to all logical cores) set it to the physical cores assigned to the rank
         cpu_start = num_cpus_per_rank * rank_offset_per_node
-        numa.set_affinity(
+        numa.schedule.run_on_cpus(
             0,
-            list(numa.node_to_cpus(node_id))[cpu_start : cpu_start + num_cpus_per_rank],
+            *(numa.info.node_to_cpus(node_id)[cpu_start : cpu_start + num_cpus_per_rank]),
         )
-    logger.info(f"affinity={numa.get_affinity(0)}, membind = {numa.get_membind()}")
+    logger.info(f"affinity={numa.schedule.get_affinitive_cpus(0)}, membind = {numa.memory.get_membind_nodes()}")