Add zstd support and installation of conda packages (.conda and .tar.bz2) to pyjs.

Review notes on this revision of the patch:
  * include/pyjs/install_conda_file.hpp and src/install_conda_file.cpp were
    revised (see the comments inside them); their "index" blob ids still name
    the contents from before the review.
  * Indentation and blank context lines were re-created by hand, and the header
    names of unchanged "#include" context lines (and the template arguments of
    the unchanged std::pair return type) are not known. Hunks against existing
    files may need --ignore-whitespace or a manual fix-up.

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 316b720..8cbd1f5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -53,7 +53,7 @@ jobs:
               -c https://repo.mamba.pm/emscripten-forge \
               -c https://repo.mamba.pm/conda-forge \
               --yes \
-              python pybind11 nlohmann_json pybind11_json numpy "pytest==7.1.1" bzip2 sqlite zlib libffi exceptiongroup
+              python pybind11 nlohmann_json pybind11_json numpy "pytest==7.1.1" bzip2 sqlite zlib zstd libffi exceptiongroup
 
 
           mkdir build
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9051c0d..8490731 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,6 +83,7 @@ set(PYJS_HEADERS
     include/pyjs/export_py_object.hpp
     include/pyjs/export_pyjs_module.hpp
     include/pyjs/untar.hpp
+    include/pyjs/install_conda_file.hpp
     include/pyjs/inflate.hpp
     ${CMAKE_CURRENT_BINARY_DIR}/pyjs_pre.js
     ${CMAKE_CURRENT_BINARY_DIR}/pyjs_post.js
@@ -110,6 +111,7 @@ add_library(pyjs STATIC
     src/js_timestamp.cpp
     src/inflate.cpp
     src/untar.cpp
+    src/install_conda_file.cpp
     ${PYCPPSOURCES}
 )
 
@@ -153,6 +155,7 @@ SET(PYTHON_UTIL_LIBS
     ${CMAKE_INSTALL_PREFIX}/lib/libz.a
     ${CMAKE_INSTALL_PREFIX}/lib/libsqlite3.a
     ${CMAKE_INSTALL_PREFIX}/lib/libffi.a
+    ${CMAKE_INSTALL_PREFIX}/lib/libzstd.a
 )
 
 
diff --git a/build_mkdocs.sh b/build_mkdocs.sh
index ede9bea..dbbdb5f 100755
--- a/build_mkdocs.sh
+++ b/build_mkdocs.sh
@@ -28,7 +28,7 @@ if [ ! -d "$WASM_ENV_PREFIX" ]; then
             -c https://repo.mamba.pm/conda-forge \
             --yes \
             python pybind11 nlohmann_json pybind11_json numpy \
-            bzip2 sqlite zlib libffi exceptiongroup \
+            bzip2 sqlite zlib zstd libffi exceptiongroup \
             "xeus<4" "xeus-lite<2" xeus-python "xeus-javascript>=0.3.2" xtl "ipython=8.22.2=py311had7285e_1" "traitlets>=5.14.2"
 
 else
diff --git a/include/pyjs/install_conda_file.hpp b/include/pyjs/install_conda_file.hpp
new file mode 100644
index 0000000..e3b065c
--- /dev/null
+++ b/include/pyjs/install_conda_file.hpp
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <string>
+
+#include <emscripten/bind.h>
+
+namespace em = emscripten;
+
+namespace pyjs
+{
+
+    /// Install the payload of a conda v2 (`.conda`) package.
+    ///
+    /// Decompresses `zstd_file_path` to `<working_dir>/pkg.tar`, unpacks that
+    /// archive into `working_dir`, merges the unpacked `etc` and `share`
+    /// directories into `<prefix>/etc` and `<prefix>/share` and `site-packages`
+    /// into `<prefix>/lib/python3.11/site-packages`, then removes `working_dir`.
+    ///
+    /// @param zstd_file_path  zstd-compressed tar archive to install
+    /// @param working_dir     scratch directory, removed once installation is done
+    /// @param prefix          root directory of the target environment
+    /// @return the array filled in by `untar_impl`; it is empty when the archive
+    ///         could not be decompressed or opened
+    em::val install_conda_file(const std::string& zstd_file_path,
+                               const std::string& working_dir,
+                               const std::string& prefix);
+
+}
diff --git a/include/pyjs/pre_js/load_pkg.js b/include/pyjs/pre_js/load_pkg.js
index 52091f0..184a1d0 100644
--- a/include/pyjs/pre_js/load_pkg.js
+++ b/include/pyjs/pre_js/load_pkg.js
@@ -67,6 +67,69 @@ def _py_untar(tarball_path, target_dir):
 }
 
 
+Module["_unzip_from_python"] = function(tarball_path, target_dir) {
+    Module.exec(`
+def _py_unzip(tarball_path, target_dir):
+    import json
+    from pathlib import Path
+    import zipfile
+
+    target = Path(target_dir)
+    target.mkdir(parents=True, exist_ok=True)
+    pkg_file = {"name": "", "path": ""}
+    with zipfile.ZipFile(tarball_path, mode="r") as archive:
+
+        for filename in archive.namelist():
+            if filename.startswith("pkg-"):
+                pkg_file["name"] = filename
+                pkg_file["path"] = str(target / filename)
+                archive.extract(filename, target_dir)
+                break
+    return json.dumps(pkg_file)
+
+`)
+    let extracted_file = Module.eval(`_py_unzip("${tarball_path}", "${target_dir}")`)
+
+    return JSON.parse(extracted_file)
+}
+
+Module["_install_conda_file_from_python"] = function(tarball_path, target_dir) {
+    Module.exec(`
+def _py_unbz2(tarball_path, target_dir):
+    import json
+    from pathlib import Path
+    import tarfile
+    import shutil
+    import os
+    import sys
+
+    target = Path(target_dir)
+    prefix = Path(sys.prefix)
+    try:
+        with tarfile.open(tarball_path) as tar:
+            tar.extractall(target_dir)
+
+        src = target / "site-packages"
+        dest = prefix / "lib/python3.11/site-packages"
+        shutil.copytree(src, dest, dirs_exist_ok=True)
+        for folder in ["etc", "share"]:
+            src = target / folder
+            dest = prefix / folder
+            if src.exists():
+                shutil.copytree(src, dest, dirs_exist_ok=True)
+        shutil.rmtree(target)
+    except Exception as e:
+        print("ERROR",e)
+        raise e
+
+    return json.dumps([])
+
+`)
+    let extracted_file = Module.eval(`_py_unbz2("${tarball_path}", "${target_dir}")`)
+
+    return JSON.parse(extracted_file)
+}
+
 
 
 
@@ -108,20 +171,55 @@ Module["bootstrap_from_empack_packed_environment"] = async function
             pkg,
             verbose
         ) {
-        const package_url = pkg?.url ?? `${package_tarballs_root_url}/${pkg.filename}`;
-        if (verbose) {
-            console.log(`!!fetching pkg ${pkg.name} from ${package_url}`)
-        }
-        let byte_array = await fetchByteArray(package_url)
-        const tarball_path = `/package_tarballs/${pkg.filename}`;
-        Module.FS.writeFile(tarball_path, byte_array);
-        if(verbose){
-            console.log(`!!extract ${tarball_path} (${byte_array.length} bytes)`)
+            const package_url =
+                pkg?.url ?? `${package_tarballs_root_url}/${pkg.filename}`;
+            if (verbose) {
+                console.log(`!!fetching pkg ${pkg.name} from ${package_url}`);
+            }
+            let byte_array = await fetchByteArray(package_url);
+            const tarball_path = `/package_tarballs/${pkg.filename}`;
+            Module.FS.writeFile(tarball_path, byte_array);
+            if (verbose) {
+                console.log(
+                    `!!extract ${tarball_path} (${byte_array.length} bytes)`
+                );
+            }
+
+            if (verbose) {
+                console.log("await python_is_ready_promise");
+            }
+            await python_is_ready_promise;
+
+            if (package_url.toLowerCase().endsWith(".conda")) {
+                // Conda v2 packages
+                if (verbose) {
+                    console.log(
+                        `!!extract conda package ${package_url} (${byte_array.length} bytes)`
+                    );
+                }
+                const dest = `/conda_packages/${pkg.name}`;
+                const pkg_file = Module["_unzip_from_python"](
+                    tarball_path,
+                    dest
+                );
+                return Module._install_conda_file(pkg_file.path, dest, prefix);
+            } else if (package_url.toLowerCase().endsWith(".tar.bz2")) {
+                // Conda v1 packages
+                if (verbose) {
+                    console.log(
+                        `!!extract conda package ${package_url} (${byte_array.length} bytes)`
+                    );
+                }
+                const dest = `/conda_packages/${pkg.name}`;
+                return Module["_install_conda_file_from_python"](
+                    tarball_path,
+                    dest
+                );
+            } else {
+                // Pre-relocated packages
+                return Module["_untar_from_python"](tarball_path);
+            }
         }
-        if(verbose){console.log("await python_is_ready_promise");}
-        await python_is_ready_promise;
-        return Module["_untar_from_python"](tarball_path);
-    }
 
 
     async function bootstrap_python(prefix, package_tarballs_root_url, python_package, verbose) {
diff --git a/include/pyjs/untar.hpp b/include/pyjs/untar.hpp
index a55f3aa..07cde40 100644
--- a/include/pyjs/untar.hpp
+++ b/include/pyjs/untar.hpp
@@ -1,9 +1,10 @@
 #pragma once
 
 #include
+#include <emscripten/bind.h>
+namespace em = emscripten;
 
 namespace pyjs{
-
     em::val untar(const std::string &tar_path, const std::string &path);
-
+    void untar_impl(FILE *a, const char *path, em::val & shared_libraraies);
 }
\ No newline at end of file
diff --git a/src/convert.cpp b/src/convert.cpp
index 0597604..079c98d 100644
--- a/src/convert.cpp
+++ b/src/convert.cpp
@@ -9,6 +9,7 @@
 namespace py = pybind11;
 namespace em = emscripten;
 
+
 namespace pyjs
 {
     std::pair implicit_py_to_js(py::object& py_ret)
diff --git a/src/export_js_module.cpp b/src/export_js_module.cpp
index bd290dc..d38b184 100644
--- a/src/export_js_module.cpp
+++ b/src/export_js_module.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <pyjs/install_conda_file.hpp>
 #include
 
 #include
@@ -105,6 +106,7 @@ namespace pyjs
 
 
         em::function("_untar", &untar);
+        em::function("_install_conda_file", &install_conda_file);
         em::function("setenv", &set_env);
 
         // py-object (proxy)
diff --git a/src/install_conda_file.cpp b/src/install_conda_file.cpp
new file mode 100644
index 0000000..66cfc64
--- /dev/null
+++ b/src/install_conda_file.cpp
@@ -0,0 +1,217 @@
+#include <cstdio>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
+
+#include <zstd.h>
+
+#include <emscripten/bind.h>
+
+#include <pyjs/install_conda_file.hpp>
+#include <pyjs/untar.hpp>
+
+namespace em = emscripten;
+namespace fs = std::filesystem;
+
+namespace pyjs
+{
+
+    // Stream-decompress the Zstandard file `inputFile` into `outputFile`.
+    // Returns true on success. On failure a message is printed to std::cerr
+    // and false is returned; `outputFile` may then hold partial data.
+    bool decompress_zstd(const fs::path& inputFile, const fs::path& outputFile)
+    {
+        std::ifstream fin(inputFile, std::ios::binary);
+        std::ofstream fout(outputFile, std::ios::binary);
+
+        if (!fin.is_open() || !fout.is_open())
+        {
+            std::cerr << "Failed to open input or output file!" << std::endl;
+            return false;
+        }
+
+        // Owning handle, so the context is freed on every return path.
+        const std::unique_ptr<ZSTD_DCtx, decltype(&ZSTD_freeDCtx)> dctx(ZSTD_createDCtx(),
+                                                                        &ZSTD_freeDCtx);
+        if (!dctx)
+        {
+            std::cerr << "Failed to create ZSTD decompression context!" << std::endl;
+            return false;
+        }
+
+        // Buffer sizes recommended by zstd for streaming decompression.
+        std::vector<char> in_buffer(ZSTD_DStreamInSize());
+        std::vector<char> out_buffer(ZSTD_DStreamOutSize());
+
+        // Last value returned by ZSTD_decompressStream: 0 once a frame has been
+        // completely decoded and flushed.
+        size_t result = 0;
+
+        while (fin.read(in_buffer.data(), static_cast<std::streamsize>(in_buffer.size()))
+               || fin.gcount() > 0)
+        {
+            ZSTD_inBuffer input = { in_buffer.data(), static_cast<size_t>(fin.gcount()), 0 };
+            bool flush_pending = false;
+
+            do
+            {
+                ZSTD_outBuffer output = { out_buffer.data(), out_buffer.size(), 0 };
+
+                result = ZSTD_decompressStream(dctx.get(), &output, &input);
+                if (ZSTD_isError(result))
+                {
+                    std::cerr << "Decompression error: " << ZSTD_getErrorName(result)
+                              << std::endl;
+                    return false;
+                }
+
+                if (!fout.write(out_buffer.data(), static_cast<std::streamsize>(output.pos)))
+                {
+                    std::cerr << "Failed to write " << outputFile << std::endl;
+                    return false;
+                }
+
+                // A full output buffer in the middle of a frame means the
+                // decoder may still hold decoded bytes: call it again even
+                // when this chunk of input is used up.
+                flush_pending = result != 0 && output.pos == output.size;
+            } while (input.pos < input.size || flush_pending);
+        }
+
+        if (fin.bad())
+        {
+            std::cerr << "Failed to read " << inputFile << std::endl;
+            return false;
+        }
+
+        // Input ran out in the middle of a frame: the archive is truncated.
+        if (result != 0)
+        {
+            std::cerr << "Decompression error: unexpected end of input" << std::endl;
+            return false;
+        }
+
+        fout.flush();
+        if (!fout)
+        {
+            std::cerr << "Failed to write " << outputFile << std::endl;
+            return false;
+        }
+        return true;
+    }
+
+    // Copy the directory tree `source_path` into `destination_path`, replacing
+    // files that already exist there, then delete `source_path`.
+    // Returns false when `source_path` is not an existing directory or when a
+    // filesystem error occurs (the error is printed to std::cerr).
+    bool merge_directories(fs::path& source_path, fs::path& destination_path)
+    {
+        try
+        {
+            if (!fs::is_directory(source_path))
+            {
+                return false;
+            }
+
+            // Does nothing when the directory already exists.
+            fs::create_directories(destination_path);
+
+            for (const auto& entry : fs::recursive_directory_iterator(source_path))
+            {
+                // Every entry path starts with `source_path`, so a lexical
+                // computation suffices. fs::relative would resolve symlinks
+                // first and could name the link target instead of the entry.
+                const fs::path destination_entry_path
+                    = destination_path / entry.path().lexically_relative(source_path);
+
+                if (entry.is_directory())
+                {
+                    fs::create_directories(destination_entry_path);
+                }
+                else if (entry.is_regular_file())
+                {
+                    // Copy/replace files from source to destination
+                    fs::copy_file(entry.path(),
+                                  destination_entry_path,
+                                  fs::copy_options::overwrite_existing);
+                }
+            }
+
+            fs::remove_all(source_path);
+
+            return true;
+        }
+        catch (const fs::filesystem_error& e)
+        {
+            std::cerr << "Filesystem error: " << e.what() << std::endl;
+            return false;
+        }
+    }
+
+    // Install a conda v2 payload: decompress `zstd_file_path`, unpack it in
+    // `working_dir`, merge the result into `prefix` and remove `working_dir`.
+    // Returns the array filled in by untar_impl (empty if decompressing or
+    // opening the archive failed).
+    em::val install_conda_file(const std::string& zstd_file_path,
+                               const std::string& working_dir,
+                               const std::string& prefix)
+    {
+        auto output = em::val::array();
+        const fs::path output_dir(working_dir);
+        const fs::path output_file = output_dir / "pkg.tar";
+
+        if (!decompress_zstd(fs::path(zstd_file_path), output_file))
+        {
+            return output;
+        }
+
+        {
+            // Scoped so the archive is closed before `working_dir` is removed.
+            const std::unique_ptr<FILE, decltype(&std::fclose)> tar_file(
+                std::fopen(output_file.c_str(), "rb"), &std::fclose);
+            if (!tar_file)
+            {
+                std::cerr << "Failed to open decompressed archive: " << output_file << std::endl;
+                return output;
+            }
+            untar_impl(tar_file.get(), output_dir.c_str(), output);
+        }
+
+        const fs::path prefix_path(prefix);
+
+        // Same layout as the Python installer for .tar.bz2 packages:
+        // <prefix>/etc and <prefix>/share.
+        for (const char* dir_name : { "etc", "share" })
+        {
+            fs::path source_dir_path = output_dir / dir_name;
+            fs::path dest_dir_path = prefix_path / dir_name;
+            // Returns false when the package ships no such directory.
+            merge_directories(source_dir_path, dest_dir_path);
+        }
+
+        fs::path site_packages_dir_path = output_dir / "site-packages";
+        if (fs::exists(site_packages_dir_path))
+        {
+            // NOTE(review): the Python version is hard-coded here.
+            fs::path site_packages_dest = prefix_path / "lib/python3.11/site-packages";
+            if (!merge_directories(site_packages_dir_path, site_packages_dest))
+            {
+                std::cerr << " Failed to copy package to site-packages directory: "
+                          << site_packages_dir_path << std::endl;
+            }
+        }
+
+        // Best-effort cleanup: the package is already installed at this point.
+        std::error_code ec;
+        fs::remove_all(output_dir, ec);
+        if (ec)
+        {
+            std::cerr << "Failed to remove " << output_dir << ": " << ec.message() << std::endl;
+        }
+        return output;
+    }
+}
diff --git a/src/js_timestamp.cpp b/src/js_timestamp.cpp
index 263c7e8..0b594b1 100644
--- a/src/js_timestamp.cpp
+++ b/src/js_timestamp.cpp
@@ -1 +1 @@
-#define PYJS_JS_UTC_TIMESTAMP "2024-08-01 08:54:38.482179"
\ No newline at end of file
+#define PYJS_JS_UTC_TIMESTAMP "2024-10-18 09:14:27.486802"
\ No newline at end of file