diff --git a/CHANGELOG.md b/CHANGELOG.md index 75211f2..a1b2af9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Remove "_dir" or "-dir" suffix from input flags and variable/argument names + ## [1.0.0] - 2023-08-29 ### Added diff --git a/Dockerfile b/Dockerfile index fae6752..2046331 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,8 +26,8 @@ RUN pip install --no-cache-dir /src/scraper \ RUN mkdir -p /output WORKDIR /output -ENV BUILD_DIR=/tmp -ENV OUTPUT_DIR=/output -ENV ZIMUI_DIST_DIR=/src/zimui +ENV FCC_BUILD=/tmp +ENV FCC_OUTPUT=/output +ENV FCC_ZIMUI_DIST=/src/zimui ENTRYPOINT ["fcc2zim"] diff --git a/scraper/pyproject.toml b/scraper/pyproject.toml index 9cda4db..11fcaa8 100644 --- a/scraper/pyproject.toml +++ b/scraper/pyproject.toml @@ -85,7 +85,7 @@ fix-ruff = "inv fix-ruff --args '{args}'" fixall = "inv fixall --args '{args}'" [tool.hatch.envs.check] -features = ["scripts", "check"] +features = ["scripts", "check", "test"] [tool.hatch.envs.check.scripts] pyright = "inv check-pyright --args '{args}'" diff --git a/scraper/src/fcc2zim/build.py b/scraper/src/fcc2zim/build.py index 549b14e..8ea1b1a 100644 --- a/scraper/src/fcc2zim/build.py +++ b/scraper/src/fcc2zim/build.py @@ -7,7 +7,7 @@ from fcc2zim.constants import Global -def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str): +def build_curriculum_redirects(curriculum_dist: Path, fcc_lang: str): """ Build the list of redirects from challenge URL to Vite hash URL @@ -15,7 +15,7 @@ def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str): need an URL for each challenge for the zim search to work. This builds the list of redirect needed fron the challenge URL to Vite hash URL. 
""" - index_json_path = curriculum_dist_dir.joinpath("curriculum", fcc_lang, "index.json") + index_json_path = curriculum_dist.joinpath("curriculum", fcc_lang, "index.json") with open(index_json_path) as course_index_str: superblock_dict = json.load(course_index_str)[fcc_lang] @@ -24,7 +24,7 @@ def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str): course_list = superblock_dict[superblock] for course in course_list: meta_json_path = Path( - curriculum_dist_dir, + curriculum_dist, "curriculum", fcc_lang, superblock, @@ -42,31 +42,31 @@ def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str): def build_command( - zimui_dist_dir: Path, + zimui_dist: Path, fcc_lang: str, creator: Creator, - curriculum_dist_dir: Path, + curriculum_dist: Path, ): Global.logger.info("Scraper: build phase starting") # Add zimui files - for file in zimui_dist_dir.rglob("*"): + for file in zimui_dist.rglob("*"): if file.is_dir(): continue - path = str(Path(file).relative_to(zimui_dist_dir)) + path = str(Path(file).relative_to(zimui_dist)) Global.logger.debug(f"Adding {path} to ZIM") creator.add_item_for(path, fpath=file) # Add prebuild generated curriculum file - for file in curriculum_dist_dir.rglob("*"): + for file in curriculum_dist.rglob("*"): if file.is_dir(): continue - path = str(Path("fcc").joinpath(Path(file).relative_to(curriculum_dist_dir))) + path = str(Path("fcc").joinpath(Path(file).relative_to(curriculum_dist))) Global.logger.debug(f"Adding {path} to ZIM") creator.add_item_for(path, fpath=file) for redir_slug, redir_title in build_curriculum_redirects( - curriculum_dist_dir=curriculum_dist_dir, fcc_lang=fcc_lang + curriculum_dist=curriculum_dist, fcc_lang=fcc_lang ): redirect_path = f"{redir_slug}" redirect_url = redir_slug.count("/") * "../" + f"index.html#{redir_slug}" diff --git a/scraper/src/fcc2zim/entrypoint.py b/scraper/src/fcc2zim/entrypoint.py index 8623a8f..72a3a9c 100644 --- a/scraper/src/fcc2zim/entrypoint.py +++ 
b/scraper/src/fcc2zim/entrypoint.py @@ -96,24 +96,24 @@ def main(): default=False, ) parser.add_argument( - "--output-dir", + "--output", type=str, help="Output directory where zim file will be built", - default=os.getenv("OUTPUT_DIR", "../output"), + default=os.getenv("FCC_OUTPUT", "../output"), ) parser.add_argument( - "--build-dir", + "--build", type=str, help="The build directory to hold temporary files during scraper operation", - default=os.getenv("BUILD_DIR", "../build"), + default=os.getenv("FCC_BUILD", "../build"), ) parser.add_argument( - "--zimui-dist-dir", + "--zimui-dist", type=str, help=( "Directory containing Vite build output from the Zim UI Vue.JS application" ), - default=os.getenv("ZIMUI_DIST_DIR", "../zimui/dist"), + default=os.getenv("FCC_ZIMUI_DIST", "../zimui/dist"), ) parser.add_argument( "--zim-file", @@ -143,9 +143,9 @@ def main(): do_fetch=os.getenv("DO_FETCH", "False").lower() == "true", do_prebuild=os.getenv("DO_PREBUILD", "False").lower() == "true", do_build=os.getenv("DO_BUILD", "False").lower() == "true", - zimui_dist_dir=args.zimui_dist_dir, - output_dir=args.output_dir, - build_dir=args.build_dir, + zimui_dist=args.zimui_dist, + output=args.output, + build=args.build, language=args.language, name=args.name, title=args.title, diff --git a/scraper/src/fcc2zim/fetch.py b/scraper/src/fcc2zim/fetch.py index b43f700..daa1e33 100644 --- a/scraper/src/fcc2zim/fetch.py +++ b/scraper/src/fcc2zim/fetch.py @@ -7,7 +7,7 @@ from fcc2zim.constants import Global -def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool): +def fetch_command(zip_path: Path, curriculum_raw: Path, *, force: bool): Global.logger.info("Scraper: fetch phase starting") url = "https://github.com/freeCodeCamp/freeCodeCamp/archive/refs/heads/main.zip" @@ -19,8 +19,8 @@ def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool): else: Global.logger.debug(f"Using existing zip file {zip_path}") - curriculum_raw_dir.mkdir(parents=True, 
exist_ok=True) - shutil.rmtree(curriculum_raw_dir) + curriculum_raw.mkdir(parents=True, exist_ok=True) + shutil.rmtree(curriculum_raw) Global.logger.debug("Extracting files") with zipfile.ZipFile(zip_path, "r") as zip_ref: @@ -30,7 +30,7 @@ def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool): if member.startswith("freeCodeCamp-main/curriculum/") or member.startswith("freeCodeCamp-main/client/i18n/locales") ] - zip_ref.extractall(members=members, path=curriculum_raw_dir) + zip_ref.extractall(members=members, path=curriculum_raw) Global.logger.info(f"Extracted {len(members)} files") - Global.logger.info(f"Fetched curriculum into {curriculum_raw_dir}") + Global.logger.info(f"Fetched curriculum into {curriculum_raw}") Global.logger.info("Scraper: fetch phase finished") diff --git a/scraper/src/fcc2zim/prebuild.py b/scraper/src/fcc2zim/prebuild.py index 67e0a70..9a149b6 100644 --- a/scraper/src/fcc2zim/prebuild.py +++ b/scraper/src/fcc2zim/prebuild.py @@ -32,8 +32,8 @@ def update_index(path: Path, superblock: str, slug: str, language="english"): """ -def write_locales_to_path(source_dir: Path, curriculumdir: Path, language="english"): - shutil.copytree(source_dir, curriculumdir / "locales" / language) +def write_locales_to_path(source: Path, curriculumdir: Path, language="english"): + shutil.copytree(source, curriculumdir / "locales" / language) def write_course_to_path( @@ -75,37 +75,35 @@ def write_course_to_path( def prebuild_command( course_csv: str, fcc_lang: str, - curriculum_raw_dir: Path, - curriculum_dist_dir: Path, + curriculum_raw: Path, + curriculum_dist: Path, ): - """Transform raw data in curriculum_raw_dir into pre-built data in - curriculum_dist_dir + """Transform raw data in curriculum_raw directory into pre-built data in + curriculum_dist directory E.g. 
if lang in english: - - curriculum_dist_dir/index.json + - /index.json => { 'english': {'superblock': ['basic-javascript'] } } - - curriculum_dist_dir/english///_meta.json + - /english///_meta.json => { challenges: [{slug, title}] } - - curriculum_dist_dir/english///{slug}.md + - /english///{slug}.md """ Global.logger.info("Scraper: prebuild phase starting") - curriculum_dist_dir.mkdir(parents=True, exist_ok=True) - shutil.rmtree(curriculum_dist_dir) + curriculum_dist.mkdir(parents=True, exist_ok=True) + shutil.rmtree(curriculum_dist) - challenges_dir = curriculum_raw_dir.joinpath( + challenges = curriculum_raw.joinpath( "freeCodeCamp-main", "curriculum", "challenges" ) - locales_dir = curriculum_raw_dir.joinpath( + locales = curriculum_raw.joinpath( "freeCodeCamp-main", "client", "i18n", "locales", fcc_lang ) # eg. ['basic-javascript', 'debugging'] for course in course_csv.split(","): Global.logger.debug(f"Prebuilding {course}") - meta = json.loads( - challenges_dir.joinpath("_meta", course, "meta.json").read_text() - ) + meta = json.loads(challenges.joinpath("_meta", course, "meta.json").read_text()) # Get the order that the challenges should be completed in for ids = [ item[0] if isinstance(item, list) else item["id"] @@ -114,7 +112,7 @@ def prebuild_command( superblock = meta["superBlock"] challenge_list: list[Challenge] = [] - for file in get_challenges_for_lang(challenges_dir, fcc_lang): + for file in get_challenges_for_lang(challenges, fcc_lang): challenge = Challenge(file) if challenge.course_superblock != superblock: continue @@ -127,10 +125,10 @@ def prebuild_command( sorted(challenge_list, key=lambda x: ids.index(x.identifier())), superblock, course, - curriculum_dist_dir.joinpath("curriculum", fcc_lang), + curriculum_dist.joinpath("curriculum", fcc_lang), ) # Copy all the locales for this language - write_locales_to_path(locales_dir, curriculum_dist_dir, fcc_lang) - Global.logger.info(f"Prebuilt curriculum into {curriculum_dist_dir}") + 
write_locales_to_path(locales, curriculum_dist, fcc_lang) + Global.logger.info(f"Prebuilt curriculum into {curriculum_dist}") Global.logger.info("Scraper: prebuild phase finished") diff --git a/scraper/src/fcc2zim/scraper.py b/scraper/src/fcc2zim/scraper.py index 9430cef..98fd42d 100644 --- a/scraper/src/fcc2zim/scraper.py +++ b/scraper/src/fcc2zim/scraper.py @@ -17,9 +17,9 @@ def __init__( do_fetch: bool, do_prebuild: bool, do_build: bool, - zimui_dist_dir: str, - output_dir: str, - build_dir: str, + zimui_dist: str, + output: str, + build: str, language: str, name: str, title: str, @@ -42,18 +42,18 @@ def __init__( if not (self.do_fetch + self.do_prebuild + self.do_build): self.do_fetch = self.do_prebuild = self.do_build = True - self.zimui_dist_dir = Path(zimui_dist_dir) - if not self.zimui_dist_dir.exists(): - raise ValueError(f"zimui_dist_dir {self.zimui_dist_dir} does not exists") + self.zimui_dist = Path(zimui_dist) + if not self.zimui_dist.exists(): + raise ValueError(f"zimui_dist directory {self.zimui_dist} does not exists") - self.output_dir = Path(output_dir) - self.build_dir = Path(build_dir) - self.curriculum_raw_dir = self.build_dir.joinpath("curriculum-raw") - self.curriculum_dist_dir = self.build_dir.joinpath("curriculum-dist") + self.output = Path(output) + self.build = Path(build) + self.curriculum_raw = self.build.joinpath("curriculum-raw") + self.curriculum_dist = self.build.joinpath("curriculum-dist") # Make sure the output directory exists - self.output_dir.mkdir(parents=True, exist_ok=True) - self.build_dir.mkdir(parents=True, exist_ok=True) + self.output.mkdir(parents=True, exist_ok=True) + self.build.mkdir(parents=True, exist_ok=True) self.language = language if self.language not in FCC_LANG_MAP: @@ -74,7 +74,7 @@ def __init__( self.force = force self.course_csv = course_csv if not zip_path: - self.zip_path = self.build_dir.joinpath("main.zip") + self.zip_path = self.build.joinpath("main.zip") else: self.zip_path = Path(zip_path) if not 
self.zip_path.exists(): @@ -94,7 +94,7 @@ def __init__( self.zim_path = Path(f"{name}_{period}.zim") # build full path - self.zim_path = self.output_dir.joinpath(self.zim_path) + self.zim_path = self.output.joinpath(self.zim_path) if self.zim_path.exists(): if not self.force: @@ -147,20 +147,20 @@ def run_commands(self): if self.do_fetch: fetch_command( force=self.force, - curriculum_raw_dir=self.curriculum_raw_dir, + curriculum_raw=self.curriculum_raw, zip_path=self.zip_path, ) if self.do_prebuild: prebuild_command( fcc_lang=self.fcc_lang, course_csv=self.course_csv, - curriculum_raw_dir=self.curriculum_raw_dir, - curriculum_dist_dir=self.curriculum_dist_dir, + curriculum_raw=self.curriculum_raw, + curriculum_dist=self.curriculum_dist, ) if self.do_build: build_command( fcc_lang=self.fcc_lang, creator=self.creator, - zimui_dist_dir=self.zimui_dist_dir, - curriculum_dist_dir=self.curriculum_dist_dir, + zimui_dist=self.zimui_dist, + curriculum_dist=self.curriculum_dist, ) diff --git a/scraper/tests/test_scraper.py b/scraper/tests/test_scraper.py index 8800541..4d9c287 100644 --- a/scraper/tests/test_scraper.py +++ b/scraper/tests/test_scraper.py @@ -76,9 +76,9 @@ def create_scraper( do_fetch: bool = True, do_prebuild: bool = True, do_build: bool = True, - zimui_dist_dir: str = str(ZIMUI_DIST_PATH), - output_dir: str = str(OUTPUT_PATH), - build_dir: str = str(BUILD_PATH), + zimui_dist: str = str(ZIMUI_DIST_PATH), + output: str = str(OUTPUT_PATH), + build: str = str(BUILD_PATH), language: str = "eng", name="fcc_en_javascript", title="freeCodeCamp Javascript", @@ -96,9 +96,9 @@ def create_scraper( do_fetch=do_fetch, do_prebuild=do_prebuild, do_build=do_build, - zimui_dist_dir=zimui_dist_dir, - output_dir=output_dir, - build_dir=build_dir, + zimui_dist=zimui_dist, + output=output, + build=build, language=language, name=name, title=title, @@ -151,9 +151,9 @@ def test_do_phases_ok( assert scraper.do_prebuild == expected_do_prebuild assert scraper.do_build == 
expected_do_build - def test_zimui_dist_dir_ko(self): + def test_zimui_dist_ko(self): with pytest.raises(ValueError): - self.create_scraper(zimui_dist_dir="whatever") + self.create_scraper(zimui_dist="whatever") @pytest.mark.parametrize( "language, expected_fcc_lang",