Skip to content

Commit

Permalink
Merge pull request #18 from openzim/remove_dir_suffix
Browse files Browse the repository at this point in the history
Remove dir suffix
  • Loading branch information
benoit74 authored Aug 31, 2023
2 parents 4e43b26 + 00af2ff commit 209a5ac
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 75 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed
- Remove "_dir" or "-dir" suffix from input flags and variable/argument names

## [1.0.0] - 2023-08-29

### Added
Expand Down
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ RUN pip install --no-cache-dir /src/scraper \
RUN mkdir -p /output
WORKDIR /output

ENV BUILD_DIR=/tmp
ENV OUTPUT_DIR=/output
ENV ZIMUI_DIST_DIR=/src/zimui
ENV FCC_BUILD=/tmp
ENV FCC_OUTPUT=/output
ENV FCC_ZIMUI_DIST=/src/zimui

ENTRYPOINT ["fcc2zim"]
2 changes: 1 addition & 1 deletion scraper/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fix-ruff = "inv fix-ruff --args '{args}'"
fixall = "inv fixall --args '{args}'"

[tool.hatch.envs.check]
features = ["scripts", "check"]
features = ["scripts", "check", "test"]

[tool.hatch.envs.check.scripts]
pyright = "inv check-pyright --args '{args}'"
Expand Down
20 changes: 10 additions & 10 deletions scraper/src/fcc2zim/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
from fcc2zim.constants import Global


def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str):
def build_curriculum_redirects(curriculum_dist: Path, fcc_lang: str):
"""
Build the list of redirects from challenge URL to Vite hash URL
The Vite app uses its own router to navigate. We have a single HTML file, but we
need a URL for each challenge for the zim search to work.
This builds the list of redirects needed from the challenge URL to the Vite hash URL.
"""
index_json_path = curriculum_dist_dir.joinpath("curriculum", fcc_lang, "index.json")
index_json_path = curriculum_dist.joinpath("curriculum", fcc_lang, "index.json")
with open(index_json_path) as course_index_str:
superblock_dict = json.load(course_index_str)[fcc_lang]

Expand All @@ -24,7 +24,7 @@ def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str):
course_list = superblock_dict[superblock]
for course in course_list:
meta_json_path = Path(
curriculum_dist_dir,
curriculum_dist,
"curriculum",
fcc_lang,
superblock,
Expand All @@ -42,31 +42,31 @@ def build_curriculum_redirects(curriculum_dist_dir: Path, fcc_lang: str):


def build_command(
zimui_dist_dir: Path,
zimui_dist: Path,
fcc_lang: str,
creator: Creator,
curriculum_dist_dir: Path,
curriculum_dist: Path,
):
Global.logger.info("Scraper: build phase starting")

# Add zimui files
for file in zimui_dist_dir.rglob("*"):
for file in zimui_dist.rglob("*"):
if file.is_dir():
continue
path = str(Path(file).relative_to(zimui_dist_dir))
path = str(Path(file).relative_to(zimui_dist))
Global.logger.debug(f"Adding {path} to ZIM")
creator.add_item_for(path, fpath=file)

# Add prebuild generated curriculum file
for file in curriculum_dist_dir.rglob("*"):
for file in curriculum_dist.rglob("*"):
if file.is_dir():
continue
path = str(Path("fcc").joinpath(Path(file).relative_to(curriculum_dist_dir)))
path = str(Path("fcc").joinpath(Path(file).relative_to(curriculum_dist)))
Global.logger.debug(f"Adding {path} to ZIM")
creator.add_item_for(path, fpath=file)

for redir_slug, redir_title in build_curriculum_redirects(
curriculum_dist_dir=curriculum_dist_dir, fcc_lang=fcc_lang
curriculum_dist=curriculum_dist, fcc_lang=fcc_lang
):
redirect_path = f"{redir_slug}"
redirect_url = redir_slug.count("/") * "../" + f"index.html#{redir_slug}"
Expand Down
18 changes: 9 additions & 9 deletions scraper/src/fcc2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,24 +96,24 @@ def main():
default=False,
)
parser.add_argument(
"--output-dir",
"--output",
type=str,
help="Output directory where zim file will be built",
default=os.getenv("OUTPUT_DIR", "../output"),
default=os.getenv("FCC_OUTPUT", "../output"),
)
parser.add_argument(
"--build-dir",
"--build",
type=str,
help="The build directory to hold temporary files during scraper operation",
default=os.getenv("BUILD_DIR", "../build"),
default=os.getenv("FCC_BUILD", "../build"),
)
parser.add_argument(
"--zimui-dist-dir",
"--zimui-dist",
type=str,
help=(
"Directory containing Vite build output from the Zim UI Vue.JS application"
),
default=os.getenv("ZIMUI_DIST_DIR", "../zimui/dist"),
default=os.getenv("FCC_ZIMUI_DIST", "../zimui/dist"),
)
parser.add_argument(
"--zim-file",
Expand Down Expand Up @@ -143,9 +143,9 @@ def main():
do_fetch=os.getenv("DO_FETCH", "False").lower() == "true",
do_prebuild=os.getenv("DO_PREBUILD", "False").lower() == "true",
do_build=os.getenv("DO_BUILD", "False").lower() == "true",
zimui_dist_dir=args.zimui_dist_dir,
output_dir=args.output_dir,
build_dir=args.build_dir,
zimui_dist=args.zimui_dist,
output=args.output,
build=args.build,
language=args.language,
name=args.name,
title=args.title,
Expand Down
10 changes: 5 additions & 5 deletions scraper/src/fcc2zim/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from fcc2zim.constants import Global


def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool):
def fetch_command(zip_path: Path, curriculum_raw: Path, *, force: bool):
Global.logger.info("Scraper: fetch phase starting")
url = "https://github.com/freeCodeCamp/freeCodeCamp/archive/refs/heads/main.zip"

Expand All @@ -19,8 +19,8 @@ def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool):
else:
Global.logger.debug(f"Using existing zip file {zip_path}")

curriculum_raw_dir.mkdir(parents=True, exist_ok=True)
shutil.rmtree(curriculum_raw_dir)
curriculum_raw.mkdir(parents=True, exist_ok=True)
shutil.rmtree(curriculum_raw)

Global.logger.debug("Extracting files")
with zipfile.ZipFile(zip_path, "r") as zip_ref:
Expand All @@ -30,7 +30,7 @@ def fetch_command(zip_path: Path, curriculum_raw_dir: Path, *, force: bool):
if member.startswith("freeCodeCamp-main/curriculum/")
or member.startswith("freeCodeCamp-main/client/i18n/locales")
]
zip_ref.extractall(members=members, path=curriculum_raw_dir)
zip_ref.extractall(members=members, path=curriculum_raw)
Global.logger.info(f"Extracted {len(members)} files")
Global.logger.info(f"Fetched curriculum into {curriculum_raw_dir}")
Global.logger.info(f"Fetched curriculum into {curriculum_raw}")
Global.logger.info("Scraper: fetch phase finished")
38 changes: 18 additions & 20 deletions scraper/src/fcc2zim/prebuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def update_index(path: Path, superblock: str, slug: str, language="english"):
"""


def write_locales_to_path(source_dir: Path, curriculumdir: Path, language="english"):
shutil.copytree(source_dir, curriculumdir / "locales" / language)
def write_locales_to_path(source: Path, curriculumdir: Path, language="english"):
shutil.copytree(source, curriculumdir / "locales" / language)


def write_course_to_path(
Expand Down Expand Up @@ -75,37 +75,35 @@ def write_course_to_path(
def prebuild_command(
course_csv: str,
fcc_lang: str,
curriculum_raw_dir: Path,
curriculum_dist_dir: Path,
curriculum_raw: Path,
curriculum_dist: Path,
):
"""Transform raw data in curriculum_raw_dir into pre-built data in
curriculum_dist_dir
"""Transform raw data in curriculum_raw directory into pre-built data in
curriculum_dist directory
E.g. if lang is english:
- curriculum_dist_dir/index.json
- <curriculum_dist>/index.json
=> { 'english': {'superblock': ['basic-javascript'] } }
- curriculum_dist_dir/english/<superblock>/<course_slug>/_meta.json
- <curriculum_dist>/english/<superblock>/<course_slug>/_meta.json
=> { challenges: [{slug, title}] }
- curriculum_dist_dir/english/<superblock>/<course_slug>/{slug}.md
- <curriculum_dist>/english/<superblock>/<course_slug>/{slug}.md
"""
Global.logger.info("Scraper: prebuild phase starting")

curriculum_dist_dir.mkdir(parents=True, exist_ok=True)
shutil.rmtree(curriculum_dist_dir)
curriculum_dist.mkdir(parents=True, exist_ok=True)
shutil.rmtree(curriculum_dist)

challenges_dir = curriculum_raw_dir.joinpath(
challenges = curriculum_raw.joinpath(
"freeCodeCamp-main", "curriculum", "challenges"
)
locales_dir = curriculum_raw_dir.joinpath(
locales = curriculum_raw.joinpath(
"freeCodeCamp-main", "client", "i18n", "locales", fcc_lang
)

# eg. ['basic-javascript', 'debugging']
for course in course_csv.split(","):
Global.logger.debug(f"Prebuilding {course}")
meta = json.loads(
challenges_dir.joinpath("_meta", course, "meta.json").read_text()
)
meta = json.loads(challenges.joinpath("_meta", course, "meta.json").read_text())
# Get the order that the challenges should be completed in for <course>
ids = [
item[0] if isinstance(item, list) else item["id"]
Expand All @@ -114,7 +112,7 @@ def prebuild_command(
superblock = meta["superBlock"]

challenge_list: list[Challenge] = []
for file in get_challenges_for_lang(challenges_dir, fcc_lang):
for file in get_challenges_for_lang(challenges, fcc_lang):
challenge = Challenge(file)
if challenge.course_superblock != superblock:
continue
Expand All @@ -127,10 +125,10 @@ def prebuild_command(
sorted(challenge_list, key=lambda x: ids.index(x.identifier())),
superblock,
course,
curriculum_dist_dir.joinpath("curriculum", fcc_lang),
curriculum_dist.joinpath("curriculum", fcc_lang),
)

# Copy all the locales for this language
write_locales_to_path(locales_dir, curriculum_dist_dir, fcc_lang)
Global.logger.info(f"Prebuilt curriculum into {curriculum_dist_dir}")
write_locales_to_path(locales, curriculum_dist, fcc_lang)
Global.logger.info(f"Prebuilt curriculum into {curriculum_dist}")
Global.logger.info("Scraper: prebuild phase finished")
38 changes: 19 additions & 19 deletions scraper/src/fcc2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ def __init__(
do_fetch: bool,
do_prebuild: bool,
do_build: bool,
zimui_dist_dir: str,
output_dir: str,
build_dir: str,
zimui_dist: str,
output: str,
build: str,
language: str,
name: str,
title: str,
Expand All @@ -42,18 +42,18 @@ def __init__(
if not (self.do_fetch + self.do_prebuild + self.do_build):
self.do_fetch = self.do_prebuild = self.do_build = True

self.zimui_dist_dir = Path(zimui_dist_dir)
if not self.zimui_dist_dir.exists():
raise ValueError(f"zimui_dist_dir {self.zimui_dist_dir} does not exists")
self.zimui_dist = Path(zimui_dist)
if not self.zimui_dist.exists():
raise ValueError(f"zimui_dist directory {self.zimui_dist} does not exists")

self.output_dir = Path(output_dir)
self.build_dir = Path(build_dir)
self.curriculum_raw_dir = self.build_dir.joinpath("curriculum-raw")
self.curriculum_dist_dir = self.build_dir.joinpath("curriculum-dist")
self.output = Path(output)
self.build = Path(build)
self.curriculum_raw = self.build.joinpath("curriculum-raw")
self.curriculum_dist = self.build.joinpath("curriculum-dist")

# Make sure the output directory exists
self.output_dir.mkdir(parents=True, exist_ok=True)
self.build_dir.mkdir(parents=True, exist_ok=True)
self.output.mkdir(parents=True, exist_ok=True)
self.build.mkdir(parents=True, exist_ok=True)

self.language = language
if self.language not in FCC_LANG_MAP:
Expand All @@ -74,7 +74,7 @@ def __init__(
self.force = force
self.course_csv = course_csv
if not zip_path:
self.zip_path = self.build_dir.joinpath("main.zip")
self.zip_path = self.build.joinpath("main.zip")
else:
self.zip_path = Path(zip_path)
if not self.zip_path.exists():
Expand All @@ -94,7 +94,7 @@ def __init__(
self.zim_path = Path(f"{name}_{period}.zim")

# build full path
self.zim_path = self.output_dir.joinpath(self.zim_path)
self.zim_path = self.output.joinpath(self.zim_path)

if self.zim_path.exists():
if not self.force:
Expand Down Expand Up @@ -147,20 +147,20 @@ def run_commands(self):
if self.do_fetch:
fetch_command(
force=self.force,
curriculum_raw_dir=self.curriculum_raw_dir,
curriculum_raw=self.curriculum_raw,
zip_path=self.zip_path,
)
if self.do_prebuild:
prebuild_command(
fcc_lang=self.fcc_lang,
course_csv=self.course_csv,
curriculum_raw_dir=self.curriculum_raw_dir,
curriculum_dist_dir=self.curriculum_dist_dir,
curriculum_raw=self.curriculum_raw,
curriculum_dist=self.curriculum_dist,
)
if self.do_build:
build_command(
fcc_lang=self.fcc_lang,
creator=self.creator,
zimui_dist_dir=self.zimui_dist_dir,
curriculum_dist_dir=self.curriculum_dist_dir,
zimui_dist=self.zimui_dist,
curriculum_dist=self.curriculum_dist,
)
16 changes: 8 additions & 8 deletions scraper/tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ def create_scraper(
do_fetch: bool = True,
do_prebuild: bool = True,
do_build: bool = True,
zimui_dist_dir: str = str(ZIMUI_DIST_PATH),
output_dir: str = str(OUTPUT_PATH),
build_dir: str = str(BUILD_PATH),
zimui_dist: str = str(ZIMUI_DIST_PATH),
output: str = str(OUTPUT_PATH),
build: str = str(BUILD_PATH),
language: str = "eng",
name="fcc_en_javascript",
title="freeCodeCamp Javascript",
Expand All @@ -96,9 +96,9 @@ def create_scraper(
do_fetch=do_fetch,
do_prebuild=do_prebuild,
do_build=do_build,
zimui_dist_dir=zimui_dist_dir,
output_dir=output_dir,
build_dir=build_dir,
zimui_dist=zimui_dist,
output=output,
build=build,
language=language,
name=name,
title=title,
Expand Down Expand Up @@ -151,9 +151,9 @@ def test_do_phases_ok(
assert scraper.do_prebuild == expected_do_prebuild
assert scraper.do_build == expected_do_build

def test_zimui_dist_dir_ko(self):
def test_zimui_dist_ko(self):
with pytest.raises(ValueError):
self.create_scraper(zimui_dist_dir="whatever")
self.create_scraper(zimui_dist="whatever")

@pytest.mark.parametrize(
"language, expected_fcc_lang",
Expand Down

0 comments on commit 209a5ac

Please sign in to comment.