From 8ad1fbeaa95badcc9b0be3b52342ae43ab7bd895 Mon Sep 17 00:00:00 2001
From: Ba Tran <18694765+xmba15@users.noreply.github.com>
Date: Sat, 20 Jul 2024 03:18:31 +0900
Subject: [PATCH] Initial commit

---
 .github/workflows/build.yml   | 26 ++++++++++
 .gitignore                    | 90 +++++++++++++++++++++++++++++++++++
 .pre-commit-config.yaml       | 46 ++++++++++++++++++
 README.md                     | 27 +++++++++++
 config/base.yaml              | 55 +++++++++++++++++++++
 data/.keep                    |  0
 docs/.keep                    |  0
 environment.yml               |  9 ++++
 pyproject.toml                | 23 +++++++++
 requirements.txt              | 16 +++++++
 scripts/.keep                 |  0
 scripts/download_dataset.bash |  0
 scripts/test_dataset.py       | 42 ++++++++++++++++
 src/__init__.py               |  0
 src/data/__init__.py          |  2 +
 src/data/dataset.py           | 88 ++++++++++++++++++++++++++++++++++
 16 files changed, 424 insertions(+)
 create mode 100644 .github/workflows/build.yml
 create mode 100644 .gitignore
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 README.md
 create mode 100644 config/base.yaml
 create mode 100644 data/.keep
 create mode 100644 docs/.keep
 create mode 100644 environment.yml
 create mode 100644 pyproject.toml
 create mode 100644 requirements.txt
 create mode 100644 scripts/.keep
 create mode 100644 scripts/download_dataset.bash
 create mode 100644 scripts/test_dataset.py
 create mode 100644 src/__init__.py
 create mode 100644 src/data/__init__.py
 create mode 100644 src/data/dataset.py

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..0b6f1c5
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,26 @@
+name: Build
+
+on:
+  push:
+    branches: ["master"]
+  pull_request:
+
+jobs:
+  linting:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Apply pre-commit
+        uses: pre-commit/action@v3.0.0
+        with:
+          extra_args: --all-files
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c783c8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,90 @@
+# Compiled source #
+###################
+*.com
+*.class
+*.dll
+*.exe
+*.o
+*.so
+*.pyc
+.ipynb_checkpoints
+*~
+*#
+build*
+
+# Packages #
+###################
+# it's better to unpack these files and commit the raw source
+# git has its own built in compression methods
+*.7z
+*.dmg
+*.gz
+*.iso
+*.jar
+*.rar
+*.tar
+*.zip
+
+# Logs and databases #
+######################
+*.log
+*.sql
+*.sqlite
+
+# OS generated files #
+######################
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Images
+######################
+*.jpg
+*.gif
+*.png
+*.svg
+*.ico
+
+# Video
+######################
+*.wmv
+*.mpg
+*.mpeg
+*.mp4
+*.mov
+*.flv
+*.avi
+*.ogv
+*.ogg
+*.webm
+
+# Audio
+######################
+*.wav
+*.mp3
+*.wma
+
+# Fonts
+######################
+Fonts
+*.eot
+*.ttf
+*.woff
+
+# Format
+######################
+CPPLINT.cfg
+.clang-format
+
+# Gtags
+######################
+GPATH
+GRTAGS
+GSYMS
+GTAGS
+
+data*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..4d7fcbe
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,46 @@
+repos:
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+        entry: pflake8
+        additional_dependencies: [pep8-naming, pyproject-flake8]
+
+  - repo: https://github.com/psf/black
+    rev: 22.6.0
+    hooks:
+      - id: black
+        language_version: python3
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.10.1
+    hooks:
+      - id: mypy
+        additional_dependencies: [types-PyYAML, types-setuptools]
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v2.7.1
+    hooks:
+      - id: prettier
+        types_or: [json, markdown, yaml]
+
+  - repo: https://github.com/lovesegfault/beautysh
+    rev: v6.2.1
+    hooks:
+      - id: beautysh
+
+  - repo: https://github.com/pylint-dev/pylint.git
+    rev: v3.1.1
+    hooks:
+      - id: pylint
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..eae705c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# 📝 image harmonization for copy-and-paste
+
+---
+
+## :tada: TODO
+
+---
+
+- [ ] a
+- [ ] b
+
+## :running: How to Run
+
+---
+
+## 🎛 Development environment
+
+---
+
+```bash
+mamba env create --file environment.yml
+mamba activate image_harmonization
+```
+
+## :gem: References
+
+---
diff --git a/config/base.yaml b/config/base.yaml
new file mode 100644
index 0000000..21b28ea
--- /dev/null
+++ b/config/base.yaml
@@ -0,0 +1,55 @@
+---
+seed: 2024
+
+num_workers: 4
+experiment_name: ""
+
+dataset:
+  train_size: 0.9
+  sub_datasets:
+    - HAdobe5k
+    - HCOCO
+    - Hday2night
+    - HFlickr
+  root_dir: "./data/iharmony"
+  sub_dataset_train_csv_template: "{sub_dataset_name}_train.txt"
+  sub_dataset_test_csv_template: "{sub_dataset_name}_test.txt"
+  sub_dataset_composite_dir: composite_images
+  sub_dataset_mask_dir: masks
+  sub_dataset_real_images_dir: real_images
+
+model:
+  pretrained_model_name:
+  num_classes:
+  num_channels: 4
+  ignore: 255
+  pl_class:
+
+optimizer:
+  type: timm.optim.AdamW
+  lr: 0.0001
+  weight_decay: 0.001
+
+scheduler:
+  type: torch.optim.lr_scheduler.CosineAnnealingLR
+  T_max: 100
+  eta_min: 0.00001
+
+trainer:
+  devices: [0]
+  accelerator: "cuda"
+  max_epochs: 100
+  gradient_clip_val: 5.0
+  accumulate_grad_batches: 4
+  log_every_n_steps: 50
+  resume_from_checkpoint:
+
+train_parameters:
+  batch_size: 5
+
+val_parameters:
+  batch_size: 5
+
+output_root_dir: experiments
+img_h: 640
+img_w: 864
diff --git a/data/.keep b/data/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/docs/.keep b/docs/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..4b33274
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,9 @@
+name: image_harmonization
+channels:
+  - defaults
+  - anaconda
+dependencies:
+  - python=3.8
+  - pip
+  - pip:
+      - -r requirements.txt
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2931e96
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[tool.flake8]
+max-line-length = 120
+max-complexity = 20
+
+[tool.black]
+line-length = 120
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+
+[tool.mypy]
+ignore_missing_imports = true
+
+[tool.pylint."MESSAGES CONTROL"]
+disable = """
+    missing-docstring,
+    import-error,
+    wrong-import-position,
+    exec-used,
+    too-many-locals
+"""
+max-line-length = 120
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8c7271c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,16 @@
+opencv-python==4.3.0.38
+torch==2.3.1
+torchvision==0.18.1
+torchsummary==1.5.1
+pytorch-lightning==2.3.1
+PyYAML <=6.0.1
+loguru==0.7.2
+tqdm==4.66.4
+tensorboard==2.14.0
+protobuf==4.25.3
+scikit-image==0.21.0
+pandas==2.0.3
+matplotlib==3.7.5
+scikit-learn==1.3.2
+albumentations==1.4.11
+evaluate==0.4.2
diff --git a/scripts/.keep b/scripts/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/download_dataset.bash b/scripts/download_dataset.bash
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/test_dataset.py b/scripts/test_dataset.py
new file mode 100644
index 0000000..98c3df6
--- /dev/null
+++ b/scripts/test_dataset.py
@@ -0,0 +1,42 @@
+import argparse
+import os
+import sys
+
+import yaml
+
+_CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(_CURRENT_DIR, "../"))
+from src.data import HarmonyDataset, HarmonySubDatasetType  # noqa: E402
+
+
+def get_args():
+    """Parse the command-line options of this script (`--config_path`)."""
+    parser = argparse.ArgumentParser("test read dataset")
+    parser.add_argument("--config_path", type=str, default="./config/base.yaml")
+
+    return parser.parse_args()
+
+
+def main():
+    """Load the YAML config and construct a HarmonyDataset from its `dataset` section."""
+    args = get_args()
+    # Raise instead of `assert` so the check survives `python -O`.
+    if not os.path.isfile(args.config_path):
+        raise FileNotFoundError(f"config file not found: {args.config_path}")
+    with open(args.config_path, encoding="utf-8") as _file:
+        hparams = yaml.load(_file, Loader=yaml.SafeLoader)
+
+    dataset_config = hparams["dataset"]
+    dataset = HarmonyDataset(
+        dataset_config["root_dir"],
+        sub_dataset_types=[HarmonySubDatasetType[_name] for _name in dataset_config["sub_datasets"]],
+        train=True,
+        opt_train_size=dataset_config["train_size"],
+        seed=hparams["seed"],
+    )
+    # Report the result so the instance is actually used and the run gives visible feedback.
+    print(f"created {type(dataset).__name__} from {dataset_config['root_dir']}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data/__init__.py b/src/data/__init__.py
new file mode 100644
index 0000000..b4d10d2
--- /dev/null
+++ b/src/data/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa
+from .dataset import *
diff --git a/src/data/dataset.py b/src/data/dataset.py
new file mode 100644
index 0000000..24c3858
--- /dev/null
+++ b/src/data/dataset.py
@@ -0,0 +1,88 @@
+import os
+from enum import Enum
+from typing import Optional, Sequence
+
+from torch.utils.data import Dataset
+
+__all__ = (
+    "HarmonySubDatasetType",
+    "HarmonySubDataset",
+    "HarmonyDataset",
+)
+
+
+class HarmonySubDatasetType(Enum):
+    """Supported sub-datasets; member names match `dataset.sub_datasets` in config/base.yaml."""
+
+    HAdobe5k = 0
+    HCOCO = 1
+    Hday2night = 2
+    HFlickr = 3
+
+
+class HarmonySubDataset(Dataset):
+    """Dataset for one sub-dataset type; only argument bookkeeping is implemented so far."""
+
+    def __init__(
+        self,
+        subset_root_dir: str,
+        sub_dataset_type: HarmonySubDatasetType,
+        train: bool = True,
+        opt_train_size: Optional[float] = None,
+        seed=2024,
+    ):
+        """Validate `subset_root_dir` and store the split settings.
+
+        Raises:
+            NotADirectoryError: if `subset_root_dir` is not an existing directory.
+        """
+        super().__init__()
+        # Raise instead of `assert` so the check survives `python -O`.
+        if not os.path.isdir(subset_root_dir):
+            raise NotADirectoryError(f"not a directory: {subset_root_dir}")
+        self.sub_dataset_name = sub_dataset_type.name
+        self.subset_root_dir = subset_root_dir
+        self.train = train
+        # Float to match HarmonyDataset, which receives `dataset.train_size` (0.9) from the config.
+        self.opt_train_size = opt_train_size
+        self.seed = seed
+
+    def _process_gt(self):
+        """Return the path of the split list file; parsing the file is not implemented yet."""
+        split = "train" if self.train else "test"
+        # File names follow the `*_train.txt` / `*_test.txt` templates in config/base.yaml.
+        # NOTE(review): assumes the list file sits directly in subset_root_dir - confirm.
+        return os.path.join(self.subset_root_dir, f"{self.sub_dataset_name}_{split}.txt")
+
+
+class HarmonyDataset(Dataset):
+    """Top-level dataset; only argument validation and bookkeeping are implemented so far."""
+
+    def __init__(
+        self,
+        root_dir: str,
+        sub_dataset_types: Optional[Sequence[HarmonySubDatasetType]] = None,
+        train: bool = True,
+        opt_train_size: Optional[float] = None,
+        seed=2024,
+    ):
+        """Validate and store the arguments.
+
+        `sub_dataset_types=None` selects every `HarmonySubDatasetType`; the list is
+        built per call so that no mutable default is shared between instances.
+
+        Raises:
+            NotADirectoryError: if `root_dir` is not an existing directory.
+            ValueError: if `sub_dataset_types` is empty.
+        """
+        super().__init__()
+        if not os.path.isdir(root_dir):
+            raise NotADirectoryError(f"not a directory: {root_dir}")
+        selected = list(HarmonySubDatasetType) if sub_dataset_types is None else list(sub_dataset_types)
+        if not selected:
+            raise ValueError("sub_dataset_types must not be empty")
+        self.root_dir = root_dir
+        self.sub_dataset_types = selected
+        self.train = train
+        self.opt_train_size = opt_train_size
+        self.seed = seed