From 8ad1fbeaa95badcc9b0be3b52342ae43ab7bd895 Mon Sep 17 00:00:00 2001
From: Ba Tran <18694765+xmba15@users.noreply.github.com>
Date: Sat, 20 Jul 2024 03:18:31 +0900
Subject: [PATCH] Initial commit

---
 .github/workflows/build.yml   | 26 ++++++++++
 .gitignore                    | 90 +++++++++++++++++++++++++++++++++++
 .pre-commit-config.yaml       | 46 ++++++++++++++++++
 README.md                     | 27 +++++++++++
 config/base.yaml              | 55 +++++++++++++++++++++
 data/.keep                    |  0
 docs/.keep                    |  0
 environment.yml               |  9 ++++
 pyproject.toml                | 23 +++++++++
 requirements.txt              | 16 +++++++
 scripts/.keep                 |  0
 scripts/download_dataset.bash |  0
 scripts/test_dataset.py       | 35 ++++++++++++++
 src/__init__.py               |  0
 src/data/__init__.py          |  2 +
 src/data/dataset.py           | 53 +++++++++++++++++++++
 16 files changed, 382 insertions(+)
 create mode 100644 .github/workflows/build.yml
 create mode 100644 .gitignore
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 README.md
 create mode 100644 config/base.yaml
 create mode 100644 data/.keep
 create mode 100644 docs/.keep
 create mode 100644 environment.yml
 create mode 100644 pyproject.toml
 create mode 100644 requirements.txt
 create mode 100644 scripts/.keep
 create mode 100644 scripts/download_dataset.bash
 create mode 100644 scripts/test_dataset.py
 create mode 100644 src/__init__.py
 create mode 100644 src/data/__init__.py
 create mode 100644 src/data/dataset.py

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..0b6f1c5
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,26 @@
+name: Build
+
+on:
+  push:
+    branches: ["master"]
+  pull_request:
+
+jobs:
+  linting:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Apply pre-commit
+        uses: pre-commit/action@v3.0.0
+        with:
+          extra_args: --all-files
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c783c8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,90 @@
+# Compiled source #
+###################
+*.com
+*.class
+*.dll
+*.exe
+*.o
+*.so
+*.pyc
+.ipynb_checkpoints
+*~
+*#
+build*
+
+# Packages #
+###################
+# It's better to unpack these files and commit the raw source;
+# Git has its own built-in compression methods.
+*.7z
+*.dmg
+*.gz
+*.iso
+*.jar
+*.rar
+*.tar
+*.zip
+
+# Logs and databases #
+######################
+*.log
+*.sql
+*.sqlite
+
+# OS generated files #
+######################
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Images
+######################
+*.jpg
+*.gif
+*.png
+*.svg
+*.ico
+
+# Video
+######################
+*.wmv
+*.mpg
+*.mpeg
+*.mp4
+*.mov
+*.flv
+*.avi
+*.ogv
+*.ogg
+*.webm
+
+# Audio
+######################
+*.wav
+*.mp3
+*.wma
+
+# Fonts
+######################
+Fonts
+*.eot
+*.ttf
+*.woff
+
+# Format
+######################
+CPPLINT.cfg
+.clang-format
+
+# Gtags
+######################
+GPATH
+GRTAGS
+GSYMS
+GTAGS
+
+data*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..4d7fcbe
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,46 @@
+repos:
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+        entry: pflake8
+        additional_dependencies: [pep8-naming, pyproject-flake8]
+
+  - repo: https://github.com/psf/black
+    rev: 22.6.0
+    hooks:
+      - id: black
+        language_version: python3
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.10.1
+    hooks:
+      - id: mypy
+        additional_dependencies: [types-PyYAML, types-setuptools]
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v2.7.1
+    hooks:
+      - id: prettier
+        types_or: [json, markdown, yaml]
+
+  - repo: https://github.com/lovesegfault/beautysh
+    rev: v6.2.1
+    hooks:
+      - id: beautysh
+
+  - repo: https://github.com/pylint-dev/pylint.git
+    rev: v3.1.1
+    hooks:
+      - id: pylint
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..eae705c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# 📝 Image Harmonization for Copy-and-Paste
+
+---
+
+## :tada: TODO
+
+---
+
+- [ ] a
+- [ ] b
+
+## :running: How to Run
+
+---
+
+## 🎛 Development environment
+
+---
+
+```bash
+mamba env create --file environment.yml
+mamba activate image_harmonization
+```
+
+## :gem: References
+
+---
diff --git a/config/base.yaml b/config/base.yaml
new file mode 100644
index 0000000..21b28ea
--- /dev/null
+++ b/config/base.yaml
@@ -0,0 +1,55 @@
+---
+seed: 2024
+
+num_workers: 4
+experiment_name: ""
+
+dataset:
+  train_size: 0.9
+  sub_datasets:
+    - HAdobe5k
+    - HCOCO
+    - Hday2night
+    - HFlickr
+  root_dir: "./data/iharmony"
+  sub_dataset_train_csv_template: "{sub_dataset_name}_train.txt"
+  sub_dataset_test_csv_template: "{sub_dataset_name}_test.txt"
+  sub_dataset_composite_dir: composite_images
+  sub_dataset_mask_dir: masks
+  sub_dataset_real_images_dir: real_images
+
+model:
+  pretrained_model_name:
+  num_classes:
+  num_channels: 4
+  ignore: 255
+  pl_class:
+
+optimizer:
+  type: timm.optim.AdamW
+  lr: 0.0001
+  weight_decay: 0.001
+
+scheduler:
+  type: torch.optim.lr_scheduler.CosineAnnealingLR
+  T_max: 100
+  eta_min: 0.00001
+
+trainer:
+  devices: [0]
+  accelerator: "cuda"
+  max_epochs: 100
+  gradient_clip_val: 5.0
+  accumulate_grad_batches: 4
+  log_every_n_steps: 50
+  resume_from_checkpoint:
+
+train_parameters:
+  batch_size: 5
+
+val_parameters:
+  batch_size: 5
+
+output_root_dir: experiments
+img_h: 640
+img_w: 864
diff --git a/data/.keep b/data/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/docs/.keep b/docs/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..4b33274
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,9 @@
+name: image_harmonization
+channels:
+  - defaults
+  - anaconda
+dependencies:
+  - python=3.8
+  - pip
+  - pip:
+      - -r requirements.txt
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2931e96
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[tool.flake8]
+max-line-length = 120
+max-complexity = 20
+
+[tool.black]
+line-length = 120
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+
+[tool.mypy]
+ignore_missing_imports = true
+
+[tool.pylint."MESSAGES CONTROL"]
+disable = """
+    missing-docstring,
+    import-error,
+    wrong-import-position,
+    exec-used,
+    too-many-locals
+"""
+max-line-length = 120
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8c7271c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,16 @@
+opencv-python==4.3.0.38
+torch==2.3.1
+torchvision==0.18.1
+torchsummary==1.5.1
+pytorch-lightning==2.3.1
+PyYAML <=6.0.1
+loguru==0.7.2
+tqdm==4.66.4
+tensorboard==2.14.0
+protobuf==4.25.3
+scikit-image==0.21.0
+pandas==2.0.3
+matplotlib==3.7.5
+scikit-learn==1.3.2
+albumentations==1.4.11
+evaluate==0.4.2
diff --git a/scripts/.keep b/scripts/.keep
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/download_dataset.bash b/scripts/download_dataset.bash
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/test_dataset.py b/scripts/test_dataset.py
new file mode 100644
index 0000000..98c3df6
--- /dev/null
+++ b/scripts/test_dataset.py
@@ -0,0 +1,48 @@
+"""Smoke test: build a ``HarmonyDataset`` from a YAML config file."""
+
+import argparse
+import os
+import sys
+
+import yaml
+
+# Put the repository root on ``sys.path`` so that ``src`` is importable when this file runs as a script.
+_CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(_CURRENT_DIR, "../"))
+from src.data import HarmonyDataset, HarmonySubDatasetType  # noqa: E402
+
+
+def get_args():
+    """Parse the command-line arguments (``--config_path``, default ``./config/base.yaml``)."""
+    parser = argparse.ArgumentParser("test read dataset")
+    parser.add_argument("--config_path", type=str, default="./config/base.yaml")
+
+    return parser.parse_args()
+
+
+def main():
+    """Load the YAML config and instantiate ``HarmonyDataset`` from its ``dataset`` and ``seed`` entries.
+
+    Raises:
+        FileNotFoundError: if ``--config_path`` does not point to an existing file.
+    """
+    args = get_args()
+    # Explicit check rather than ``assert``: assertions are stripped under ``python -O``.
+    if not os.path.isfile(args.config_path):
+        raise FileNotFoundError(f"config file not found: {args.config_path}")
+    with open(args.config_path, encoding="utf-8") as _file:
+        hparams = yaml.safe_load(_file)
+
+    dataset_cfg = hparams["dataset"]
+    # Constructing the dataset is the whole test; the instance is not used afterwards.
+    HarmonyDataset(
+        dataset_cfg["root_dir"],
+        sub_dataset_types=[HarmonySubDatasetType[_name] for _name in dataset_cfg["sub_datasets"]],
+        train=True,
+        opt_train_size=dataset_cfg["train_size"],
+        seed=hparams["seed"],
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data/__init__.py b/src/data/__init__.py
new file mode 100644
index 0000000..b4d10d2
--- /dev/null
+++ b/src/data/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa
+from .dataset import *
diff --git a/src/data/dataset.py b/src/data/dataset.py
new file mode 100644
index 0000000..24c3858
--- /dev/null
+++ b/src/data/dataset.py
@@ -0,0 +1,102 @@
+"""Dataset classes for the image-harmonization sub-datasets."""
+
+import os
+from enum import Enum
+from typing import Optional, Sequence
+
+from torch.utils.data import Dataset
+
+__all__ = (
+    "HarmonySubDatasetType",
+    "HarmonySubDataset",
+    "HarmonyDataset",
+)
+
+
+class HarmonySubDatasetType(Enum):
+    """Supported sub-datasets; the member name is used to build the split list file name."""
+
+    HAdobe5k = 0
+    HCOCO = 1
+    Hday2night = 2
+    HFlickr = 3
+
+
+class HarmonySubDataset(Dataset):
+    """A single harmonization sub-dataset rooted at ``subset_root_dir``.
+
+    Args:
+        subset_root_dir: existing directory of the sub-dataset.
+        sub_dataset_type: which sub-dataset this is; its ``name`` prefixes the split list file.
+        train: select the train split when true, the test split otherwise.
+        opt_train_size: optional train-split size; stored only. NOTE(review): presumably a fraction
+            such as ``train_size: 0.9`` in ``config/base.yaml`` -- confirm once it is consumed.
+        seed: random seed; stored only for now.
+
+    Raises:
+        NotADirectoryError: if ``subset_root_dir`` is not an existing directory.
+    """
+
+    def __init__(
+        self,
+        subset_root_dir: str,
+        sub_dataset_type: HarmonySubDatasetType,
+        train: bool = True,
+        opt_train_size: Optional[float] = None,
+        seed: int = 2024,
+    ):
+        super().__init__()
+        # Explicit check rather than ``assert``: assertions are stripped under ``python -O``.
+        if not os.path.isdir(subset_root_dir):
+            raise NotADirectoryError(f"sub-dataset directory not found: {subset_root_dir}")
+        self.sub_dataset_name = sub_dataset_type.name
+        self.subset_root_dir = subset_root_dir
+        self.train = train
+        self.opt_train_size = opt_train_size
+        self.seed = seed
+
+    def _process_gt(self) -> str:
+        """Return the name of the list file for the selected split.
+
+        The name follows the ``{sub_dataset_name}_train.txt`` / ``{sub_dataset_name}_test.txt``
+        templates of ``config/base.yaml``. NOTE(review): reading that file is not implemented yet.
+        """
+        split = "train" if self.train else "test"
+        return f"{self.sub_dataset_name}_{split}.txt"
+
+
+class HarmonyDataset(Dataset):
+    """Collection of harmonization sub-datasets located under ``root_dir``.
+
+    Args:
+        root_dir: existing directory that holds the sub-datasets.
+        sub_dataset_types: sub-datasets to include; ``None`` selects every ``HarmonySubDatasetType``.
+        train: select the train split when true, the test split otherwise; stored only for now.
+        opt_train_size: optional train-split size; stored only for now.
+        seed: random seed; stored only for now.
+
+    Raises:
+        NotADirectoryError: if ``root_dir`` is not an existing directory.
+        ValueError: if ``sub_dataset_types`` is empty.
+    """
+
+    def __init__(
+        self,
+        root_dir: str,
+        sub_dataset_types: Optional[Sequence[HarmonySubDatasetType]] = None,
+        train: bool = True,
+        opt_train_size: Optional[float] = None,
+        seed: int = 2024,
+    ):
+        super().__init__()
+        if not os.path.isdir(root_dir):
+            raise NotADirectoryError(f"dataset root directory not found: {root_dir}")
+        # Resolve the default per call: a list in the signature is created once and shared by every call.
+        resolved = list(HarmonySubDatasetType) if sub_dataset_types is None else list(sub_dataset_types)
+        if not resolved:
+            raise ValueError("sub_dataset_types must not be empty")
+        self.root_dir = root_dir
+        self.sub_dataset_types = resolved
+        self.train = train
+        self.opt_train_size = opt_train_size
+        self.seed = seed