better logging + working directory + auto detect tqdm
FabianGroeger96 committed Mar 21, 2024
1 parent e4daa88 commit 18d80a0
Showing 10 changed files with 49 additions and 36 deletions.
6 changes: 6 additions & 0 deletions Makefile
@@ -101,6 +101,12 @@ clean: ##@Utils clean the project
	@rm -f -R tmp/
	@rm -f -R cov_html/

+_build_publish:
+	@python3 -m pip install --upgrade pip
+	@python3 -m pip install setuptools wheel twine
+	@python3 setup.py sdist bdist_wheel
+	@python3 -m twine upload --verbose dist/*
+
###########################
# DOCKER
###########################
2 changes: 2 additions & 0 deletions requirements.txt
@@ -21,3 +21,5 @@ seaborn
SciencePlots
scikit-image
codecov
+jupyter
+loguru
7 changes: 2 additions & 5 deletions src/cleaner/auto_cleaning_mixin.py
@@ -3,6 +3,7 @@
import numpy as np
import scipy
import scipy.stats
+from loguru import logger

from ..utils.plotting import (
    plot_frac_cut,

@@ -86,7 +87,6 @@ def fraction_cut(
        plot_result: bool = False,
        ax=None,
        bins="sqrt",
-        debug: bool = False,
        path: Optional[str] = None,
    ):
        M = len(scores)

@@ -113,8 +113,7 @@
        # Exclude the scores below probability threshold
        exclude = logit_scores < cutoff
        n = exclude.sum()
-        if debug:
-            print(f"{n} outliers ({n/self.N:.1%})")
+        logger.debug(f"{n} outliers ({n/self.N:.1%})")

        if plot_result:
            if ax is not None:

@@ -155,7 +154,6 @@ def threshold_sensitivity(self, scores: np.ndarray, ax=None):
                    alpha=0.1,
                    q=q,
                    plot_result=False,
-                    debug=False,
                ).shape[0],
            )
            for q in thresholds

@@ -187,7 +185,6 @@ def alpha_sensitivity(self, scores: np.ndarray, ax=None):
                    scores=scores,
                    alpha=a,
                    plot_result=False,
-                    debug=False,
                ).shape[0],
            )
            for a in alphas
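The change above replaces per-function debug flags with loguru: call sites log unconditionally and verbosity is decided once at the sink. A minimal, self-contained sketch of the resulting behavior (the counts are illustrative, not from the library):

import sys

from loguru import logger

# Verbosity is configured once at the sink instead of threading a
# `debug: bool` through every signature, as fraction_cut did before.
logger.remove()
logger.add(sys.stderr, level="DEBUG")  # use "INFO" to silence debug output

n, N = 12, 480  # illustrative outlier count and dataset size
logger.debug(f"{n} outliers ({n/N:.1%})")

This is also why threshold_sensitivity and alpha_sensitivity no longer forward debug=False.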
2 changes: 1 addition & 1 deletion src/cleaner/near_duplicates/embedding_distance_mixin.py
@@ -2,7 +2,7 @@
from typing import List, Tuple

import numpy as np
-from tqdm import tqdm
+from tqdm.auto import tqdm

from ...cleaner.near_duplicates.base_near_duplicate_mixin import BaseNearDuplicateMixin
from ...ssl_library.src.utils.logging import plot_dist
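Switching to tqdm.auto is the "auto detect tqdm" part of the commit: the same import yields the ipywidgets progress bar inside a Jupyter kernel and the plain text bar in a terminal. A minimal sketch (the loop body is a placeholder):

from tqdm.auto import tqdm

# tqdm.auto resolves the frontend (notebook widget vs. text bar) at
# import time, so call sites stay identical in both environments.
for _ in tqdm(range(1_000), desc="computing distances"):
    pass  # placeholder for per-pair work

This fits with the new jupyter entry added to requirements.txt in the same commit.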
24 changes: 14 additions & 10 deletions src/cleaner/selfclean.py
@@ -14,6 +14,7 @@
from ..ssl_library.src.augmentations.ibot import iBOTDataAugmentation
from ..ssl_library.src.pkg import Embedder, embed_dataset
from ..ssl_library.src.trainers.dino_trainer import DINOTrainer
+from ..ssl_library.src.utils.logging import set_log_level
from ..ssl_library.src.utils.utils import cleanup, init_distributed_mode
from ..utils.utils import set_dataset_transformation

@@ -26,7 +27,8 @@
    "warmup_epochs": 10,
    "momentum_teacher": 0.996,
    "clip_grad": 3.0,
-    "apply_l2_norm": True,
+    "apply_l2_norm": False,  # TODO: check influence of this
+    "save_every_n_epochs": 10,
    "model": {
        "out_dim": 4096,
        "emb_dim": 192,

@@ -35,11 +37,11 @@
        "use_bn_in_head": False,
        "norm_last_layer": True,
        "student": {
-            "drop_path_rate": 0.1,
+            "drop_path_rate": 0.1,  # TODO: check influence of this
            "pretrained": True,
        },
        "teacher": {
-            "drop_path_rate": 0.1,
+            "drop_path_rate": 0.1,  # TODO: check influence of this
            "pretrained": True,
        },
        "eval": {"n_last_blocks": 4, "avgpool_patchtokens": False},

@@ -84,13 +86,14 @@ def __init__(
        plot_top_N: Optional[int] = None,
        output_path: Optional[str] = None,
        figsize: tuple = (10, 8),
+        # logging
+        log_level: str = "INFO",
        **kwargs,
    ):
+        set_log_level(min_log_level=log_level)
        self.memmap = memmap
        self.memmap_path = memmap_path
-
        self.model = None
-
        self.cleaner = SelfCleanCleaner(
            distance_function_path=distance_function_path,
            distance_function_name=distance_function_name,

@@ -102,9 +105,9 @@
            plot_top_N=plot_top_N,
            output_path=output_path,
            figsize=figsize,
+            log_level=log_level,
            **kwargs,
        )
-
        self.base_transform = transforms.Compose(
            [
                transforms.Resize(256, interpolation=InterpolationMode.BICUBIC),

@@ -118,7 +121,7 @@ def run_on_image_folder(
        self,
        input_path: Union[str, Path],
        epochs: int = 100,
-        batch_size: int = 32,
+        batch_size: int = 64,
        ssl_pre_training: bool = True,
        work_dir: Optional[str] = None,
        num_workers: int = 24,

@@ -158,7 +161,7 @@ def run_on_dataset(
        self,
        dataset,
        epochs: int = 100,
-        batch_size: int = 32,
+        batch_size: int = 64,
        ssl_pre_training: bool = True,
        work_dir: Optional[str] = None,
        num_workers: int = 24,

@@ -194,7 +197,7 @@ def _run(
        self,
        dataset,
        epochs: int = 100,
-        batch_size: int = 32,
+        batch_size: int = 64,
        ssl_pre_training: bool = True,
        work_dir: Optional[str] = None,
        num_workers: int = 24,

@@ -244,6 +247,7 @@ def _run(
            normalize=apply_l2_norm,
            memmap=self.memmap,
            memmap_path=self.memmap_path,
+            tqdm_desc="Creating dataset representation",
        )
        # for default datasets we can set the paths manually
        if hasattr(dataset, "_image_files") and paths is None:

@@ -262,7 +266,7 @@ def train_dino(
        self,
        dataset: Dataset,
        epochs: int = 100,
-        batch_size: int = 32,
+        batch_size: int = 64,
        ssl_pre_training: bool = True,
        work_dir: Optional[str] = None,
        hyperparameters: dict = DINO_STANDARD_HYPERPARAMETERS,
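set_log_level is imported from the ssl_library submodule, whose contents are not part of this diff. A plausible loguru-based sketch of such a helper, offered as an assumption rather than the submodule's actual code:

import sys

from loguru import logger

def set_log_level(min_log_level: str = "INFO") -> None:
    # Replace the default sink with stderr filtered at the requested level,
    # so one constructor argument controls verbosity process-wide.
    logger.remove()
    logger.add(sys.stderr, level=min_log_level)

Under that assumption, constructing the wrapper with log_level="DEBUG" surfaces the logger.debug diagnostics introduced elsewhere in this commit.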
7 changes: 6 additions & 1 deletion src/cleaner/selfclean_cleaner.py
@@ -6,7 +6,7 @@
import numpy as np
import scienceplots  # noqa: F401
import sklearn  # noqa: F401
-from tqdm import tqdm
+from tqdm.auto import tqdm

from ..cleaner.auto_cleaning_mixin import AutoCleaningMixin
from ..cleaner.base_cleaner import BaseCleaner

@@ -17,6 +17,7 @@
from ..cleaner.near_duplicates.embedding_distance_mixin import EmbeddingDistanceMixin
from ..distances import *  # noqa: F401, F403
from ..distances.projective_distance import *  # noqa: F401, F403
+from ..ssl_library.src.utils.logging import set_log_level
from ..utils.plotting import plot_inspection_result

@@ -42,8 +43,12 @@ def __init__(
        plot_top_N: Optional[int] = None,
        output_path: Optional[str] = None,
        figsize: tuple = (10, 8),
+        # logging
+        log_level: str = "INFO",
        **kwargs,
    ):
+        set_log_level(min_log_level=log_level)
+
        self.memmap = memmap
        self.chunk_size = chunk_size
        self.precision_type_distance = precision_type_distance
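A short usage sketch of the new constructor argument (the import path is illustrative; the keyword names mirror the signature above):

from src.cleaner.selfclean_cleaner import SelfCleanCleaner

# `log_level` is forwarded to set_log_level() during __init__, so "DEBUG"
# here also enables the logger.debug() output added in this commit.
cleaner = SelfCleanCleaner(memmap=False, log_level="DEBUG")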
30 changes: 14 additions & 16 deletions src/scoring/lad_scoring.py
@@ -3,9 +3,10 @@
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
+from loguru import logger
from matplotlib.patches import Rectangle
from scipy.cluster import hierarchy
-from tqdm import tqdm
+from tqdm.auto import tqdm


class LAD:

@@ -29,7 +30,6 @@ def calc_scores(
        linkage_matrix: list,
        global_leaves: bool = False,
        save_fig_path: Optional[str] = None,
-        debug: bool = False,
    ) -> List[Tuple[float, int]]:
        if self.plot_scores:
            plt.figure(figsize=(5, 5))

@@ -140,14 +140,13 @@ def calc_scores(
                    linestyle="dotted",
                )

-            if debug:
-                print(
-                    f"ID: {node.id}, #leaves: {n_leaves}, scores: {len(scores)}, "
-                    f"dist: {round(node.dist, 2)}, square: {round(square, 2)}, "
-                    f"start: {round(start, 2)}, end: {round(end, 2)}, "
-                    f"p_left: {round(p_left, 2)}, p_right: {round(p_right, 2)}, "
-                    f"w_left: {round(w_left, 2)}, w_right: {round(w_right, 2)}"
-                )
+            logger.debug(
+                f"ID: {node.id}, #leaves: {n_leaves}, scores: {len(scores)}, "
+                f"dist: {round(node.dist, 2)}, square: {round(square, 2)}, "
+                f"start: {round(start, 2)}, end: {round(end, 2)}, "
+                f"p_left: {round(p_left, 2)}, p_right: {round(p_right, 2)}, "
+                f"w_left: {round(w_left, 2)}, w_right: {round(w_right, 2)}"
+            )

            node_right = NodeElement(
                node=node.right,

@@ -167,12 +166,11 @@ def calc_scores(
            )
            queue.insert(0, node_left)
        else:
-            if debug:
-                print(
-                    f"Leaf ({node.id}), "
-                    f"score: {round(sum(scores), 2)}, square: {round(square, 2)}, "
-                    f"start: {round(start, 2)}, end: {round(end, 2)}"
-                )
+            logger.debug(
+                f"Leaf ({node.id}), "
+                f"score: {round(sum(scores), 2)}, square: {round(square, 2)}, "
+                f"start: {round(start, 2)}, end: {round(end, 2)}"
+            )

            if self.plot_scores:
                plt.gca().text(
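One trade-off of replacing "if debug: print(...)" with unconditional logger.debug(...): the f-strings and their round calls are evaluated for every node even when DEBUG records are filtered out. If that ever matters in profiling, loguru's lazy mode defers the work; a sketch with an illustrative stand-in for the per-node message:

from loguru import logger

def expensive_summary() -> str:
    # Stand-in for the per-node f-string assembled in calc_scores.
    return ", ".join(f"w_{i}: {round(i * 0.123, 2)}" for i in range(4))

# With opt(lazy=True) the callable runs only if a sink accepts DEBUG
# records, so hot loops pay nothing at INFO and above.
logger.opt(lazy=True).debug("node summary: {}", expensive_summary)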
2 changes: 1 addition & 1 deletion src/ssl_library (submodule pointer update; contents not shown)
3 changes: 2 additions & 1 deletion src/utils/utils.py
@@ -2,6 +2,7 @@

import numpy as np
import torch
+from loguru import logger
from torch.utils.data import ConcatDataset, Dataset


@@ -35,7 +36,7 @@ def actual_indices(idx, n):
    shifts = np.concatenate([[0], n_row_elems])
    jj = np.arange(1, n)[ii] + idx - shifts[ii]
    if np.sum(ii < 0) > 0 or np.sum(jj < 0) > 0:
-        print("Negative indices")
+        logger.error("Negative indices")
    return ii, jj
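For context, actual_indices(idx, n) appears to map flat indices of a condensed (strict upper-triangular) pairwise array back to (row, column) pairs, which is why negative results indicate a bug. A self-contained sketch of that standard conversion (an independent reimplementation; names are illustrative):

import numpy as np

def condensed_to_pair(idx: np.ndarray, n: int):
    # Row i of the strict upper triangle holds n - 1 - i entries.
    row_sizes = np.arange(n - 1, 0, -1)
    ends = np.cumsum(row_sizes)        # exclusive end offset of each row
    ii = np.searchsorted(ends, idx, side="right")
    starts = ends - row_sizes          # start offset of each row
    jj = idx - starts[ii] + ii + 1
    return ii, jj

# For n = 4 the condensed order is (0,1), (0,2), (0,3), (1,2), (1,3), (2,3):
print(condensed_to_pair(np.array([0, 3, 5]), 4))  # (array([0, 1, 2]), array([1, 2, 3]))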
2 changes: 1 addition & 1 deletion tests/unittests/cleaner/test_auto_cleaning.py
@@ -56,7 +56,7 @@ def test_predict_auto_cleaning_with_plotting(self):
        cleaner = SelfCleanCleaner(
            memmap=False,
            auto_cleaning=True,
-            cleaner_kwargs={"debug": True, "plot_result": True},
+            cleaner_kwargs={"plot_result": True},
        )
        cleaner.fit(emb_space=self.emb_space, labels=self.labels)
        out_dict = cleaner.predict()
