Skip to content

Commit

Permalink
Merge pull request #12 from SasCezar/dev
Browse files Browse the repository at this point in the history
Submodule Bump and Minor Fixes
  • Loading branch information
SasCezar authored Jan 30, 2024
2 parents 5d230cd + bb29154 commit 04529f5
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 28 deletions.
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ authors:
given-names: "Andrea"
orcid: "https://orcid.org/0000-0001-9469-6050"
title: "AutoFL"
version: 0.3.1
version: 0.4.1
doi: "10.5281/zenodo.10255368"
date-released: 2023-09-01
url: "https://github.com/SasCezar/AutoFL"
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ the [config](config) folder.
The main configuration file is [main.yaml](./config/main.yaml), which contains the following options:

- **local**: which environment to use, either local or docker. [Docker](./config/local/docker.yaml) is default.
- **taxonomy**: which taxonomy to use. Currently only [gitranking](./config/taxonomy/gitranking.yaml) is supported.
- **taxonomy**: which taxonomy to use. Currently only [gitranking](./config/taxonomy/gitranking.yaml) is supported, but
custom taxonomies can be added.
- **annotator**: which annotators to use. Default is [simple](./config/annotator/simple.yaml), which allows good results
without extra dependencies on models.
without extra dependencies on language models.
- **version_strategy**: which version strategy to use. Default is [latest](./config/version_strategy/latest.yaml), which
will only analyze the latest version of the project.
- **dataloader**: which dataloader to use. Default is [postgres](./config/dataloader/postgres.yaml) which allows the API
Expand Down Expand Up @@ -96,6 +97,10 @@ Other configuration can be defined by creating a new file in the folder of the s

## Development

The tool is composed of multiple components; their interaction is shown in the following diagram:

![Architecture](resources/architecture/architecture.png)

### Add New Languages

In order to support more languages, a new language specific parser is needed.
Expand Down Expand Up @@ -205,7 +210,7 @@ However, this tool is more up to date, easier to use, more configurable, and als
month = dec,
title = {{AutoFL}},
url = {https://github.com/SasCezar/AutoFL},
version = {0.4.0},
version = {0.4.1},
year = {2023},
url = {https://doi.org/10.5281/zenodo.10255368},
doi = {10.5281/zenodo.10255368}
Expand Down
2 changes: 1 addition & 1 deletion autofl-ui
Submodule autofl-ui updated 1 files
+2 −2 src/Main.py
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "autofl"
version = "0.4.0"
version = "0.4.1"
description = ""
authors = ["Cezar Sas <cezar.sas@gmail.com>"]
readme = "README.md"
Expand Down
Binary file added resources/architecture/architecture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 8 additions & 5 deletions src/ensemble/avg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from typing import List, Union
from typing import List, Union, Tuple

import numpy as np

Expand All @@ -11,7 +11,10 @@ class AverageEnsemble(EnsembleBase):
"""
Ensemble method that averages the annotations.
"""
def run(self, annotations: List[Annotation]):
annotations = [x.distribution for x in annotations if not x.unannotated]
mean = np.mean(annotations, axis=0)
return mean
def run(self, annotations: List[Annotation]) -> Tuple[Union[List | np.array], int]:
    """
    Average the distributions of every annotation that is not flagged as unannotated.

    :param annotations: annotations to combine; each one is read through its ``distribution`` and
        ``unannotated`` attributes.
    :return: ``(mean, 0)`` with the element-wise mean of the usable distributions, or
        ``(annotations[0], 1)`` when no usable distribution is available.
    """
    annotated = np.array([x.distribution for x in annotations if not x.unannotated])
    # Check the element count explicitly: the truth value of an array holding more than one
    # element is ambiguous and raises ValueError.
    if annotated.size:
        return np.mean(annotated, axis=0), 0

    # Nothing usable: hand back the first annotation unchanged and flag the result.
    return annotations[0], 1
9 changes: 5 additions & 4 deletions src/ensemble/cascade.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Union
from typing import List, Union, Tuple

import numpy as np

Expand All @@ -10,8 +10,9 @@ class CascadeEnsemble(EnsembleBase):
"""
Ensemble method that iterates over the annotations and picks the first annotation that is not unannotated.
"""
def run(self, annotations: List[Annotation]):
def run(self, annotations: List[Annotation]) -> Tuple[Union[List | np.array], int]:
for annotation in annotations:
if not annotation.unannotated:
return annotation
return annotations[0]
return annotation, 0

return annotations[0], 1
45 changes: 34 additions & 11 deletions src/ensemble/ensemble.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC
from typing import List, Union
from typing import List, Union, Tuple

import numpy as np

Expand All @@ -13,19 +13,42 @@ class EnsembleBase(ABC):
probabilities for each label. The ensemble method should return a single annotation, which is a list of probabilities
for each label.
"""

def __init__(self):
pass

def __call__(self, annotations: List[Union[np.array, Annotation]], *args, **kwargs):
return self.run(annotations)

def run(self, annotations: List[Annotation]):
def __call__(
    self, annotations: List[Union[np.array, Annotation]], *args, **kwargs
) -> Tuple[Union[List | np.array], int]:
    """
    Apply the ensemble to ``annotations`` and normalize the combined result.

    Exposing the ensemble as a callable allows stateless ensemble methods to be supplied as plain
    functions instead of classes.
    :param annotations: the annotations handed to ``run``.
    :param args: extra positional arguments; accepted but not used here.
    :param kwargs: extra keyword arguments; accepted but not used here.
    :return: the normalized first element returned by ``run``, paired with the flag ``run`` returned.
    """
    combined, flag = self.run(annotations)
    normalized = self.normalize(combined)
    return normalized, flag

def run(self, annotations: List[Annotation]) -> Tuple[Union[List | np.array], int]:
    """
    Run the ensemble method. This method should be implemented by subclasses; the base implementation
    does nothing and returns ``None``.

    The ensemble method is called with a list of annotations, where each annotation is a list of
    probabilities for each label, and should return a pair:

    * a single annotation, which is a list of probabilities for each label;
    * an integer flag: ``0`` when a valid annotation was produced, ``1`` when it was not, in which case
      the first annotation in the list of annotations should be returned as the fallback.

    :param annotations: the annotations to combine.
    :return: the ``(annotation, flag)`` pair described above.
    """
    pass

@staticmethod
def normalize(annotations: np.array) -> np.array:
    """
    Scale the ensemble result to unit length.

    The values are divided by ``np.linalg.norm(annotations)``, so the result has norm 1.
    NOTE(review): this does not by itself make the entries sum to 1 -- confirm which normalization
    callers expect from a "probability vector".
    :param annotations: the values to scale; anything ``np.array`` accepts.
    :return: the scaled values as an array; an input whose norm is zero is returned unscaled instead
        of being turned into NaN.
    """
    values = np.array(annotations)
    norm = np.linalg.norm(values)
    if norm == 0:
        # Dividing by a zero norm would turn every entry into NaN.
        return values
    return values / norm

class EnsembleNone(EnsembleBase):
"""
Ensemble method that does not do anything. This is useful for single annotator experiments.
"""
def run(self, annotations: List[Annotation]):
return annotations
15 changes: 15 additions & 0 deletions src/ensemble/none.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from typing import List, Tuple, Union

import numpy as np

from ensemble.ensemble import EnsembleBase
from entity.annotation import Annotation


class NoneEnsemble(EnsembleBase):
    """
    Pass-through ensemble for single annotator experiments: no combination is performed.
    """

    def run(self, annotations: List[Annotation]) -> Tuple[Union[List | np.array], int]:
        """
        Return the first annotation untouched.

        :param annotations: the annotations; only the first element is used.
        :return: ``(annotations[0], 0)``.
        """
        first = annotations[0]
        return first, 0
4 changes: 2 additions & 2 deletions src/ensemble/voting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Tuple, Union

import numpy as np

Expand All @@ -11,7 +11,7 @@ def __init__(self, k=10):
super().__init__()
self.k = k

def run(self, annotations: List[Annotation]):
def run(self, annotations: List[Annotation]) -> Tuple[Union[List | np.array], int]:
best, n = self.extract_best(annotations)

if not best:
Expand Down

0 comments on commit 04529f5

Please sign in to comment.