Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding pylint #349

Merged
merged 3 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
uv python pin ${{ matrix.python-version }}
- run: uv sync --python-preference=only-managed
- run: uv run refurb paperqa tests
- run: uv run pylint paperqa
test:
runs-on: ubuntu-latest
strategy:
Expand Down
8 changes: 4 additions & 4 deletions paperqa/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,16 @@ def configure_cli_logging(verbosity: int = 0) -> None:

rich_handler.setFormatter(logging.Formatter("%(message)s", datefmt="[%X]"))

module_logger = logging.getLogger(__name__.split(".")[0])
module_logger = logging.getLogger(__name__.split(".", maxsplit=1)[0])
jamesbraza marked this conversation as resolved.
Show resolved Hide resolved

if not any(isinstance(h, RichHandler) for h in module_logger.handlers):
module_logger.addHandler(rich_handler)

for logger_name, logger in logging.Logger.manager.loggerDict.items():
if isinstance(logger, logging.Logger) and (
for logger_name, logger_ in logging.Logger.manager.loggerDict.items():
if isinstance(logger_, logging.Logger) and (
log_level := verbosity_map.get(min(verbosity, 2), {}).get(logger_name)
):
logger.setLevel(log_level)
logger_.setLevel(log_level)

if verbosity > 0:
print(f"PaperQA version: {__version__}")
Expand Down
4 changes: 2 additions & 2 deletions paperqa/agents/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ async def run_aviary_agent(
obs, reward, done, truncated = await env.step(action)
if on_env_step_callback:
await on_env_step_callback(obs, reward, done, truncated)
status = AgentStatus.SUCCESS # pylint: disable=redefined-outer-name
status = AgentStatus.SUCCESS
except TimeoutError:
logger.warning(
f"Agent timeout after {query.settings.agent.timeout}-sec, just answering."
Expand Down Expand Up @@ -367,7 +367,7 @@ async def run_ldp_agent(
obs, reward, done, truncated = await env.step(action.value)
if on_env_step_callback:
await on_env_step_callback(obs, reward, done, truncated)
status = AgentStatus.SUCCESS # pylint: disable=redefined-outer-name
status = AgentStatus.SUCCESS
except TimeoutError:
logger.warning(
f"Agent timeout after {query.settings.agent.timeout}-sec, just answering."
Expand Down
24 changes: 15 additions & 9 deletions paperqa/agents/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@

import anyio
from pydantic import BaseModel
from tantivy import Document, Index, Schema, SchemaBuilder, Searcher
from tantivy import ( # pylint: disable=no-name-in-module
Document,
Index,
Schema,
SchemaBuilder,
Searcher,
)
from tenacity import (
RetryError,
retry,
Expand All @@ -41,15 +47,15 @@ class AsyncRetryError(Exception):
class RobustEncoder(json.JSONEncoder):
"""JSON encoder that can handle UUID and set objects."""

def default(self, obj):
if isinstance(obj, UUID):
def default(self, o):
if isinstance(o, UUID):
# if the obj is uuid, we simply return the value of uuid
return str(obj)
if isinstance(obj, set):
return list(obj)
if isinstance(obj, os.PathLike):
return str(obj)
return json.JSONEncoder.default(self, obj)
return str(o)
if isinstance(o, set):
return list(o)
if isinstance(o, os.PathLike):
return str(o)
return json.JSONEncoder.default(self, o)


class SearchDocumentStorage(StrEnum):
Expand Down
13 changes: 4 additions & 9 deletions paperqa/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,14 @@

logger = logging.getLogger(__name__)

DEFAULT_CLIENTS: (
jamesbraza marked this conversation as resolved.
Show resolved Hide resolved
Collection[type[MetadataPostProcessor | MetadataProvider]]
| Sequence[Collection[type[MetadataPostProcessor | MetadataProvider]]]
) = {
DEFAULT_CLIENTS: Collection[type[MetadataPostProcessor | MetadataProvider]] = {
CrossrefProvider,
SemanticScholarProvider,
JournalQualityPostProcessor,
}

ALL_CLIENTS: (
Collection[type[MetadataPostProcessor | MetadataProvider]]
| Sequence[Collection[type[MetadataPostProcessor | MetadataProvider]]]
) = DEFAULT_CLIENTS | { # type: ignore[operator]
ALL_CLIENTS: Collection[type[MetadataPostProcessor | MetadataProvider]] = {
*DEFAULT_CLIENTS,
UnpaywallProvider,
}

Expand Down Expand Up @@ -63,7 +58,7 @@ def __repr__(self) -> str:


class DocMetadataClient:
def __init__(
def __init__( # pylint: disable=dangerous-default-value
self,
session: aiohttp.ClientSession | None = None,
clients: (
Expand Down
13 changes: 5 additions & 8 deletions paperqa/contrib/zotero.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,7 @@ def __init__(
" from the text 'Your userID for use in API calls is [XXXXXX]'."
" Then, set the environment variable ZOTERO_USER_ID to this value."
)
else: # noqa: RET506
library_id = os.environ["ZOTERO_USER_ID"]
library_id = os.environ["ZOTERO_USER_ID"]

if api_key is None:
self.logger.info("Attempting to get ZOTERO_API_KEY from `os.environ`...")
Expand All @@ -97,8 +96,7 @@ def __init__(
" with access to your library."
" Then, set the environment variable ZOTERO_API_KEY to this value."
)
else: # noqa: RET506
api_key = os.environ["ZOTERO_API_KEY"]
api_key = os.environ["ZOTERO_API_KEY"]

self.logger.info(f"Using library ID: {library_id} with type: {library_type}.")

Expand All @@ -124,7 +122,7 @@ def get_pdf(self, item: dict) -> Path | None:
An item from `pyzotero`. Should have a `key` field, and also have an entry
`links->attachment->attachmentType == application/pdf`.
"""
if type(item) != dict: # noqa: E721
if not isinstance(item, dict):
raise TypeError("Pass the full item of the paper. The item must be a dict.")

pdf_key = _extract_pdf_key(item)
Expand Down Expand Up @@ -310,8 +308,7 @@ def _get_collection_id(self, collection_name: str) -> str:

def _get_citation_key(item: dict) -> str:
if (
"data" not in item
or "creators" not in item["data"]
"creators" not in item.get("data", {})
or len(item["data"]["creators"]) == 0
or "lastName" not in item["data"]["creators"][0]
or "title" not in item["data"]
Expand Down Expand Up @@ -341,7 +338,7 @@ def _extract_pdf_key(item: dict) -> str | None:

attachments = item["links"]["attachment"]

if type(attachments) != dict: # noqa: E721
if not isinstance(attachments, dict):
# Find first attachment with attachmentType == application/pdf:
for attachment in attachments:
# TODO: This assumes there's only one PDF attachment.
Expand Down
20 changes: 11 additions & 9 deletions paperqa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,16 @@ async def map_fxn_summary(
if not success:
score = extract_score(context)

c = Context(
context=context,
text=Text(
text=text.text,
name=text.name,
doc=text.doc.__class__(**text.doc.model_dump(exclude={"embedding"})),
return (
Context(
context=context,
text=Text(
text=text.text,
name=text.name,
doc=text.doc.__class__(**text.doc.model_dump(exclude={"embedding"})),
),
score=score, # pylint: disable=possibly-used-before-assignment
**extras,
),
score=score,
**extras,
llm_result,
)
return c, llm_result
4 changes: 2 additions & 2 deletions paperqa/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ async def acomplete_iter(self, prompt: str) -> AsyncIterable[Chunk]: # noqa: AR
Only the last tuple will be non-zero.
"""
raise NotImplementedError
if False: # type: ignore[unreachable]
if False: # type: ignore[unreachable] # pylint: disable=using-constant-test
yield # Trick mypy: https://github.com/python/mypy/issues/5070#issuecomment-1050834495

async def achat(self, messages: Iterable[dict[str, str]]) -> Chunk:
Expand All @@ -157,7 +157,7 @@ async def achat_iter(
Only the last tuple will be non-zero.
"""
raise NotImplementedError
if False: # type: ignore[unreachable]
if False: # type: ignore[unreachable] # pylint: disable=using-constant-test
yield # Trick mypy: https://github.com/python/mypy/issues/5070#issuecomment-1050834495

def infer_llm_type(self) -> str:
Expand Down
4 changes: 2 additions & 2 deletions paperqa/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def available_for_inference(self) -> list["ParsingOptions"]:
def _get_parse_type(opt: ParsingOptions, config: "ParsingSettings") -> str:
if opt == ParsingOptions.PAPERQA_DEFAULT:
return config.parser_version_string
assert_never()
assert_never(opt)
Copy link
Collaborator Author

@jamesbraza jamesbraza Sep 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mskarlin fyi it looks like you didn't properly roll out assert_never

More just sharing for future reference



class ChunkingOptions(StrEnum):
Expand Down Expand Up @@ -117,7 +117,7 @@ def chunk_type(self, chunking_selection: ChunkingOptions | None = None) -> str:
f"{self.parser_version_string}|{chunking_selection.value}"
f"|tokens={self.chunk_size}|overlap={self.overlap}"
)
assert_never()
assert_never(chunking_selection)

@property
def parser_version_string(self) -> str:
Expand Down
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ preview = true
[tool.codespell]
check-filenames = true
check-hidden = true
ignore-words-list = "aadd,ser"
ignore-words-list = "aadd,astroid,ser"

[tool.mypy]
# Type-checks the interior of functions without type annotations.
Expand Down Expand Up @@ -172,6 +172,7 @@ disable = [
"fixme", # codetags are useful
"function-redefined", # Rely on mypy no-redef for this
"import-outside-toplevel", # Rely on ruff PLC0415 for this
"inconsistent-return-statements", # TODO: remove after pylint>=3.3 for https://github.com/pylint-dev/pylint/pull/9591
"invalid-name", # Don't care to enforce this
"line-too-long", # Rely on ruff E501 for this
"logging-fstring-interpolation", # f-strings are convenient
Expand All @@ -194,9 +195,12 @@ disable = [
"too-many-locals", # Rely on ruff PLR0914 for this
"too-many-return-statements", # Rely on ruff PLR0911 for this
"too-many-statements", # Rely on ruff PLR0915 for this
"undefined-loop-variable", # Don't care to enforce this
"ungrouped-imports", # Rely on ruff I001 for this
"unidiomatic-typecheck", # Rely on ruff E721 for this
"unreachable", # Rely on mypy unreachable for this
"unspecified-encoding", # Don't care to enforce this
"unsubscriptable-object", # Buggy, SEE: https://github.com/PyCQA/pylint/issues/3637
"unsubscriptable-object", # Buggy, SEE: https://github.com/pylint-dev/pylint/issues/3637
"unsupported-membership-test", # Buggy, SEE: https://github.com/pylint-dev/pylint/issues/3045
"unused-argument", # Rely on ruff ARG002 for this
"unused-import", # Rely on ruff F401 for this
Expand Down Expand Up @@ -382,6 +386,8 @@ dev-dependencies = [
"mypy>=1.8", # Pin for mutable-override
"paper-qa[ldp,typing]",
"pre-commit~=3.4", # Pin to keep recent
"pydantic~=2.0",
"pylint-pydantic",
"pytest-asyncio",
"pytest-recording",
"pytest-rerunfailures",
Expand Down
Loading