Skip to content

Commit

Permalink
Deal with *vararg, **kwarg, and * in arg list (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
cyyc1 authored Oct 4, 2022
1 parent 2886eec commit a2dc5e1
Show file tree
Hide file tree
Showing 7 changed files with 467 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# "--exit-zero" treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --max-complexity=10 --max-line-length=80 --statistics
flake8 . --count --max-complexity=10 --max-line-length=88 --statistics
- name: Test with pytest
run: |
pytest
34 changes: 23 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,15 @@ There is one violation code that this plugin reports:

### _Wrong_

This plugin, as well as [PEP8](https://peps.python.org/pep-0008/#indentation), considers the following indentation styles wrong:
This plugin considers the following indentation styles wrong:

```python
def some_function(arg1,
*,
arg2,
arg3):
print(arg1)
```

```python
def some_function(argument1,
Expand All @@ -33,12 +41,7 @@ def some_function(argument1,
print(argument1)
```

```python
def some_function(arg1,
arg2,
arg3):
print(arg1)
```
This following style above is the style choice of the [`black` formatter](https://github.com/psf/black). Both this plugin and [PEP8](https://peps.python.org/pep-0008/#indentation) consider it wrong because arguments and function names would be difficult to visually distinghish.

```python
def some_function(
Expand All @@ -49,8 +52,6 @@ def some_function(
print(arg1)
```

Note: this style above is the style choice of the [`black` formatter](https://github.com/psf/black). This style is wrong because arguments and function names would be difficult to visually distinghish.

### _Correct_

Correspondingly, here are the correct indentation styles:
Expand All @@ -59,6 +60,7 @@ Correspondingly, here are the correct indentation styles:
def some_function(
arg1: int,
arg2: list,
*,
arg3: bool = None,
):
print(arg1)
Expand Down Expand Up @@ -111,9 +113,19 @@ def some_func(

## Rationale

When we only indent by 4 spaces in function definitions, it is difficult to visually distinguish function arguments with the function name and the function body. This reduces readability.
When we only indent by 4 spaces in function definitions, it is difficult to visually distinguish function arguments with the function name and the function body. This reduces readability. It is similar for base classes in class definitions, but it's less of an issue than function definitions.

It is similar for base classes in class definitions, but it's less of an issue than function definitions.
Specifically, the following style is allowed by PEP8 but this plugin still consider it wrong, because it could lead to messy code diff when refactoring:

```diff
- def some_very_very_very_very_long_func(arg1,
+ def refactored_function_name(arg1,
arg2,
arg3,

):
return None
```

## Interaction with other style checkers and formatters

Expand Down
243 changes: 227 additions & 16 deletions flake8_indent_in_def.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import ast
from typing import Generator, Tuple, Type, Any, List, Union
import tokenize
from enum import Enum
from typing import Generator, Tuple, Type, Any, List, Union, Dict, Optional

import importlib.metadata as importlib_metadata

Expand All @@ -17,22 +19,83 @@

EXPECTED_INDENT = 8 # https://peps.python.org/pep-0008/#indentation

OP_TOKEN_CODE = 54 # "OP" means operator token
NL_TOKEN_CODE = 61 # "NL" means new line
NEWLINE_TOKEN_CODE = 4


class ArgType(Enum):
REGULAR = 1
POS_ONLY = 2
KW_ONLY = 3
VARARG = 4
KWARG = 5


class Visitor(ast.NodeVisitor):
def __init__(self) -> None:
def __init__(self, tokens: List[tokenize.TokenInfo]) -> None:
self._tokens: List[tokenize.TokenInfo] = tokens
self.violations: List[Tuple[int, int, str]] = []

def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
self._visit_func_args_or_class_bases(node, node.args.args, is_func=True)
sorted_args, arg_type_lookup, has_star = self._collect_func_args(node)
self._visit_func_args_or_class_bases(
node=node,
args_or_bases=sorted_args,
is_func=True,
arg_type_lookup=arg_type_lookup,
)
if has_star:
self._visit_star_in_arg_list(node)

def visit_ClassDef(self, node: ast.ClassDef) -> None:
self._visit_func_args_or_class_bases(node, node.bases, is_func=False)

@classmethod
def _collect_func_args(
cls,
node: ast.FunctionDef,
) -> Tuple[List, Dict[ast.arg, ArgType], bool]:
all_args: List[ast.arg] = []
arg_type_lookup: Dict[ast.arg, ArgType] = {}

has_star = False # it means there's a '*,' in the argument list

if node.args.args:
all_args.extend(node.args.args)
for arg_ in node.args.args:
arg_type_lookup[arg_] = ArgType.REGULAR

if node.args.posonlyargs:
has_star = True
all_args.extend(node.args.posonlyargs)
for arg_ in node.args.posonlyargs:
arg_type_lookup[arg_] = ArgType.POS_ONLY

if node.args.kwonlyargs:
has_star = True
all_args.extend(node.args.kwonlyargs)
for arg_ in node.args.kwonlyargs:
arg_type_lookup[arg_] = ArgType.KW_ONLY

if node.args.vararg is not None:
all_args.append(node.args.vararg)
arg_type_lookup[node.args.vararg] = ArgType.VARARG

if node.args.kwarg is not None:
all_args.append(node.args.kwarg)
arg_type_lookup[node.args.kwarg] = ArgType.KWARG

sorted_args = sorted(all_args, key=lambda x: x.lineno, reverse=False)

return sorted_args, arg_type_lookup, has_star

def _visit_func_args_or_class_bases(
self,
node: Union[ast.FunctionDef, ast.ClassDef],
args_or_bases: Union[List[ast.arg], List[ast.Name]],
is_func: bool,
arg_type_lookup: Optional[Dict[ast.arg, ArgType]] = None,
) -> None:
if is_func:
code01 = IND101
Expand All @@ -42,41 +105,189 @@ def _visit_func_args_or_class_bases(
code02 = IND202

if len(args_or_bases) > 0:
def_line_num = node.lineno
function_def_line_num = node.lineno
def_col_offset = node.col_offset

if args_or_bases[0].lineno == def_line_num:
if args_or_bases[0].lineno == function_def_line_num:
for item in args_or_bases[1:]:
if item.lineno != def_line_num:
self.violations.append(
(item.lineno, item.col_offset + 1, code02),
)
if item.lineno != function_def_line_num:
arg_type = arg_type_lookup[item] if is_func else None
col_offset = self._calc_col_offset(item, arg_type)
self.violations.append((item.lineno, col_offset, code02))

for i, item in enumerate(args_or_bases):
if i == 0:
prev_item_line_num = def_line_num
prev_item_line_num = function_def_line_num
else:
prev_item_line_num = args_or_bases[i - 1].lineno

# Only enforce indentation when this arg is on a new line
if item.lineno > prev_item_line_num:
if item.col_offset - def_col_offset != EXPECTED_INDENT:
self.violations.append(
(item.lineno, item.col_offset + 1, code01),
)
arg_type = arg_type_lookup[item] if is_func else None
if self._not_expected_indent(item, def_col_offset, arg_type):
col_offset = self._calc_col_offset(item, arg_type)
self.violations.append((item.lineno, col_offset, code01))

self.generic_visit(node)

@classmethod
def _calc_col_offset(
cls,
item: Union[ast.arg, ast.Name],
arg_type: Optional[ArgType] = None,
) -> int:
if isinstance(item, ast.Name): # this means base class
return item.col_offset + 1

arg_type = ArgType.REGULAR if arg_type is None else arg_type
return cls._calc_col_offset_for_func_args(item, arg_type)

@classmethod
def _calc_col_offset_for_func_args(
cls,
arg_: ast.arg,
arg_type: ArgType,
) -> int:
if arg_type in {ArgType.REGULAR, ArgType.POS_ONLY, ArgType.KW_ONLY}:
return arg_.col_offset + 1

if arg_type == ArgType.VARARG:
return arg_.col_offset + 1 - 1 # '-1' because of '*' before vararg

if arg_type == ArgType.KWARG:
return arg_.col_offset + 1 - 2 # '-2' because of '**' before kwarg

@classmethod
def _not_expected_indent(
cls,
item: Union[ast.arg, ast.Name],
def_col_offset: int,
arg_type: Optional[ArgType] = None,
) -> bool:
if isinstance(item, ast.Name): # this means base class
return item.col_offset - def_col_offset != EXPECTED_INDENT

arg_type = ArgType.REGULAR if arg_type is None else arg_type
if arg_type in {ArgType.REGULAR, ArgType.POS_ONLY, ArgType.KW_ONLY}:
expected_indent_ = EXPECTED_INDENT
elif arg_type == ArgType.VARARG:
expected_indent_ = EXPECTED_INDENT + 1 # because '*vararg'
elif arg_type == ArgType.KWARG:
expected_indent_ = EXPECTED_INDENT + 2 # because '**kwarg'
else:
# this branch can't be reached in theory
expected_indent_ = EXPECTED_INDENT

return item.col_offset - def_col_offset != expected_indent_

def _visit_star_in_arg_list(self, node: ast.FunctionDef):
func_def_lineno = node.lineno
func_end_lineno = node.end_lineno

# We skip the last 2 tokens because the last token from the tokenizer
# is always ENDMARKER (type 0) and '*' cannot be the 2nd to last token.
# And then we also skip the 0th token because that is always
# the ENCODING token (type 62).
for i in range(1, len(self._tokens) - 2):
this_token = self._tokens[i]
this_lineno = this_token.start[0]
this_col = this_token.start[1] + 1
if func_def_lineno < this_lineno <= func_end_lineno:
if self._is_a_violation(node, self._tokens, i):
self.violations.append((this_lineno, this_col, IND101))

@classmethod
def _is_a_violation(
cls,
node: ast.FunctionDef,
tokens: List[tokenize.TokenInfo],
index: int,
) -> bool:
prev_token = tokens[index - 1]
this_token = tokens[index]
next_token = tokens[index + 1]
next_next_token = tokens[index + 2]

is_qualifying_star = (
cls._is_star_comma(this=this_token, next=next_token)
or cls._is_star_newline_comma(
this=this_token,
next=next_token,
next_next=next_next_token,
)
)

is_1st_symbol = cls._star_is_1st_non_empty_symbol_on_this_line(
prev=prev_token,
this=this_token,
)

not_expected_indent = (
this_token.start[1] - node.col_offset != EXPECTED_INDENT
)

return is_qualifying_star and is_1st_symbol and not_expected_indent

@classmethod
def _is_star_comma(
cls,
this: tokenize.TokenInfo,
next: tokenize.TokenInfo,
) -> bool:
return cls._is_star(this) and cls._is_comma(next)

@classmethod
def _is_star_newline_comma(
cls,
this: tokenize.TokenInfo,
next: tokenize.TokenInfo,
next_next: tokenize.TokenInfo,
) -> bool:
return (
cls._is_star(this)
and cls._is_newline(next)
and cls._is_comma(next_next)
)

@classmethod
def _is_star(cls, token: tokenize.TokenInfo) -> bool:
return token.type == OP_TOKEN_CODE and token.string == '*'

@classmethod
def _is_comma(cls, token: tokenize.TokenInfo) -> bool:
return token.type == OP_TOKEN_CODE and token.string == ','

@classmethod
def _is_newline(cls, token: tokenize.TokenInfo) -> bool:
return (
token.type in {NL_TOKEN_CODE, NEWLINE_TOKEN_CODE}
and token.string == '\n'
)

@classmethod
def _star_is_1st_non_empty_symbol_on_this_line(
cls,
prev: tokenize.TokenInfo, # the previous token
this: tokenize.TokenInfo, # it's expected to be '*'
) -> bool:
return prev.start[0] < this.start[0] or prev.string.strip() == ''


class Plugin:
name = __name__
version = importlib_metadata.version(__name__)

def __init__(self, tree: ast.AST) -> None:
def __init__(
self,
tree: ast.AST,
file_tokens: List[tokenize.TokenInfo] = None,
) -> None:
self._tree = tree
self._file_tokens = file_tokens

def run(self) -> Generator[Tuple[int, int, str, Type[Any]], None, None]:
visitor = Visitor()
visitor = Visitor(self._file_tokens)

visitor.visit(self._tree)
for line, col, msg in visitor.violations:
yield line, col, msg, type(self)
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = flake8_indent_in_def
version = 0.1.0
version = 0.1.1
description = A flake8 plugin that enforces 8-space indentation in function/class definitions
long_description = file: README.md
long_description_content_type = text/markdown
Expand Down
Loading

0 comments on commit a2dc5e1

Please sign in to comment.