Skip to content

Commit

Permalink
[SPARK-44984][PYTHON][CONNECT] Remove _get_alias from DataFrame
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Remove `_get_alias` from DataFrame

### Why are the changes needed?
`_get_alias` was added in the [initial PR](apache@6637bbe), but seems unneeded

- field `alias` in `plan.Project` is always `None`;
- `_get_alias` takes no parameter, but is used to replace a specific column name; the logic is questionable when the column name varies;

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI

### Was this patch authored or co-authored using generative AI tooling?
No

Closes apache#42698 from zhengruifeng/py_connect_del_alias.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
  • Loading branch information
zhengruifeng committed Aug 28, 2023
1 parent 5b69dfd commit 474f64a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 14 deletions.
15 changes: 2 additions & 13 deletions python/pyspark/sql/connect/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,14 +1573,6 @@ def sampleBy(

sampleBy.__doc__ = PySparkDataFrame.sampleBy.__doc__

def _get_alias(self) -> Optional[str]:
p = self._plan
while p is not None:
if isinstance(p, plan.Project) and p.alias:
return p.alias
p = p._child
return None

def __getattr__(self, name: str) -> "Column":
if self._plan is None:
raise SparkConnectException("Cannot analyze on empty plan.")
Expand All @@ -1607,9 +1599,8 @@ def __getattr__(self, name: str) -> "Column":
"'%s' object has no attribute '%s'" % (self.__class__.__name__, name)
)

alias = self._get_alias()
return _to_col_with_plan_id(
col=alias if alias is not None else name,
col=name,
plan_id=self._plan._plan_id,
)

Expand All @@ -1625,8 +1616,6 @@ def __getitem__(self, item: Union[Column, List, Tuple]) -> "DataFrame":

def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Column, "DataFrame"]:
if isinstance(item, str):
# Check for alias
alias = self._get_alias()
if self._plan is None:
raise SparkConnectException("Cannot analyze on empty plan.")

Expand All @@ -1635,7 +1624,7 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum
self.select(item).isLocal()

return _to_col_with_plan_id(
col=alias if alias is not None else item,
col=item,
plan_id=self._plan._plan_id,
)
elif isinstance(item, Column):
Expand Down
1 change: 0 additions & 1 deletion python/pyspark/sql/connect/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,6 @@ class Project(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], *columns: "ColumnOrName") -> None:
    """Create a Project (column-selection) plan node.

    Parameters
    ----------
    child : Optional[LogicalPlan]
        The input plan this projection reads from; may be ``None``.
    *columns : ColumnOrName
        The columns (or column names) to project.
    """
    super().__init__(child)
    # Materialize the varargs into a list so the expressions can be
    # inspected and validated below.
    self._columns = list(columns)
    # NOTE(review): always initialized to None here; never assigned a
    # non-None value anywhere visible — the commit removes it as dead state.
    self.alias: Optional[str] = None
    # Fail fast on malformed column expressions at construction time.
    self._verify_expressions()

def _verify_expressions(self) -> None:
Expand Down

0 comments on commit 474f64a

Please sign in to comment.