Skip to content

Commit

Permalink
[SPARK-44984][PYTHON][CONNECT] Remove _get_alias from DataFrame
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Remove `_get_alias` from DataFrame

### Why are the changes needed?
`_get_alias` was added in the [initial PR](apache@6637bbe), but seems unneeded

- field `alias` in `plan.Project` is always `None`;
- `_get_alias` takes no parameter, but is used to replace a specific column name; the logic is questionable when the column name varies;

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI

### Was this patch authored or co-authored using generative AI tooling?
No

Closes apache#42698 from zhengruifeng/py_connect_del_alias.

Authored-by: Ruifeng Zheng <ruifengz@apache.org>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
  • Loading branch information
zhengruifeng committed Aug 28, 2023
1 parent 5b69dfd commit 474f64a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 14 deletions.
15 changes: 2 additions & 13 deletions python/pyspark/sql/connect/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,14 +1573,6 @@ def sampleBy(

sampleBy.__doc__ = PySparkDataFrame.sampleBy.__doc__

def _get_alias(self) -> Optional[str]:
p = self._plan
while p is not None:
if isinstance(p, plan.Project) and p.alias:
return p.alias
p = p._child
return None

def __getattr__(self, name: str) -> "Column":
if self._plan is None:
raise SparkConnectException("Cannot analyze on empty plan.")
Expand All @@ -1607,9 +1599,8 @@ def __getattr__(self, name: str) -> "Column":
"'%s' object has no attribute '%s'" % (self.__class__.__name__, name)
)

alias = self._get_alias()
return _to_col_with_plan_id(
col=alias if alias is not None else name,
col=name,
plan_id=self._plan._plan_id,
)

Expand All @@ -1625,8 +1616,6 @@ def __getitem__(self, item: Union[Column, List, Tuple]) -> "DataFrame":

def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Column, "DataFrame"]:
if isinstance(item, str):
# Check for alias
alias = self._get_alias()
if self._plan is None:
raise SparkConnectException("Cannot analyze on empty plan.")

Expand All @@ -1635,7 +1624,7 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum
self.select(item).isLocal()

return _to_col_with_plan_id(
col=alias if alias is not None else item,
col=item,
plan_id=self._plan._plan_id,
)
elif isinstance(item, Column):
Expand Down
1 change: 0 additions & 1 deletion python/pyspark/sql/connect/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,6 @@ class Project(LogicalPlan):
def __init__(self, child: Optional["LogicalPlan"], *columns: "ColumnOrName") -> None:
    """Create a Project (column-selection) plan node.

    Parameters
    ----------
    child : Optional[LogicalPlan]
        The input plan this projection reads from; may be ``None``.
    *columns : ColumnOrName
        The columns (or column names) to project.
    """
    super().__init__(child)
    # Materialize the varargs into a list so the expressions can be
    # inspected and validated below.
    self._columns = list(columns)
    # NOTE(review): always initialized to None here; never assigned a
    # non-None value anywhere visible — the commit removes it as dead state.
    self.alias: Optional[str] = None
    # Fail fast on malformed column expressions at construction time.
    self._verify_expressions()

def _verify_expressions(self) -> None:
Expand Down

0 comments on commit 474f64a

Please sign in to comment.