diff --git a/CHANGELOG.md b/CHANGELOG.md index 231a96d..2d819e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,15 @@ ## Development Changes: +* Update table suppression when totals are true for pivot table ([#165](https://github.com/AI-SDC/ACRO/pull/165)) +* Fix the problem of shape mismatch when there are two columns and the aggfunc is count or sum ([#167](https://github.com/AI-SDC/ACRO/pull/167)) +* Remove all files and folders created during testing ([#168](https://github.com/AI-SDC/ACRO/pull/168)) +* Create an example notebook with simple examples of acro ([#170](https://github.com/AI-SDC/ACRO/pull/170)) +* Add support for histogram ([#176](https://github.com/AI-SDC/ACRO/pull/176)) +* Add inherited members from acro_tables and acro_regression to the sphinx docs ([#177](https://github.com/AI-SDC/ACRO/pull/177)) +* Update the R help function ([#178](https://github.com/AI-SDC/ACRO/pull/178)) +* Update the finalise function by checking the provided folder name and ask for new one if it exists ([#179](https://github.com/AI-SDC/ACRO/pull/179)) +* Add histogram and survival analysis to R ([#182](https://github.com/AI-SDC/ACRO/pull/182)) ## Version 0.4.3 (Sep 22, 2023) diff --git a/acro/acro_tables.py b/acro/acro_tables.py index 295d746..ddc72ea 100644 --- a/acro/acro_tables.py +++ b/acro/acro_tables.py @@ -137,27 +137,7 @@ def crosstab( # pylint: disable=too-many-arguments,too-many-locals normalize, ) # delete empty rows and columns from table - deleted_rows = [] - deleted_cols = [] - # define empty columns and rows using boolean masks - empty_cols_mask = table.sum(axis=0) == 0 - empty_rows_mask = table.sum(axis=1) == 0 - - deleted_cols = list(table.columns[empty_cols_mask]) - table = table.loc[:, ~empty_cols_mask] - deleted_rows = list(table.index[empty_rows_mask]) - table = table.loc[~empty_rows_mask, :] - - # create a message with the deleted column's names - comments = [] - if deleted_cols: - msg_cols = ", ".join(str(col) for col in deleted_cols) - comments.append(f"Empty columns: {msg_cols} were deleted.") - if deleted_rows: - msg_rows = ", ".join(str(row) for row in deleted_rows) - comments.append(f"Empty rows: {msg_rows} were deleted.") - if comments: - logger.info(" ".join(comments)) + table, comments = delete_empty_rows_columns(table) masks = create_crosstab_masks( index, @@ -244,6 +224,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals (hierarchical indexes) on the index and columns of the result DataFrame. + To provide consistent behaviour with different aggregation functions, + 'empty' rows or columns -i.e. that are all NaN or 0 (count,sum) are removed. + Parameters ---------- data : DataFrame @@ -307,6 +290,9 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals sort, ) + # delete empty rows and columns from table + table, comments = delete_empty_rows_columns(table) + # suppression masks to apply based on the following checks masks: dict[str, DataFrame] = {} @@ -387,6 +373,7 @@ def pivot_table( # pylint: disable=too-many-arguments,too-many-locals summary=summary, outcome=outcome, output=[table], + comments=comments, ) return table @@ -837,6 +824,45 @@ def create_crosstab_masks( # pylint: disable=too-many-arguments,too-many-locals return masks +def delete_empty_rows_columns(table: DataFrame) -> tuple[DataFrame, list[str]]: + """Deletes empty rows and columns from table. + + Parameters + ---------- + table : DataFrame + The table where the empty rows and columns will be deleted from. + + Returns + ------- + DataFrame + The resulting table where the empty columns and rows were deleted. + list[str] + A comment showing information about the deleted columns and rows. + """ + deleted_rows = [] + deleted_cols = [] + # define empty columns and rows using boolean masks + empty_cols_mask = table.sum(axis=0) == 0 + empty_rows_mask = table.sum(axis=1) == 0 + + deleted_cols = list(table.columns[empty_cols_mask]) + table = table.loc[:, ~empty_cols_mask] + deleted_rows = list(table.index[empty_rows_mask]) + table = table.loc[~empty_rows_mask, :] + + # create a message with the deleted column's names + comments = [] + if deleted_cols: + msg_cols = ", ".join(str(col) for col in deleted_cols) + comments.append(f"Empty columns: {msg_cols} were deleted.") + if deleted_rows: + msg_rows = ", ".join(str(row) for row in deleted_rows) + comments.append(f"Empty rows: {msg_rows} were deleted.") + if comments: + logger.info(" ".join(comments)) + return (table, comments) + + def rounded_survival_table(survival_table): """Calculates the rounded surival function.""" death_censored = ( diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb index bb20099..84ea5eb 100644 --- a/notebooks/test.ipynb +++ b/notebooks/test.ipynb @@ -1657,141 +1657,6 @@ "table" ] }, - { - "cell_type": "code", - "execution_count": 17, - "id": "3f016823", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
year | \n", - "2010 | \n", - "2011 | \n", - "2012 | \n", - "2013 | \n", - "2014 | \n", - "2015 | \n", - "All | \n", - "
---|---|---|---|---|---|---|---|
grant_type | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
G | \n", - "138906688.0 | \n", - "127533696.0 | \n", - "171878704.0 | \n", - "203357200.0 | \n", - "206222208.0 | \n", - "133601200.0 | \n", - "9.814997e+08 | \n", - "
N | \n", - "0.0 | \n", - "7192804.0 | \n", - "7779685.0 | \n", - "8728330.0 | \n", - "7858697.0 | \n", - "8501187.0 | \n", - "4.006070e+07 | \n", - "
R | \n", - "504137056.0 | \n", - "532464704.0 | \n", - "480105472.0 | \n", - "511361408.0 | \n", - "554594176.0 | \n", - "551457280.0 | \n", - "3.134120e+09 | \n", - "
R/G | \n", - "46544000.0 | \n", - "128380000.0 | \n", - "134480000.0 | \n", - "134125000.0 | \n", - "142766000.0 | \n", - "146228992.0 | \n", - "7.325240e+08 | \n", - "
All | \n", - "689587776.0 | \n", - "795571264.0 | \n", - "794243904.0 | \n", - "857571968.0 | \n", - "911441088.0 | \n", - "839788672.0 | \n", - "4.888204e+09 | \n", - "