Skip to content

Commit

Permalink
Merge pull request #1046 from marc-vdm/first_level_cont
Browse files Browse the repository at this point in the history
CA updates + CA bugfixes + First-Tier contribution analysis tab
  • Loading branch information
mrvisscher authored Dec 17, 2024
2 parents 023eb5a + 26b1f14 commit a094a50
Show file tree
Hide file tree
Showing 7 changed files with 647 additions and 52 deletions.
64 changes: 44 additions & 20 deletions activity_browser/bwutils/multilca.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
from copy import deepcopy
from typing import Iterable, Optional, Union
from logging import getLogger

Expand Down Expand Up @@ -415,6 +416,7 @@ def _build_dict(
rev_dict: dict,
limit: int,
limit_type: str,
total_range: bool,
) -> dict:
"""Sort the given contribution array on method or reference flow column.
Expand All @@ -433,15 +435,32 @@ def _build_dict(
"""
topcontribution_dict = dict()
for fu_or_method, col in FU_M_index.items():
contribution_col = contributions[col, :]
if total_range: # total is based on the range
total = np.abs(contribution_col).sum()
else: # total is based on the score
total = contribution_col.sum()

top_contribution = ca.sort_array(
contributions[col, :], limit=limit, limit_type=limit_type
contribution_col, limit=limit, limit_type=limit_type, total=total
)

# split and calculate remaining rest sections for positive and negative part
pos_rest = (
np.sum(contribution_col[contribution_col > 0])
- np.sum(top_contribution[top_contribution[:, 0] > 0][:, 0])
)
neg_rest = (
np.sum(contribution_col[contribution_col < 0])
- np.sum(top_contribution[top_contribution[:, 0] < 0][:, 0])
)

cont_per = OrderedDict()
cont_per.update(
{
("Total", ""): contributions[col, :].sum(),
("Rest", ""): contributions[col, :].sum()
- top_contribution[:, 0].sum(),
("Total", ""): total,
("Rest (+)", ""): pos_rest,
("Rest (-)", ""): neg_rest,
}
)
for value, index in top_contribution:
Expand Down Expand Up @@ -544,12 +563,12 @@ def join_df_with_metadata(

if special_keys:
# replace index keys with labels
try: # first put Total and Rest to the first two positions in the dataframe
try: # first put Total, Rest (+) and Rest (-) to the first three positions in the dataframe
complete_index = special_keys + keys
joined = joined.reindex(complete_index, axis="index", fill_value=0.0)
except:
log.error(
"Could not put Total and Rest on positions 0 and 1 in the dataframe."
"Could not put 'Total', 'Rest (+)' and 'Rest (-)' on positions 0, 1 and 2 in the dataframe."
)
joined.index = cls.get_labels(joined.index, fields=x_fields)
return joined
Expand Down Expand Up @@ -583,18 +602,20 @@ def get_labelled_contribution_dict(
# If the cont_dict has tuples for keys, coerce df.columns into MultiIndex
if all(isinstance(k, tuple) for k in cont_dict.keys()):
df.columns = pd.MultiIndex.from_tuples(df.columns)
special_keys = [("Total", ""), ("Rest", "")]

special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")]
# replace all 0 values with NaN and drop all rows with only NaNs
# EXCEPT for the special keys
df.index = ids_to_keys(df.index)
index = (
df.loc[df.index.difference(special_keys)]
.replace(0, np.nan)
.dropna(how="all")
.index.union(special_keys)
)
df = df.loc[index]
df = df.replace(0, np.nan)

# sort on absolute mean of a row
df_bot = deepcopy(df.loc[df.index.difference(special_keys)].dropna(how="all"))

func = lambda row: np.nanmean(np.abs(row))
if len(df_bot) > 1: # but only sort if there is something to sort
df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1)
df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True)
del df_bot["_sort_me_"]

df = pd.concat([df.iloc[:3, :], df_bot], axis=0)

if not mask:
joined = self.join_df_with_metadata(
Expand All @@ -617,7 +638,7 @@ def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame
"""Given a dataframe, adjust the unit of the table to either match the given method, or not exist."""
if "unit" not in df.columns:
return df
keys = df.index[~df["index"].isin({"Total", "Rest"})]
keys = df.index[~df["index"].isin({"Total", "Rest (+)", "Rest (-)"})]
unit = bd.Method(method).metadata.get("unit") if method else "unit"
df.loc[keys, "unit"] = unit
return df
Expand Down Expand Up @@ -791,6 +812,7 @@ def top_elementary_flow_contributions(
limit: int = 5,
normalize: bool = False,
limit_type: str = "number",
total_range: bool = True,
**kwargs,
) -> pd.DataFrame:
"""Return top EF contributions for either functional_unit or method.
Expand All @@ -807,6 +829,7 @@ def top_elementary_flow_contributions(
limit : The number of top contributions to consider
normalize : Determines whether or not to normalize the contribution values
limit_type : The type of limit, either 'number' or 'percent'
total_range : Whether to consider the total for contributions the range (True) or the score (False)
Returns
-------
Expand All @@ -830,7 +853,7 @@ def top_elementary_flow_contributions(
contributions = self.normalize(contributions)

top_cont_dict = self._build_dict(
contributions, index, rev_index, limit, limit_type
contributions, index, rev_index, limit, limit_type, total_range
)
labelled_df = self.get_labelled_contribution_dict(
top_cont_dict, x_fields=x_fields, y_fields=y_fields, mask=mask
Expand All @@ -846,6 +869,7 @@ def top_process_contributions(
limit: int = 5,
normalize: bool = False,
limit_type: str = "number",
total_range: bool = True,
**kwargs,
) -> pd.DataFrame:
"""Return top process contributions for functional_unit or method.
Expand Down Expand Up @@ -885,7 +909,7 @@ def top_process_contributions(
contributions = self.normalize(contributions)

top_cont_dict = self._build_dict(
contributions, index, rev_index, limit, limit_type
contributions, index, rev_index, limit, limit_type, total_range
)
labelled_df = self.get_labelled_contribution_dict(
top_cont_dict, x_fields=x_fields, y_fields=y_fields, mask=mask
Expand Down
81 changes: 69 additions & 12 deletions activity_browser/docs/wiki/LCA-Results.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

## Contribution Analysis
### Differences between approaches
Activity Browser has two contribution analysis approaches available to assess results,
`Elementary Flow (EF) Contributions` and `Process contributions`.
Activity Browser has three contribution analysis approaches available to assess results,
`Elementary Flow (EF) Contributions`, `Process contributions` and `First Tier (FT) Contributions`.

Before we discuss the different approaches, we introduce a small example for the production of _'steel'_:

Expand Down Expand Up @@ -45,37 +45,70 @@ For the system and functional unit above, this would be:

The _contribution matrix_ shows the dis-aggregated results for each individual biosphere flow for each activity.

#### EF contributions
#### Elementary Flow (EF) contributions
If we take the sum of the _rows_ to one row, we get the EF contributions
(the contribution of all CO<sub>2</sub> and CH<sub>4</sub> impacts together).

In the case above, the EF contributions are:
- CO<sub>2</sub>: 1.5404... (96.3%)
- CH<sub>4</sub>: 0.0596... (3.7%)

#### Process contributions
If we take the sum of the _columns_ to one column, we get the process contributions
(the contribution of all coal, electricity and steel production impacts together).

In the case above, the process contributions are:
- coal production: 0.0596... (3.7%)
- electricity production: 0.5404... (62.5%)
- steel production: 1 (33.8%)
- electricity production: 0.5404... (33.8%)
- steel production: 1 (62.5%)

To summarize, the difference between EF and process contributions is the direction the contribution matrix is summed.

#### First Tier (FT) contributions
The FT contributions take a very different approach: instead of calculating the impact of processes anywhere in the
system, FT contributions cover the process of the functional unit and all its direct inputs.
By calculating the impact of the inputs to the functional unit, the impacts are accumulated.
In the example above this would mean that the impact of _'coal'_ is calculated from only the coal needed directly by
_'steel production'_; the impact from coal produced for _'electricity production'_ would be included in the impact of
_'electricity'_.
Together with the _direct_ impact from _'steel production'_, this is the _first tier_.

This approach becomes more useful when using large systems to accumulate impacts into relevant parts of your foreground
system.

Activity Browser calculates these impacts by applying _partial LCAs_ (LCA on part of the functional unit) on the inputs,
scaled to the functional unit.

In the case above, the FT contributions are:
- coal: 0.0298... (1.9%)
- electricity: 0.5702... (35.6%)
- steel production: 1 (62.5%)

Note that we now use the names of the products _'coal'_ and _'electricity'_ as we now assess the impacts of these inputs,
not the processes.

Note also how the impact of _'steel production'_ is unchanged, as this still shows the _direct_ impact, but that the
impact of _'electricity'_ is higher than _'electricity production'_ in the process contributions.
This is due to the fact that we include all impacts in the production of electricity, not just the _direct_ impacts.
However, these results are compensated by a lower impact of _'coal'_ (compared to process contributions of
_'coal production'_).
The total impact is still 1.6.

### Manipulating results
In this section we generalize a little bit for the different contribution approaches,
we call the _from_ part of the contributions (the EFs or activities above) _entities_.
we call the _from_ part of the contributions (the EFs or activities or FT above) _entities_.

There are several ways Activity Browser manipulates your results by default.
- The results are **sorted** so that the row with the largest (absolute) average values are shown first.
- A `cut-off` of 5% is applied, this only shows results that contribute at least 5% to the total result,
all other entities are grouped into a `Rest` group.
- A `cut-off` of 5% is applied, this only shows results that contribute at least 5% to the total range of results,
all other entities are grouped into the `Rest (+)` or `Rest (-)` group.
- The contributions are _normalized_ to the impact of that reference flow, meaning they are shown as a percentage,
counting up to 100% for every item you compare.

These actions are taken to show you the most relevant results.

You can manually manipulate the contribution results in the next menu, which we explain bit by bit below.
You can manually manipulate the contribution results in the menu shown below, which we will explain bit by bit
in the next sections.
![contributions cutoff](./assets/contribution_manipulation.png)

#### Cut-off
Expand All @@ -84,7 +117,8 @@ The `Relative` mode shows contributions _from_ entities of _x_% or higher.
The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute).
You can adjust the `Cut-off level` to change how many results you see.

All results that don't make the cut-off will be grouped into the `Rest` group.
All results that don't make the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups.
The Rest groups are only present when there are positive or negative numbers remaining for the respective rest groups.

#### Compare
The `Compare` menu allows you to compare different dimensions of results.
Expand All @@ -106,8 +140,29 @@ By default, Activity Browser shows a plot and a table.
You can disable one of them if you want to focus on one of them.

#### Relative and Absolute
Finally, you can choose between `Relative` and `Absolute` results.
The `Relative` results will sum to 100%, the `Absolute` results will sum to the impact score.
You can choose between `Relative` and `Absolute` results.
The `Relative` results will sum to 100% (the total score), the `Absolute` results will sum to the impact score.

#### Range and Score
If the Cut-off type is `Relative`, you can choose between `Range` and `Score`.
This determines what is used as the _total_ against which the relative contributions are calculated.
For `Range`, this is the full _range_ of results, for example, if all your negative results together have a score of -2
and all your positive results together have a score of 10, the _range_ is 12 (-2 * -1 + 10).
For `Score`, this is the total score (sum) of the results, for example, if all your negative results together have a
score of -2 and all your positive results together have a score of 10, the _score_ is 8 (-2 + 10).
The `Range` or `Score` setting is only used when 1) your Cut-off type is `Relative`
and 2) your results contain both positive and negative results.

### Positive and negative numbers in contribution results
It can happen in LCA that you get both positive and negative numbers in your contribution results.
Possible reasons for this include negative characterization factors, flows with negative numbers or the use of substitution flows.

When there are both positive and negative numbers in the result, Activity Browser will show a marker to indicate
where the total score is, and show positive and negative contributions to the impact separately.

Below is a simple example (with unrealistic values) to demonstrate this:

![CA example with positive and negative results](./assets/ca_positive_negative_example.png)

## Sankey
The `Sankey` tab shows results from [graph traversal](https://docs.brightway.dev/projects/graphtools/en/latest/index.html).
Expand All @@ -125,6 +180,8 @@ The `calculation depth` will stop traversing the supply chain once that number o
In the Sankey, the red arrows show the _cumulative_ impact of the _product_ flow
(_direct_ from that process and _indirect_ from all upstream processes involved in producing that product),
the boxes show the _direct_ (process contribution) impact of that process.
Effectively, the sankey graph is the First Tier contribution analysis, repeated for every activity you see in the graph,
making it _n-tier_ contributions.

Using the example above in the [contribution analysis](#contribution-analysis) section, we show the sankey below.
The [process contribution](#process-contributions) results are also shown in the boxes below.
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified activity_browser/docs/wiki/assets/contribution_manipulation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit a094a50

Please sign in to comment.