Source code for message_ix_models.model.material.report.config

from dataclasses import dataclass, field
from itertools import count, product
from typing import Literal

import pandas as pd
from ixmp.report.common import RENAME_DIMS

from message_ix_models.util import package_data_path



[docs]
@dataclass
class Config:
    """Configuration for reporting of a subset of material data.

    Instances are usually created with :meth:`from_files`; :attr:`mapping` is then
    populated by :meth:`use_vars_dict` and extended by :meth:`use_aggregates_dict`.
    """

    #: Prefix or initial fragment of IAMC ‘variable’ name.
    iamc_prefix: str

    #: Units of measure for the reported data. Used by :meth:`use_vars_dict` as the
    #: default for entries that do not give their own "unit".
    unit: Literal["Mt/yr", "GWa", "Mt CH4/yr", "GW"]

    #: :mod:`message_ix.report` key from which to retrieve the data.
    var: Literal["out", "in", "ACT", "emi", "CAP"]

    #: Data frame with:
    #:
    #: - MultiIndex levels including 1 or more of :math:`(c, l, m, t)`.
    #: - 3 columns:
    #:   - "iamc_name": a (fragment of) an IAMC ‘variable’ name. This is appended
    #:     to :attr:`iamc_prefix` to construct a complete name.
    #:   - "short_name": a short identifier for the entry. Aggregates handled by
    #:     :meth:`use_aggregates_dict` refer to their components by this value.
    #:   - "unit": units of measure.
    #:
    #: This expresses a mapping between the index entries (=indices of reported data)
    #: and the information in the 3 columns.
    #:
    #: The default is an empty data frame that has only the 3 columns.
    mapping: pd.DataFrame = field(
        default_factory=lambda: pd.DataFrame(
            columns=["iamc_name", "short_name", "unit"],
        )
    )


[docs]
    @classmethod
    def from_files(cls, category: str) -> "Config":
        """Create a Config instance from 1 or 2 YAML files.

        A file like :file:`message_ix_models/data/material/reporting/{category}.yaml` is
        read and used to populate a new instance. The file must have:

        - Top-level keys corresponding to :attr:`iamc_prefix`, :attr:`unit`, and
          :attr:`var`.
        - A top-level key ``vars:`` containing a mapping compatible with
          :meth:`use_vars_dict`.

        If a file exists in the same directory named like
        :file:`{category}_aggregates.yaml`, it is also read, and its contents passed to
        :meth:`use_aggregates_dict`. An aggregates file that is missing or empty is
        treated as "no aggregates".

        Parameters
        ----------
        category :
            Stem of the YAML file name(s) to read.

        Raises
        ------
        FileNotFoundError
            If :file:`{category}.yaml` does not exist.
        KeyError
            If :file:`{category}.yaml` has no top-level ``vars:`` key.
        """
        import yaml

        # Handle basic configuration file. YAML is read as UTF-8 explicitly so that
        # the result does not depend on the platform's locale encoding.
        path = package_data_path("material", "reporting", f"{category}.yaml")
        with open(path, encoding="utf-8") as f:  # Raises FileNotFoundError
            kw = yaml.safe_load(f)  # Raises on invalid YAML

        # Remove the "vars" top-level key from the file; the remaining keys are the
        # keyword arguments to the constructor
        var_entries = kw.pop("vars")

        # Create a Config instance
        result = cls(**kw)

        # Populate the mapping data frame using the "vars" entries
        result.use_vars_dict(var_entries)

        # Handle aggregates configuration file
        path_agg = path.with_name(f"{category}_aggregates.yaml")
        try:
            with open(path_agg, encoding="utf-8") as f:
                # safe_load() gives None for an empty document; treat that the same
                # as a missing file instead of passing None on
                data_agg = yaml.safe_load(f) or {}
        except FileNotFoundError:
            data_agg = {}  # No aggregates file

        result.use_aggregates_dict(data_agg)

        return result



[docs]
    def check_mapping(self) -> None:
        """Assert that :attr:`mapping` has the correct structure and is complete."""
        assert self.mapping.empty or set(self.mapping.index.names) <= set("clmt")
        assert {"iamc_name", "short_name", "unit"} == set(self.mapping.columns)
        assert not self.mapping.isna().any(axis=None)



[docs]
    def use_aggregates_dict(self, data: dict) -> None:
        """Update :attr:`mapping` from `data`.

        This method handles `data` with structure equivalent to the following YAML
        content:

        .. code-block:: yaml

           level_1:
             Chemicals|Liquids|Other:
               short: fe_pe_chem_oth
               components: [ fe_pe_hvc_oth ]
             Chemicals|Liquids|Biomass:
               short: fe_pe_chem_bio
               components: [ fe_pe_hvc_bio_eth ]
             # Any number of similar entries
           level_2:
             Heat:
               short: fe_pe_heat
               components:
               - fe_pe_cement_heat
               - fe_pe_aluminum_heat
               - fe_pe_steel_heat
               - fe_pe_other_heat
             # Any number of similar entries

        In general:

        - Top-level keys may be "level_1", "level_2", etc. Additional top-level keys
          like "iamc_prefix", "unit", and "var" are checked against the corresponding
          attributes.
        - Second-level keys are fragments of IAMC ‘variable’ names
        - Third level keys must be:

          - "short": A single string. See the description of the "short_name" column in
            :attr:`mapping`. This is the aggregate to be produced.
          - "components": A list of strings. These are the components of the
            aggregation. Components referenced under "level_1" must already be present
            in :attr:`mapping`. Components referenced under "level_2" may include the
            aggregates described by "level_1", etc.
        """
        # Check that other entries in `data` (e.g. loaded from YAML) match
        for k in ("iamc_prefix", "unit", "var"):
            assert data.pop(k, getattr(self, k)) == getattr(self, k)

        dims = self.mapping.index.names

        # Iterate over top-level keys: "level_1", "level_2", etc.
        for k_level in map("level_{}".format, count(start=1)):
            try:
                # Iterate over aggregates defined in this "level"
                dfs = []
                for k, v in data.pop(k_level).items():
                    # Extract aggregate name and components
                    d = dict(iamc_name=k, agg=v["short"], short_name=v["components"])
                    # Convert to DataFrame with desired structure
                    dfs.append(pd.DataFrame(d))
            except KeyError:
                break  # No data for this or any subsequent levels; finish

            # The merge and concat steps must be repeated on every iteration so that
            # aggregates defined under "level_2" may refer to aggregates defined under
            # "level_1" etc.

            # - Concatenate together all `dfs`.
            # - Merge with (c, l, m, t, short_name, unit) from self.mapping (omit
            #   existing iamc_name), on the short_name values.
            # - Replace the existing short_name with aggregate short_name.
            # - Restore multiindex.
            sn = "short_name"
            agg_mapping = (
                pd.concat(dfs)
                .merge(self.mapping.reset_index().drop(["iamc_name"], axis=1), on=[sn])
                .drop([sn], axis=1)
                .rename(columns={"agg": sn})
                .set_index(dims)
            )
            # Concatenate to exixsting mappings
            self.mapping = pd.concat([self.mapping, agg_mapping])

        self.check_mapping()



[docs]
    def use_vars_dict(self, data: dict) -> None:
        """Replace :attr:`mapping` with a data frame constructed from `data`.

        This handles `data` with structure equivalent to the following YAML content:

        .. code-block:: yaml

           Chemicals|High-Value Chemicals|Electricity|Steam Cracking:
             filter:
               commodity: electr
               level: final
               mode: [vacuum_gasoil, atm_gasoil, naphtha, ethane, propane]
               technology: steam_cracker_petro
             short: fe_pe_hvc_el_sc
             unit: kg  # Optional

           # Any number of similar entries

        Within this:

        - ``Chemicals|High-Value Chemicals|Electricity|Steam Cracking`` is a (fragment
          of) an IAMC ‘variable’ name; it is stored in the "iamc_name" column.
        - ``filter`` entries may have values that are strings or lists of strings.
          The subkeys may include the MESSAGEix sets [technology, mode, commodity,
          level]. One row is produced for every combination of the filter values.
        - ``short`` is stored in the "short_name" column.
        - ``unit`` is stored in the "unit" column; if omitted, :attr:`unit` is used.

        The result is validated with :meth:`check_mapping`.
        """
        frames = []
        all_dims: set[str] = set()

        for name, spec in data.items():
            # Translate long set names ("technology") to short dimension IDs ("t"),
            # and wrap a lone string so that every value is a list of labels
            index_values = {}
            for long_dim, selection in spec["filter"].items():
                labels = [selection] if isinstance(selection, str) else selection
                index_values[RENAME_DIMS[long_dim]] = labels
            all_dims.update(index_values)

            # One row per combination of labels across the filtered dimensions
            combos = pd.DataFrame(
                list(product(*index_values.values())), columns=list(index_values)
            )

            # Attach the columns that are constant for this entry
            frames.append(
                combos.assign(
                    iamc_name=name,
                    short_name=spec["short"],
                    unit=spec.get("unit", self.unit),
                )
            )

        # Stack all entries; index on the union of dimensions, in sorted order
        combined = pd.concat(frames)
        self.mapping = combined.set_index(sorted(all_dims))
        self.check_mapping()