Source code for message_ix_models.model.material.report.config

from dataclasses import dataclass, field
from itertools import count, product
from typing import Literal

import pandas as pd
from ixmp.report.common import RENAME_DIMS

from message_ix_models.util import package_data_path


[docs] @dataclass class Config: """Configuration for reporting of a subset of material data.""" #: Prefix or initial fragment of IAMC ‘variable’ name. iamc_prefix: str #: Units of measure for the reported data. unit: Literal["Mt/yr", "GWa", "Mt CH4/yr", "GW"] #: :mod:`message_ix.report` key from which to retrieve the data. var: Literal["out", "in", "ACT", "emi", "CAP"] #: Data frame with: #: #: - MultiIndex levels including 1 or more of :math:`(c, l, m, t)`. #: - 3 columns: #: - "iamc_name": a (fragment of) an IAMC ‘variable’ name. This is appended to #: to :attr:`iamc_prefix` to construct a complete name. #: - "short_name": … #: - "unit": units of measure. #: #: This expresses a mapping between the index entries (=indices of reported data) #: and the information in the 3 columns. mapping: pd.DataFrame = field( default_factory=lambda: pd.DataFrame( columns=["iamc_name", "short_name", "unit"], ) )
[docs] @classmethod def from_files(cls, category: str) -> "Config": """Create a Config instance from 1 or 2 YAML files. A file like :file:`message_ix_models/data/material/reporting/{category}.yaml` is read and used to populate a new instance. The file must have: - Top-level keys corresponding to :attr:`iamc_prefix`, :attr:`unit`, and :attr:`var`. - A top-level key ``vars:`` containing a mapping compatible with :meth:`use_vars_dict`. If a file exists in the same directory named like :file:`{category}_aggregates.yaml`, it is also read, and its contents passed to :meth:`use_aggregates_dict`. """ import yaml # Handle basic configuration file path = package_data_path("material", "reporting", f"{category}.yaml") with open(path) as f: # Raises FileNotFoundError on missing file kw = yaml.safe_load(f) # Raises on invalid YAML # Remove the "vars" top-level key from the file vars = kw.pop("vars") # Create a ReporterConfig instance result = cls(**kw) # Update mapping data frame using `vars` result.use_vars_dict(vars) # Handle aggregates configuration file path_agg = path.with_name(f"{category}_aggregates.yaml") try: with open(path_agg) as f: data_agg = yaml.safe_load(f) except FileNotFoundError: data_agg = dict() # No aggregates file result.use_aggregates_dict(data_agg) return result
[docs] def check_mapping(self) -> None: """Assert that :attr:`mapping` has the correct structure and is complete.""" assert self.mapping.empty or set(self.mapping.index.names) <= set("clmt") assert {"iamc_name", "short_name", "unit"} == set(self.mapping.columns) assert not self.mapping.isna().any(axis=None)
[docs] def use_aggregates_dict(self, data: dict) -> None: """Update :attr:`mapping` from `data`. This method handles `data` with structure equivalent to the following YAML content: .. code-block:: yaml level_1: Chemicals|Liquids|Other: short: fe_pe_chem_oth components: [ fe_pe_hvc_oth ] Chemicals|Liquids|Biomass: short: fe_pe_chem_bio components: [ fe_pe_hvc_bio_eth ] # Any number of similar entries level_2: Heat: short: fe_pe_heat components: - fe_pe_cement_heat - fe_pe_aluminum_heat - fe_pe_steel_heat - fe_pe_other_heat # Any number of similar entries In general: - Top-level keys may be "level_1", "level_2", etc. Additional top-level keys like "iamc_prefix", "unit", and "var" are checked against the corresponding attributes. - Second-level keys are fragments of IAMC ‘variable’ names - Third level keys must be: - "short": A single string. See the description of the "short_name" column in :attr:`mapping`. This is the aggregate to be produced. - "components": A list of strings. These are the components of the aggregation. Components referenced under "level_1" must already be present in :attr:`mapping`. Components referenced under "level_2" may include the aggregates described by "level_1", etc. """ # Check that other entries in `data` (e.g. loaded from YAML) match for k in ("iamc_prefix", "unit", "var"): assert data.pop(k, getattr(self, k)) == getattr(self, k) dims = self.mapping.index.names # Iterate over top-level keys: "level_1", "level_2", etc. for k_level in map("level_{}".format, count(start=1)): try: # Iterate over aggregates defined in this "level" dfs = [] for k, v in data.pop(k_level).items(): # Extract aggregate name and components d = dict(iamc_name=k, agg=v["short"], short_name=v["components"]) # Convert to DataFrame with desired structure dfs.append(pd.DataFrame(d)) except KeyError: break # No data for this or any subsequent levels; finish # The merge and concat steps must be repeated on every iteration so that # aggregates defined under "level_2" may refer to aggregates defined under # "level_1" etc. # - Concatenate together all `dfs`. # - Merge with (c, l, m, t, short_name, unit) from self.mapping (omit # existing iamc_name), on the short_name values. # - Replace the existing short_name with aggregate short_name. # - Restore multiindex. sn = "short_name" agg_mapping = ( pd.concat(dfs) .merge(self.mapping.reset_index().drop(["iamc_name"], axis=1), on=[sn]) .drop([sn], axis=1) .rename(columns={"agg": sn}) .set_index(dims) ) # Concatenate to exixsting mappings self.mapping = pd.concat([self.mapping, agg_mapping]) self.check_mapping()
[docs] def use_vars_dict(self, data: dict) -> None: """Update :attr:`mapping` using `data`. This handles `data` with structure equivalent to the following YAML content: .. code-block:: yaml Chemicals|High-Value Chemicals|Electricity|Steam Cracking: filter: commodity: electr level: final mode: [vacuum_gasoil, atm_gasoil, naphtha, ethane, propane] technology: steam_cracker_petro, short: fe_pe_hvc_el_sc unit: kg # Optional # Any number of similar entries Within this: - ``Chemicals|High-Value Chemicals|Electricity|Steam Cracking`` is a (fragment of) an IAMC ‘variable’ name. - ``filter`` entries may have values that are strings or lists of strings. The subkeys may include the MESSAGEix sets [technology, mode, commodity, level]. """ dims: set[str] = set() dfs = [] for iamc_name, values in data.items(): # Convert: # - scalar/single str entries to length-1 list of str # - long/full message_ix set names ("technology") to short dim IDs ("t") filters = { RENAME_DIMS[k]: [v] if isinstance(v, str) else v for k, v in values["filter"].items() } dims |= filters.keys() # - Create data frame: all valid combinations of indices # - Set other columns dfs.append( pd.DataFrame( list(product(*filters.values())), columns=list(filters.keys()) ).assign( iamc_name=iamc_name, short_name=values["short"], unit=values.get("unit", self.unit), ) ) # Concatenate all mappings; set multi-index based on `dims` self.mapping = pd.concat(dfs).set_index(sorted(dims)) self.check_mapping()