Comparer

The Comparer class is the main class of the ModelSkill package. It is returned by match(), from_matched() or as an element in a ComparerCollection. It holds the matched observation and model data for a single observation and has methods for plotting and skill assessment.

Main functionality:

selecting/filtering data
- sel()
- query()
skill assessment
- skill()
- gridded_skill() (for track observations)
plotting
load/save/export data

modelskill.Comparer

Bases: Scoreable

Comparer class for comparing model and observation data.

Typically, the Comparer is part of a ComparerCollection, created with the match function.

Parameters:

Name	Type	Description	Default
`matched_data`	`Dataset`	Matched data	required
`raw_mod_data`	`dict of modelskill.TimeSeries`	Raw model data. If None, observation and modeldata must be provided.	`None`

Examples:

>>> import modelskill as ms
>>> cmp1 = ms.match(observation, modeldata)
>>> cmp2 = ms.from_matched(matched_data)

See Also

modelskill.match, modelskill.from_matched

Source code in modelskill/comparison/_comparison.py

class Comparer(Scoreable):
    """
    Comparer class for comparing model and observation data.

    Typically, the Comparer is part of a ComparerCollection,
    created with the `match` function.

    Parameters
    ----------
    matched_data : xr.Dataset
        Matched data
    raw_mod_data : dict of modelskill.TimeSeries, optional
        Raw model data. If None, observation and modeldata must be provided.

    Examples
    --------
    >>> import modelskill as ms
    >>> cmp1 = ms.match(observation, modeldata)
    >>> cmp2 = ms.from_matched(matched_data)

    See Also
    --------
    modelskill.match, modelskill.from_matched
    """

    data: xr.Dataset
    raw_mod_data: Dict[str, TimeSeries]
    _obs_str = "Observation"
    plotter = ComparerPlotter

    def __init__(
        self,
        matched_data: xr.Dataset,
        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
    ) -> None:
        self.data = _parse_dataset(matched_data)
        self.raw_mod_data = (
            raw_mod_data
            if raw_mod_data is not None
            else {
                # key: ModelResult(value, gtype=self.data.gtype, name=key, x=self.x, y=self.y)
                key: TimeSeries(self.data[[key]])
                for key, value in matched_data.data_vars.items()
                if value.attrs["kind"] == "model"
            }
        )
        # TODO: validate that the names in raw_mod_data are the same as in matched_data
        assert isinstance(self.raw_mod_data, dict)
        for k in self.raw_mod_data.keys():
            v = self.raw_mod_data[k]
            if not isinstance(v, TimeSeries):
                try:
                    self.raw_mod_data[k] = TimeSeries(v)
                except Exception:
                    raise ValueError(
                        f"raw_mod_data[{k}] could not be converted to a TimeSeries object"
                    )
            else:
                assert isinstance(
                    v, TimeSeries
                ), f"raw_mod_data[{k}] must be a TimeSeries object"

        self.plot = Comparer.plotter(self)
        """Plot using the ComparerPlotter

        Examples
        --------
        >>> cmp.plot.timeseries()
        >>> cmp.plot.scatter()
        >>> cmp.plot.qq()
        >>> cmp.plot.hist()
        >>> cmp.plot.kde()
        >>> cmp.plot.box()
        >>> cmp.plot.residual_hist()
        >>> cmp.plot.taylor()        
        """

    @staticmethod
    def from_matched_data(
        data: xr.Dataset | pd.DataFrame,
        raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
        obs_item: str | int | None = None,
        mod_items: Optional[Iterable[str | int]] = None,
        aux_items: Optional[Iterable[str | int]] = None,
        name: Optional[str] = None,
        weight: float = 1.0,
        x: Optional[float] = None,
        y: Optional[float] = None,
        z: Optional[float] = None,
        x_item: str | int | None = None,
        y_item: str | int | None = None,
        quantity: Optional[Quantity] = None,
    ) -> "Comparer":
        """Initialize from compared data"""
        if not isinstance(data, xr.Dataset):
            # TODO: handle raw_mod_data by accessing data.attrs["kind"] and only remove nan after
            data = _matched_data_to_xarray(
                data,
                obs_item=obs_item,
                mod_items=mod_items,
                aux_items=aux_items,
                name=name,
                x=x,
                y=y,
                z=z,
                x_item=x_item,
                y_item=y_item,
                quantity=quantity,
            )
            data.attrs["weight"] = weight
        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)

    def __repr__(self):
        out = [
            "<Comparer>",
            f"Quantity: {self.quantity}",
            f"Observation: {self.name}, n_points={self.n_points}",
            "Model(s):",
        ]
        for index, model in enumerate(self.mod_names):
            out.append(f"{index}: {model}")

        for var in self.aux_names:
            out.append(f" Auxiliary: {var}")
        return str.join("\n", out)

    @property
    def name(self) -> str:
        """Name of comparer (=name of observation)"""
        return str(self.data.attrs["name"])

    @name.setter
    def name(self, name: str) -> None:
        if name in _RESERVED_NAMES:
            raise ValueError(
                f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
            )
        self.data.attrs["name"] = name

    @property
    def gtype(self) -> str:
        """Geometry type"""
        return str(self.data.attrs["gtype"])

    @property
    def quantity(self) -> Quantity:
        """Quantity object"""
        return Quantity(
            name=self.data[self._obs_str].attrs["long_name"],
            unit=self.data[self._obs_str].attrs["units"],
            is_directional=bool(
                self.data[self._obs_str].attrs.get("is_directional", False)
            ),
        )

    @quantity.setter
    def quantity(self, quantity: Quantity) -> None:
        assert isinstance(quantity, Quantity), "value must be a Quantity object"
        self.data[self._obs_str].attrs["long_name"] = quantity.name
        self.data[self._obs_str].attrs["units"] = quantity.unit
        self.data[self._obs_str].attrs["is_directional"] = int(quantity.is_directional)

    @property
    def n_points(self) -> int:
        """number of compared points"""
        return len(self.data[self._obs_str]) if self.data else 0

    @property
    def time(self) -> pd.DatetimeIndex:
        """time of compared data as pandas DatetimeIndex"""
        return self.data.time.to_index()

    # TODO: Should we keep these? (renamed to start_time and end_time)
    # @property
    # def start(self) -> pd.Timestamp:
    #     """start pd.Timestamp of compared data"""
    #     return self.time[0]

    # @property
    # def end(self) -> pd.Timestamp:
    #     """end pd.Timestamp of compared data"""
    #     return self.time[-1]

    @property
    def x(self) -> Any:
        """x-coordinate"""
        return self._coordinate_values("x")

    @property
    def y(self) -> Any:
        """y-coordinate"""
        return self._coordinate_values("y")

    @property
    def z(self) -> Any:
        """z-coordinate"""
        return self._coordinate_values("z")

    def _coordinate_values(self, coord: str) -> Any:
        vals = self.data[coord].values
        return np.atleast_1d(vals)[0] if vals.ndim == 0 else vals

    @property
    def n_models(self) -> int:
        """Number of model results"""
        return len(self.mod_names)

    @property
    def mod_names(self) -> List[str]:
        """List of model result names"""
        return list(self.raw_mod_data.keys())

    def __contains__(self, key: str) -> bool:
        return key in self.data.data_vars

    @property
    def aux_names(self) -> List[str]:
        """List of auxiliary data names"""
        # we don't require the kind attribute to be "auxiliary"
        return list(
            [
                k
                for k, v in self.data.data_vars.items()
                if v.attrs["kind"] not in ["observation", "model"]
            ]
        )

    # TODO: always "Observation", necessary to have this property?
    @property
    def _obs_name(self) -> str:
        return self._obs_str

    @property
    def weight(self) -> float:
        """Weight of observation (used in ComparerCollection score() and mean_skill())"""
        return float(self.data.attrs["weight"])

    @weight.setter
    def weight(self, value: float) -> None:
        self.data.attrs["weight"] = float(value)

    @property
    def _unit_text(self) -> str:
        # Quantity name and unit as text suitable for plot labels
        return f"{self.quantity.name} [{self.quantity.unit}]"

    @property
    def attrs(self) -> dict[str, Any]:
        """Attributes of the observation"""
        return self.data.attrs

    @attrs.setter
    def attrs(self, value: dict[str, Serializable]) -> None:
        self.data.attrs = value

    # TODO: is this the best way to copy (self.data.copy.. )
    def __copy__(self) -> "Comparer":
        return deepcopy(self)

    def copy(self) -> "Comparer":
        return self.__copy__()

    def rename(
        self, mapping: Mapping[str, str], errors: Literal["raise", "ignore"] = "raise"
    ) -> "Comparer":
        """Rename observation, model or auxiliary data variables

        Parameters
        ----------
        mapping : dict
            mapping of old names to new names
        errors : {'raise', 'ignore'}, optional
            If 'raise', raise a KeyError if any of the old names
            do not exist in the data. By default 'raise'.

        Returns
        -------
        Comparer

        Examples
        --------
        >>> cmp = ms.match(observation, modeldata)
        >>> cmp.mod_names
        ['model1']
        >>> cmp2 = cmp.rename({'model1': 'model2'})
        >>> cmp2.mod_names
        ['model2']
        """
        if errors not in ["raise", "ignore"]:
            raise ValueError("errors must be 'raise' or 'ignore'")

        allowed_keys = [self.name] + self.mod_names + self.aux_names
        if errors == "raise":
            for k in mapping.keys():
                if k not in allowed_keys:
                    raise KeyError(f"Unknown key: {k}; must be one of {allowed_keys}")
        else:
            # "ignore": silently remove keys that are not in allowed_keys
            mapping = {k: v for k, v in mapping.items() if k in allowed_keys}

        if any([k in _RESERVED_NAMES for k in mapping.values()]):
            # TODO: also check for duplicates
            raise ValueError(
                f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
            )

        # rename observation
        obs_name = mapping.get(self.name, self.name)
        ma_mapping = {k: v for k, v in mapping.items() if k != self.name}

        data = self.data.rename(ma_mapping)
        data.attrs["name"] = obs_name
        raw_mod_data = dict()
        for k, v in self.raw_mod_data.items():
            if k in ma_mapping:
                # copy is needed here as the same raw data could be
                # used for multiple Comparers!
                v2 = v.copy()
                v2.data = v2.data.rename({k: ma_mapping[k]})
                raw_mod_data[ma_mapping[k]] = v2
            else:
                raw_mod_data[k] = v

        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)

    def _to_observation(self) -> PointObservation | TrackObservation:
        """Convert to Observation"""
        if self.gtype == "point":
            df = self.data.drop_vars(["x", "y", "z"])[self._obs_str].to_dataframe()
            return PointObservation(
                data=df,
                name=self.name,
                x=self.x,
                y=self.y,
                z=self.z,
                quantity=self.quantity,
                # TODO: add attrs
            )
        elif self.gtype == "track":
            df = self.data.drop_vars(["z"])[[self._obs_str]].to_dataframe()
            return TrackObservation(
                data=df,
                item=0,
                x_item=1,
                y_item=2,
                name=self.name,
                quantity=self.quantity,
                # TODO: add attrs
            )
        else:
            raise NotImplementedError(f"Unknown gtype: {self.gtype}")

    def __iadd__(self, other: Comparer):  # type: ignore
        from ..matching import match_space_time

        missing_models = set(self.mod_names) - set(other.mod_names)
        if len(missing_models) == 0:
            # same obs name and same model names
            self.data = xr.concat([self.data, other.data], dim="time").drop_duplicates(
                "time"
            )
        else:
            self.raw_mod_data.update(other.raw_mod_data)
            matched = match_space_time(
                observation=self._to_observation(),
                raw_mod_data=self.raw_mod_data,  # type: ignore
            )
            self.data = matched

        return self

    def __add__(
        self, other: Union["Comparer", "ComparerCollection"]
    ) -> "ComparerCollection" | "Comparer":
        from ._collection import ComparerCollection
        from ..matching import match_space_time

        if not isinstance(other, (Comparer, ComparerCollection)):
            raise TypeError(f"Cannot add {type(other)} to {type(self)}")

        if isinstance(other, Comparer) and (self.name == other.name):
            missing_models = set(self.mod_names) - set(other.mod_names)
            if len(missing_models) == 0:
                # same obs name and same model names
                cmp = self.copy()
                cmp.data = xr.concat(
                    [cmp.data, other.data], dim="time"
                ).drop_duplicates("time")

            else:
                raw_mod_data = self.raw_mod_data.copy()
                raw_mod_data.update(other.raw_mod_data)  # TODO!
                matched = match_space_time(
                    observation=self._to_observation(),
                    raw_mod_data=raw_mod_data,  # type: ignore
                )
                cmp = Comparer(matched_data=matched, raw_mod_data=raw_mod_data)

            return cmp
        else:
            if isinstance(other, Comparer):
                return ComparerCollection([self, other])
            elif isinstance(other, ComparerCollection):
                return ComparerCollection([self, *other])

    def sel(
        self,
        model: Optional[IdxOrNameTypes] = None,
        start: Optional[TimeTypes] = None,
        end: Optional[TimeTypes] = None,
        time: Optional[TimeTypes] = None,
        area: Optional[List[float]] = None,
    ) -> "Comparer":
        """Select data based on model, time and/or area.

        Parameters
        ----------
        model : str or int or list of str or list of int, optional
            Model name or index. If None, all models are selected.
        start : str or datetime, optional
            Start time. If None, all times are selected.
        end : str or datetime, optional
            End time. If None, all times are selected.
        time : str or datetime, optional
            Time. If None, all times are selected.
        area : list of float, optional
            bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.

        Returns
        -------
        Comparer
            New Comparer with selected data.
        """
        if (time is not None) and ((start is not None) or (end is not None)):
            raise ValueError("Cannot use both time and start/end")

        d = self.data
        raw_mod_data = self.raw_mod_data
        if model is not None:
            if isinstance(model, (str, int)):
                models = [model]
            else:
                models = list(model)
            mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]
            dropped_models = [m for m in self.mod_names if m not in mod_names]
            d = d.drop_vars(dropped_models)
            raw_mod_data = {m: raw_mod_data[m] for m in mod_names}
        if (start is not None) or (end is not None):
            # TODO: can this be done without to_index? (simplify)
            d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore

            # Note: if user asks for a specific time, we also filter raw
            raw_mod_data = {
                k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()
            }  # type: ignore
        if time is not None:
            d = d.sel(time=time)

            # Note: if user asks for a specific time, we also filter raw
            raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}
        if area is not None:
            if _area_is_bbox(area):
                x0, y0, x1, y1 = area
                mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)
            elif _area_is_polygon(area):
                polygon = np.array(area)
                xy = np.column_stack((d.x, d.y))
                mask = _inside_polygon(polygon, xy)
            else:
                raise ValueError("area supports bbox [x0,y0,x1,y1] and closed polygon")
            if self.gtype == "point":
                # if False, return empty data
                d = d if mask else d.isel(time=slice(None, 0))
            else:
                d = d.isel(time=mask)
        return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)

    def where(
        self,
        cond: Union[bool, np.ndarray, xr.DataArray],
    ) -> "Comparer":
        """Return a new Comparer with values where cond is True

        Parameters
        ----------
        cond : bool, np.ndarray, xr.DataArray
            This selects the values to return.

        Returns
        -------
        Comparer
            New Comparer with values where cond is True and other otherwise.

        Examples
        --------
        >>> c2 = c.where(c.data.Observation > 0)
        """
        d = self.data.where(cond, other=np.nan)
        d = d.dropna(dim="time", how="all")
        return Comparer.from_matched_data(d, self.raw_mod_data)

    def query(self, query: str) -> "Comparer":
        """Return a new Comparer with values where query cond is True

        Parameters
        ----------
        query : str
            Query string, see pandas.DataFrame.query

        Returns
        -------
        Comparer
            New Comparer with values where cond is True and other otherwise.

        Examples
        --------
        >>> c2 = c.query("Observation > 0")
        """
        d = self.data.query({"time": query})
        d = d.dropna(dim="time", how="all")
        return Comparer.from_matched_data(d, self.raw_mod_data)

    def _to_long_dataframe(
        self, attrs_keys: Iterable[str] | None = None
    ) -> pd.DataFrame:
        """Return a copy of the data as a long-format pandas DataFrame (for groupby operations)"""

        data = self.data.drop_vars("z", errors="ignore")

        # this step is necessary since we keep arbitrary derived data in the dataset, but not z
        # i.e. using a hardcoded whitelist of variables to keep is less flexible
        id_vars = [v for v in data.variables if v not in self.mod_names]

        attrs = (
            {key: data.attrs.get(key, False) for key in attrs_keys}
            if attrs_keys
            else {}
        )

        df = (
            data.to_dataframe()
            .reset_index()
            .melt(
                value_vars=self.mod_names,
                var_name="model",
                value_name="mod_val",
                id_vars=id_vars,
            )
            .rename(columns={self._obs_str: "obs_val"})
            .assign(observation=self.name)
            .assign(**attrs)
            .astype({"model": "category", "observation": "category"})
        )

        return df

    def skill(
        self,
        by: str | Iterable[str] | None = None,
        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
        **kwargs: Any,
    ) -> SkillTable:
        """Skill assessment of model(s)

        Parameters
        ----------
        by : str or List[str], optional
            group by, by default ["model"]

            - by column name
            - by temporal bin of the DateTimeIndex via the freq-argument
            (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
            - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the
            syntax 'dt:month'. The dt-argument is different from the freq-argument
            in that it gives month-of-year rather than month-of-data.
        metrics : list, optional
            list of modelskill.metrics, by default modelskill.options.metrics.list

        Returns
        -------
        SkillTable
            skill assessment object

        See also
        --------
        sel
            a method for filtering/selecting data

        Examples
        --------
        >>> import modelskill as ms
        >>> cc = ms.match(c2, mod)
        >>> cc['c2'].skill().round(2)
                       n  bias  rmse  urmse   mae    cc    si    r2
        observation
        c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99

        >>> cc['c2'].skill(by='freq:D').round(2)
                     n  bias  rmse  urmse   mae    cc    si    r2
        2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98
        2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN
        2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99
        """
        metrics = _parse_metric(metrics, directional=self.quantity.is_directional)

        # TODO remove in v1.1
        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
        if kwargs != {}:
            raise AttributeError(f"Unknown keyword arguments: {kwargs}")

        cmp = self.sel(
            model=model,
            start=start,
            end=end,
            area=area,
        )
        if cmp.n_points == 0:
            raise ValueError("No data selected for skill assessment")

        by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)

        df = cmp._to_long_dataframe()
        res = _groupby_df(df, by=by, metrics=metrics)
        res["x"] = np.nan if self.gtype == "track" else cmp.x
        res["y"] = np.nan if self.gtype == "track" else cmp.y
        res = self._add_as_col_if_not_in_index(df, skilldf=res)
        return SkillTable(res)

    def _add_as_col_if_not_in_index(
        self, df: pd.DataFrame, skilldf: pd.DataFrame
    ) -> pd.DataFrame:
        """Add a field to skilldf if unique in df"""
        FIELDS = ("observation", "model")

        for field in FIELDS:
            if (field == "model") and (self.n_models <= 1):
                continue
            if field not in skilldf.index.names:
                unames = df[field].unique()
                if len(unames) == 1:
                    skilldf.insert(loc=0, column=field, value=unames[0])
        return skilldf

    def score(
        self,
        metric: str | Callable = mtr.rmse,
        **kwargs: Any,
    ) -> Dict[str, float]:
        """Model skill score

        Parameters
        ----------
        metric : list, optional
            a single metric from modelskill.metrics, by default rmse

        Returns
        -------
        dict[str, float]
            skill score as a single number (for each model)

        See also
        --------
        skill
            a method for skill assessment returning a pd.DataFrame

        Examples
        --------
        >>> import modelskill as ms
        >>> cmp = ms.match(c2, mod)
        >>> cmp.score()
        {'mod': 0.3517964910888918}

        >>> cmp.score(metric="mape")
        {'mod': 11.567399646108198}
        """
        metric = _parse_metric(metric)[0]
        if not (callable(metric) or isinstance(metric, str)):
            raise ValueError("metric must be a string or a function")

        # TODO remove in v1.1
        model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
        assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"

        sk = self.skill(
            by=["model", "observation"],
            metrics=[metric],
            model=model,  # deprecated
            start=start,  # deprecated
            end=end,  # deprecated
            area=area,  # deprecated
        )
        df = sk.to_dataframe()

        metric_name = metric if isinstance(metric, str) else metric.__name__
        ser = df.reset_index().groupby("model", observed=True)[metric_name].mean()
        score = {str(k): float(v) for k, v in ser.items()}
        return score

    def gridded_skill(
        self,
        bins: int = 5,
        binsize: float | None = None,
        by: str | Iterable[str] | None = None,
        metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
        n_min: int | None = None,
        **kwargs: Any,
    ):
        """Aggregated spatial skill assessment of model(s) on a regular spatial grid.

        Parameters
        ----------
        bins: int, list of scalars, or IntervalIndex, or tuple of, optional
            criteria to bin x and y by, argument bins to pd.cut(), default 5
            define different bins for x and y a tuple
            e.g.: bins = 5, bins = (5,[2,3,5])
        binsize : float, optional
            bin size for x and y dimension, overwrites bins
            creates bins with reference to round(mean(x)), round(mean(y))
        by : (str, List[str]), optional
            group by column name or by temporal bin via the freq-argument
            (using pandas pd.Grouper(freq)),
            e.g.: 'freq:M' = monthly; 'freq:D' daily
            by default ["model","observation"]
        metrics : list, optional
            list of modelskill.metrics, by default modelskill.options.metrics.list
        n_min : int, optional
            minimum number of observations in a grid cell;
            cells with fewer observations get a score of `np.nan`

        Returns
        -------
        SkillGrid
            skill assessment as a SkillGrid object

        See also
        --------
        skill
            a method for aggregated skill assessment

        Examples
        --------
        >>> import modelskill as ms
        >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model
        >>> cmp.gridded_skill(metrics='bias')
        <xarray.Dataset>
        Dimensions:      (x: 5, y: 5)
        Coordinates:
            observation   'alti'
        * x            (x) float64 -0.436 1.543 3.517 5.492 7.466
        * y            (y) float64 50.6 51.66 52.7 53.75 54.8
        Data variables:
            n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76
            bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143

        >>> gs = cc.gridded_skill(binsize=0.5)
        >>> gs.data.coords
        Coordinates:
            observation   'alti'
        * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5
        * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5
        """

        # TODO remove in v1.1
        model, start, end, area = _get_deprecated_args(kwargs)
        assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"

        cmp = self.sel(
            model=model,
            start=start,
            end=end,
            area=area,
        )

        metrics = _parse_metric(metrics)
        if cmp.n_points == 0:
            raise ValueError("No data to compare")

        df = cmp._to_long_dataframe()
        df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)

        agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)
        if "x" not in agg_cols:
            agg_cols.insert(0, "x")
        if "y" not in agg_cols:
            agg_cols.insert(0, "y")

        df = df.drop(columns=["x", "y"]).rename(columns=dict(xBin="x", yBin="y"))
        res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)
        ds = res.to_xarray().squeeze()

        # change categorial index to coordinates
        for dim in ("x", "y"):
            ds[dim] = ds[dim].astype(float)

        return SkillGrid(ds)

    @property
    def _residual(self) -> np.ndarray:
        df = self.data.drop_vars(["x", "y", "z"]).to_dataframe()
        obs = df[self._obs_str].values
        mod = df[self.mod_names].values
        return mod - np.vstack(obs)

    def remove_bias(
        self, correct: Literal["Model", "Observation"] = "Model"
    ) -> Comparer:
        cmp = self.copy()

        bias = cmp._residual.mean(axis=0)
        if correct == "Model":
            for j in range(cmp.n_models):
                mod_name = cmp.mod_names[j]
                mod_ts = cmp.raw_mod_data[mod_name]
                with xr.set_options(keep_attrs=True):  # type: ignore
                    mod_ts.data[mod_name].values = mod_ts.values - bias[j]
                    cmp.data[mod_name].values = cmp.data[mod_name].values - bias[j]
        elif correct == "Observation":
            # what if multiple models?
            with xr.set_options(keep_attrs=True):  # type: ignore
                cmp.data[cmp._obs_str].values = cmp.data[cmp._obs_str].values + bias
        else:
            raise ValueError(
                f"Unknown correct={correct}. Only know 'Model' and 'Observation'"
            )
        return cmp

    def to_dataframe(self) -> pd.DataFrame:
        """Convert matched data to pandas DataFrame

        Include x, y coordinates only if gtype=track

        Returns
        -------
        pd.DataFrame
            data as a pandas DataFrame
        """
        if self.gtype == str(GeometryType.POINT):
            # we remove the scalar coordinate variables as they
            # will otherwise be columns in the dataframe
            return self.data.drop_vars(["x", "y", "z"]).to_dataframe()
        elif self.gtype == str(GeometryType.TRACK):
            df = self.data.drop_vars(["z"]).to_dataframe()
            # make sure that x, y cols are first
            cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]]
            return df[cols]
        else:
            raise NotImplementedError(f"Unknown gtype: {self.gtype}")

    def save(self, filename: Union[str, Path]) -> None:
        """Save to netcdf file

        Parameters
        ----------
        filename : str or Path
            filename
        """
        ds = self.data

        # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts
        # an alternative strategy would be to use NetCDF groups
        # https://docs.xarray.dev/en/stable/user-guide/io.html#groups

        # There is no need to save raw data for track data, since it is identical to the matched data
        if self.gtype == "point":
            ds = self.data.copy()  # copy needed to avoid modifying self.data

            for key, ts_mod in self.raw_mod_data.items():
                ts_mod = ts_mod.copy()
                #  rename time to unique name
                ts_mod.data = ts_mod.data.rename({"time": "_time_raw_" + key})
                # da = ds_mod.to_xarray()[key]
                ds["_raw_" + key] = ts_mod.data[key]

        ds.to_netcdf(filename)

    @staticmethod
    def load(filename: Union[str, Path]) -> "Comparer":
        """Load from netcdf file

        Parameters
        ----------
        filename : str or Path
            filename

        Returns
        -------
        Comparer
        """
        with xr.open_dataset(filename) as ds:
            data = ds.load()

        if data.gtype == "track":
            return Comparer(matched_data=data)

        if data.gtype == "point":
            raw_mod_data: Dict[str, TimeSeries] = {}

            for var in data.data_vars:
                var_name = str(var)
                if var_name[:5] == "_raw_":
                    new_key = var_name[5:]  # remove prefix '_raw_'
                    ds = data[[var_name]].rename(
                        {"_time_raw_" + new_key: "time", var_name: new_key}
                    )
                    ts = PointObservation(data=ds, name=new_key)
                    # TODO: name of time?
                    # ts.name = new_key
                    # df = (
                    #     data[var_name]
                    #     .to_dataframe()
                    #     .rename(
                    #         columns={"_time_raw_" + new_key: "time", var_name: new_key}
                    #     )
                    # )
                    raw_mod_data[new_key] = ts

                    # data = data.drop(var_name).drop("_time_raw_" + new_key)

            # filter variables, only keep the ones with a 'time' dimension
            data = data[[v for v in data.data_vars if "time" in data[v].dims]]

            return Comparer(matched_data=data, raw_mod_data=raw_mod_data)

        else:
            raise NotImplementedError(f"Unknown gtype: {data.gtype}")

    # =============== Deprecated methods ===============

    def spatial_skill(
        self,
        bins=5,
        binsize=None,
        by=None,
        metrics=None,
        n_min=None,
        **kwargs,
    ):
        # deprecated
        warnings.warn(
            "spatial_skill is deprecated, use gridded_skill instead", FutureWarning
        )
        return self.gridded_skill(
            bins=bins,
            binsize=binsize,
            by=by,
            metrics=metrics,
            n_min=n_min,
            **kwargs,
        )

    # TODO remove plotting methods in v1.1
    def scatter(
        self,
        *,
        bins=120,
        quantiles=None,
        fit_to_quantiles=False,
        show_points=None,
        show_hist=None,
        show_density=None,
        norm=None,
        backend="matplotlib",
        figsize=(8, 8),
        xlim=None,
        ylim=None,
        reg_method="ols",
        title=None,
        xlabel=None,
        ylabel=None,
        skill_table=None,
        **kwargs,
    ):
        warnings.warn(
            "This method is deprecated, use plot.scatter instead", FutureWarning
        )

        # TODO remove in v1.1
        model, start, end, area = _get_deprecated_args(kwargs)

        # self.plot.scatter(
        self.sel(
            model=model,
            start=start,
            end=end,
            area=area,
        ).plot.scatter(
            bins=bins,
            quantiles=quantiles,
            fit_to_quantiles=fit_to_quantiles,
            show_points=show_points,
            show_hist=show_hist,
            show_density=show_density,
            norm=norm,
            backend=backend,
            figsize=figsize,
            xlim=xlim,
            ylim=ylim,
            reg_method=reg_method,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel,
            **kwargs,
        )

    def taylor(
        self,
        normalize_std=False,
        figsize=(7, 7),
        marker="o",
        marker_size=6.0,
        title="Taylor diagram",
        **kwargs,
    ):
        warnings.warn("taylor is deprecated, use plot.taylor instead", FutureWarning)

        self.plot.taylor(
            normalize_std=normalize_std,
            figsize=figsize,
            marker=marker,
            marker_size=marker_size,
            title=title,
            **kwargs,
        )

    def hist(
        self, *, model=None, bins=100, title=None, density=True, alpha=0.5, **kwargs
    ):
        warnings.warn("hist is deprecated. Use plot.hist instead.", FutureWarning)
        return self.plot.hist(
            model=model, bins=bins, title=title, density=density, alpha=alpha, **kwargs
        )

    def kde(self, ax=None, **kwargs) -> Axes:
        warnings.warn("kde is deprecated. Use plot.kde instead.", FutureWarning)

        return self.plot.kde(ax=ax, **kwargs)

    def plot_timeseries(
        self, title=None, *, ylim=None, figsize=None, backend="matplotlib", **kwargs
    ):
        warnings.warn(
            "plot_timeseries is deprecated. Use plot.timeseries instead.", FutureWarning
        )

        return self.plot.timeseries(
            title=title, ylim=ylim, figsize=figsize, backend=backend, **kwargs
        )

    def residual_hist(self, bins=100, title=None, color=None, **kwargs):
        warnings.warn(
            "residual_hist is deprecated. Use plot.residual_hist instead.",
            FutureWarning,
        )

        return self.plot.residual_hist(bins=bins, title=title, color=color, **kwargs)

attrs `property` `writable`

attrs

Attributes of the observation

aux_names `property`

aux_names

List of auxiliary data names

gtype `property`

gtype

Geometry type

mod_names `property`

mod_names

List of model result names

n_models `property`

n_models

Number of model results

n_points `property`

n_points

number of compared points

name `property` `writable`

name

Name of comparer (=name of observation)

plot `instance-attribute`

plot = plotter(self)

Plot using the ComparerPlotter

Examples:

>>> cmp.plot.timeseries()
>>> cmp.plot.scatter()
>>> cmp.plot.qq()
>>> cmp.plot.hist()
>>> cmp.plot.kde()
>>> cmp.plot.box()
>>> cmp.plot.residual_hist()
>>> cmp.plot.taylor()

quantity `property` `writable`

quantity

Quantity object

time `property`

time

time of compared data as pandas DatetimeIndex

weight `property` `writable`

weight

Weight of observation (used in ComparerCollection score() and mean_skill())

x `property`

x-coordinate

y `property`

y-coordinate

z `property`

z-coordinate

from_matched_data `staticmethod`

from_matched_data(data, raw_mod_data=None, obs_item=None, mod_items=None, aux_items=None, name=None, weight=1.0, x=None, y=None, z=None, x_item=None, y_item=None, quantity=None)

Initialize from compared data

Source code in modelskill/comparison/_comparison.py

@staticmethod
def from_matched_data(
    data: xr.Dataset | pd.DataFrame,
    raw_mod_data: Optional[Dict[str, TimeSeries]] = None,
    obs_item: str | int | None = None,
    mod_items: Optional[Iterable[str | int]] = None,
    aux_items: Optional[Iterable[str | int]] = None,
    name: Optional[str] = None,
    weight: float = 1.0,
    x: Optional[float] = None,
    y: Optional[float] = None,
    z: Optional[float] = None,
    x_item: str | int | None = None,
    y_item: str | int | None = None,
    quantity: Optional[Quantity] = None,
) -> "Comparer":
    """Initialize from compared data"""
    if not isinstance(data, xr.Dataset):
        # TODO: handle raw_mod_data by accessing data.attrs["kind"] and only remove nan after
        data = _matched_data_to_xarray(
            data,
            obs_item=obs_item,
            mod_items=mod_items,
            aux_items=aux_items,
            name=name,
            x=x,
            y=y,
            z=z,
            x_item=x_item,
            y_item=y_item,
            quantity=quantity,
        )
        data.attrs["weight"] = weight
    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)

gridded_skill

gridded_skill(bins=5, binsize=None, by=None, metrics=None, n_min=None, **kwargs)

Aggregated spatial skill assessment of model(s) on a regular spatial grid.

Parameters:

Name	Type	Description	Default
`bins`	`int`	criteria to bin x and y by, argument bins to pd.cut(), default 5 define different bins for x and y a tuple e.g.: bins = 5, bins = (5,[2,3,5])	`5`
`binsize`	`float`	bin size for x and y dimension, overwrites bins creates bins with reference to round(mean(x)), round(mean(y))	`None`
`by`	`(str, List[str])`	group by column name or by temporal bin via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily by default ["model","observation"]	`None`
`metrics`	`list`	list of modelskill.metrics, by default modelskill.options.metrics.list	`None`
`n_min`	`int`	minimum number of observations in a grid cell; cells with fewer observations get a score of `np.nan`	`None`

Returns:

Type	Description
`SkillGrid`	skill assessment as a SkillGrid object

See also

skill a method for aggregated skill assessment

Examples:

>>> import modelskill as ms
>>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model
>>> cmp.gridded_skill(metrics='bias')
<xarray.Dataset>
Dimensions:      (x: 5, y: 5)
Coordinates:
    observation   'alti'
* x            (x) float64 -0.436 1.543 3.517 5.492 7.466
* y            (y) float64 50.6 51.66 52.7 53.75 54.8
Data variables:
    n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76
    bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143

>>> gs = cc.gridded_skill(binsize=0.5)
>>> gs.data.coords
Coordinates:
    observation   'alti'
* x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5
* y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5

Source code in modelskill/comparison/_comparison.py

def gridded_skill(
    self,
    bins: int = 5,
    binsize: float | None = None,
    by: str | Iterable[str] | None = None,
    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
    n_min: int | None = None,
    **kwargs: Any,
):
    """Aggregated spatial skill assessment of model(s) on a regular spatial grid.

    Parameters
    ----------
    bins: int, list of scalars, or IntervalIndex, or tuple of, optional
        criteria to bin x and y by, argument bins to pd.cut(), default 5
        define different bins for x and y a tuple
        e.g.: bins = 5, bins = (5,[2,3,5])
    binsize : float, optional
        bin size for x and y dimension, overwrites bins
        creates bins with reference to round(mean(x)), round(mean(y))
    by : (str, List[str]), optional
        group by column name or by temporal bin via the freq-argument
        (using pandas pd.Grouper(freq)),
        e.g.: 'freq:M' = monthly; 'freq:D' daily
        by default ["model","observation"]
    metrics : list, optional
        list of modelskill.metrics, by default modelskill.options.metrics.list
    n_min : int, optional
        minimum number of observations in a grid cell;
        cells with fewer observations get a score of `np.nan`

    Returns
    -------
    SkillGrid
        skill assessment as a SkillGrid object

    See also
    --------
    skill
        a method for aggregated skill assessment

    Examples
    --------
    >>> import modelskill as ms
    >>> cmp = ms.match(c2, mod)   # satellite altimeter vs. model
    >>> cmp.gridded_skill(metrics='bias')
    <xarray.Dataset>
    Dimensions:      (x: 5, y: 5)
    Coordinates:
        observation   'alti'
    * x            (x) float64 -0.436 1.543 3.517 5.492 7.466
    * y            (y) float64 50.6 51.66 52.7 53.75 54.8
    Data variables:
        n            (x, y) int32 3 0 0 14 37 17 50 36 72 ... 0 0 15 20 0 0 0 28 76
        bias         (x, y) float64 -0.02626 nan nan ... nan 0.06785 -0.1143

    >>> gs = cc.gridded_skill(binsize=0.5)
    >>> gs.data.coords
    Coordinates:
        observation   'alti'
    * x            (x) float64 -1.5 -0.5 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5
    * y            (y) float64 51.5 52.5 53.5 54.5 55.5 56.5
    """

    # TODO remove in v1.1
    model, start, end, area = _get_deprecated_args(kwargs)
    assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"

    cmp = self.sel(
        model=model,
        start=start,
        end=end,
        area=area,
    )

    metrics = _parse_metric(metrics)
    if cmp.n_points == 0:
        raise ValueError("No data to compare")

    df = cmp._to_long_dataframe()
    df = _add_spatial_grid_to_df(df=df, bins=bins, binsize=binsize)

    agg_cols = _parse_groupby(by=by, n_mod=cmp.n_models, n_qnt=1)
    if "x" not in agg_cols:
        agg_cols.insert(0, "x")
    if "y" not in agg_cols:
        agg_cols.insert(0, "y")

    df = df.drop(columns=["x", "y"]).rename(columns=dict(xBin="x", yBin="y"))
    res = _groupby_df(df, by=agg_cols, metrics=metrics, n_min=n_min)
    ds = res.to_xarray().squeeze()

    # change categorial index to coordinates
    for dim in ("x", "y"):
        ds[dim] = ds[dim].astype(float)

    return SkillGrid(ds)

load `staticmethod`

load(filename)

Load from netcdf file

Parameters:

Name	Type	Description	Default
`filename`	`str or Path`	filename	required

Returns:

Type	Description
`Comparer`

Source code in modelskill/comparison/_comparison.py

@staticmethod
def load(filename: Union[str, Path]) -> "Comparer":
    """Load from netcdf file

    Parameters
    ----------
    filename : str or Path
        filename

    Returns
    -------
    Comparer
    """
    with xr.open_dataset(filename) as ds:
        data = ds.load()

    if data.gtype == "track":
        return Comparer(matched_data=data)

    if data.gtype == "point":
        raw_mod_data: Dict[str, TimeSeries] = {}

        for var in data.data_vars:
            var_name = str(var)
            if var_name[:5] == "_raw_":
                new_key = var_name[5:]  # remove prefix '_raw_'
                ds = data[[var_name]].rename(
                    {"_time_raw_" + new_key: "time", var_name: new_key}
                )
                ts = PointObservation(data=ds, name=new_key)
                # TODO: name of time?
                # ts.name = new_key
                # df = (
                #     data[var_name]
                #     .to_dataframe()
                #     .rename(
                #         columns={"_time_raw_" + new_key: "time", var_name: new_key}
                #     )
                # )
                raw_mod_data[new_key] = ts

                # data = data.drop(var_name).drop("_time_raw_" + new_key)

        # filter variables, only keep the ones with a 'time' dimension
        data = data[[v for v in data.data_vars if "time" in data[v].dims]]

        return Comparer(matched_data=data, raw_mod_data=raw_mod_data)

    else:
        raise NotImplementedError(f"Unknown gtype: {data.gtype}")

query

query(query)

Return a new Comparer with values where query cond is True

Parameters:

Name	Type	Description	Default
`query`	`str`	Query string, see pandas.DataFrame.query	required

Returns:

Type	Description
`Comparer`	New Comparer with values where cond is True and other otherwise.

Examples:

>>> c2 = c.query("Observation > 0")

Source code in modelskill/comparison/_comparison.py

def query(self, query: str) -> "Comparer":
    """Return a new Comparer with values where query cond is True

    Parameters
    ----------
    query : str
        Query string, see pandas.DataFrame.query

    Returns
    -------
    Comparer
        New Comparer with values where cond is True and other otherwise.

    Examples
    --------
    >>> c2 = c.query("Observation > 0")
    """
    d = self.data.query({"time": query})
    d = d.dropna(dim="time", how="all")
    return Comparer.from_matched_data(d, self.raw_mod_data)

rename

rename(mapping, errors='raise')

Rename observation, model or auxiliary data variables

Parameters:

Name	Type	Description	Default
`mapping`	`dict`	mapping of old names to new names	required
`errors`	`('raise', 'ignore')`	If 'raise', raise a KeyError if any of the old names do not exist in the data. By default 'raise'.	`'raise'`

Returns:

Type	Description
`Comparer`

Examples:

>>> cmp = ms.match(observation, modeldata)
>>> cmp.mod_names
['model1']
>>> cmp2 = cmp.rename({'model1': 'model2'})
>>> cmp2.mod_names
['model2']

Source code in modelskill/comparison/_comparison.py

def rename(
    self, mapping: Mapping[str, str], errors: Literal["raise", "ignore"] = "raise"
) -> "Comparer":
    """Rename observation, model or auxiliary data variables

    Parameters
    ----------
    mapping : dict
        mapping of old names to new names
    errors : {'raise', 'ignore'}, optional
        If 'raise', raise a KeyError if any of the old names
        do not exist in the data. By default 'raise'.

    Returns
    -------
    Comparer

    Examples
    --------
    >>> cmp = ms.match(observation, modeldata)
    >>> cmp.mod_names
    ['model1']
    >>> cmp2 = cmp.rename({'model1': 'model2'})
    >>> cmp2.mod_names
    ['model2']
    """
    if errors not in ["raise", "ignore"]:
        raise ValueError("errors must be 'raise' or 'ignore'")

    allowed_keys = [self.name] + self.mod_names + self.aux_names
    if errors == "raise":
        for k in mapping.keys():
            if k not in allowed_keys:
                raise KeyError(f"Unknown key: {k}; must be one of {allowed_keys}")
    else:
        # "ignore": silently remove keys that are not in allowed_keys
        mapping = {k: v for k, v in mapping.items() if k in allowed_keys}

    if any([k in _RESERVED_NAMES for k in mapping.values()]):
        # TODO: also check for duplicates
        raise ValueError(
            f"Cannot rename to any of {_RESERVED_NAMES}, these are reserved names!"
        )

    # rename observation
    obs_name = mapping.get(self.name, self.name)
    ma_mapping = {k: v for k, v in mapping.items() if k != self.name}

    data = self.data.rename(ma_mapping)
    data.attrs["name"] = obs_name
    raw_mod_data = dict()
    for k, v in self.raw_mod_data.items():
        if k in ma_mapping:
            # copy is needed here as the same raw data could be
            # used for multiple Comparers!
            v2 = v.copy()
            v2.data = v2.data.rename({k: ma_mapping[k]})
            raw_mod_data[ma_mapping[k]] = v2
        else:
            raw_mod_data[k] = v

    return Comparer(matched_data=data, raw_mod_data=raw_mod_data)

save

save(filename)

Save to netcdf file

Parameters:

Name	Type	Description	Default
`filename`	`str or Path`	filename	required

Source code in modelskill/comparison/_comparison.py

def save(self, filename: Union[str, Path]) -> None:
    """Save to netcdf file

    Parameters
    ----------
    filename : str or Path
        filename
    """
    ds = self.data

    # add self.raw_mod_data to ds with prefix 'raw_' to avoid name conflicts
    # an alternative strategy would be to use NetCDF groups
    # https://docs.xarray.dev/en/stable/user-guide/io.html#groups

    # There is no need to save raw data for track data, since it is identical to the matched data
    if self.gtype == "point":
        ds = self.data.copy()  # copy needed to avoid modifying self.data

        for key, ts_mod in self.raw_mod_data.items():
            ts_mod = ts_mod.copy()
            #  rename time to unique name
            ts_mod.data = ts_mod.data.rename({"time": "_time_raw_" + key})
            # da = ds_mod.to_xarray()[key]
            ds["_raw_" + key] = ts_mod.data[key]

    ds.to_netcdf(filename)

score

score(metric=mtr.rmse, **kwargs)

Model skill score

Parameters:

Name	Type	Description	Default
`metric`	`list`	a single metric from modelskill.metrics, by default rmse	`rmse`

Returns:

Type	Description
`dict[str, float]`	skill score as a single number (for each model)

See also

skill a method for skill assessment returning a pd.DataFrame

Examples:

>>> import modelskill as ms
>>> cmp = ms.match(c2, mod)
>>> cmp.score()
{'mod': 0.3517964910888918}

>>> cmp.score(metric="mape")
{'mod': 11.567399646108198}

Source code in modelskill/comparison/_comparison.py

def score(
    self,
    metric: str | Callable = mtr.rmse,
    **kwargs: Any,
) -> Dict[str, float]:
    """Model skill score

    Parameters
    ----------
    metric : list, optional
        a single metric from modelskill.metrics, by default rmse

    Returns
    -------
    dict[str, float]
        skill score as a single number (for each model)

    See also
    --------
    skill
        a method for skill assessment returning a pd.DataFrame

    Examples
    --------
    >>> import modelskill as ms
    >>> cmp = ms.match(c2, mod)
    >>> cmp.score()
    {'mod': 0.3517964910888918}

    >>> cmp.score(metric="mape")
    {'mod': 11.567399646108198}
    """
    metric = _parse_metric(metric)[0]
    if not (callable(metric) or isinstance(metric, str)):
        raise ValueError("metric must be a string or a function")

    # TODO remove in v1.1
    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
    assert kwargs == {}, f"Unknown keyword arguments: {kwargs}"

    sk = self.skill(
        by=["model", "observation"],
        metrics=[metric],
        model=model,  # deprecated
        start=start,  # deprecated
        end=end,  # deprecated
        area=area,  # deprecated
    )
    df = sk.to_dataframe()

    metric_name = metric if isinstance(metric, str) else metric.__name__
    ser = df.reset_index().groupby("model", observed=True)[metric_name].mean()
    score = {str(k): float(v) for k, v in ser.items()}
    return score

sel

sel(model=None, start=None, end=None, time=None, area=None)

Select data based on model, time and/or area.

Parameters:

Name	Type	Description	Default
`model`	`str or int or list of str or list of int`	Model name or index. If None, all models are selected.	`None`
`start`	`str or datetime`	Start time. If None, all times are selected.	`None`
`end`	`str or datetime`	End time. If None, all times are selected.	`None`
`time`	`str or datetime`	Time. If None, all times are selected.	`None`
`area`	`list of float`	bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.	`None`

Returns:

Type	Description
`Comparer`	New Comparer with selected data.

Source code in modelskill/comparison/_comparison.py

def sel(
    self,
    model: Optional[IdxOrNameTypes] = None,
    start: Optional[TimeTypes] = None,
    end: Optional[TimeTypes] = None,
    time: Optional[TimeTypes] = None,
    area: Optional[List[float]] = None,
) -> "Comparer":
    """Select data based on model, time and/or area.

    Parameters
    ----------
    model : str or int or list of str or list of int, optional
        Model name or index. If None, all models are selected.
    start : str or datetime, optional
        Start time. If None, all times are selected.
    end : str or datetime, optional
        End time. If None, all times are selected.
    time : str or datetime, optional
        Time. If None, all times are selected.
    area : list of float, optional
        bbox: [x0, y0, x1, y1] or Polygon. If None, all areas are selected.

    Returns
    -------
    Comparer
        New Comparer with selected data.
    """
    if (time is not None) and ((start is not None) or (end is not None)):
        raise ValueError("Cannot use both time and start/end")

    d = self.data
    raw_mod_data = self.raw_mod_data
    if model is not None:
        if isinstance(model, (str, int)):
            models = [model]
        else:
            models = list(model)
        mod_names: List[str] = [_get_name(m, self.mod_names) for m in models]
        dropped_models = [m for m in self.mod_names if m not in mod_names]
        d = d.drop_vars(dropped_models)
        raw_mod_data = {m: raw_mod_data[m] for m in mod_names}
    if (start is not None) or (end is not None):
        # TODO: can this be done without to_index? (simplify)
        d = d.sel(time=d.time.to_index().to_frame().loc[start:end].index)  # type: ignore

        # Note: if user asks for a specific time, we also filter raw
        raw_mod_data = {
            k: v.sel(time=slice(start, end)) for k, v in raw_mod_data.items()
        }  # type: ignore
    if time is not None:
        d = d.sel(time=time)

        # Note: if user asks for a specific time, we also filter raw
        raw_mod_data = {k: v.sel(time=time) for k, v in raw_mod_data.items()}
    if area is not None:
        if _area_is_bbox(area):
            x0, y0, x1, y1 = area
            mask = (d.x > x0) & (d.x < x1) & (d.y > y0) & (d.y < y1)
        elif _area_is_polygon(area):
            polygon = np.array(area)
            xy = np.column_stack((d.x, d.y))
            mask = _inside_polygon(polygon, xy)
        else:
            raise ValueError("area supports bbox [x0,y0,x1,y1] and closed polygon")
        if self.gtype == "point":
            # if False, return empty data
            d = d if mask else d.isel(time=slice(None, 0))
        else:
            d = d.isel(time=mask)
    return Comparer.from_matched_data(data=d, raw_mod_data=raw_mod_data)

skill

skill(by=None, metrics=None, **kwargs)

Skill assessment of model(s)

Parameters:

Name	Type	Description	Default
`by`	`str or List[str]`	group by, by default ["model"] by column name by temporal bin of the DateTimeIndex via the freq-argument (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the syntax 'dt:month'. The dt-argument is different from the freq-argument in that it gives month-of-year rather than month-of-data.	`None`
`metrics`	`list`	list of modelskill.metrics, by default modelskill.options.metrics.list	`None`

Returns:

Type	Description
`SkillTable`	skill assessment object

See also

sel a method for filtering/selecting data

Examples:

>>> import modelskill as ms
>>> cc = ms.match(c2, mod)
>>> cc['c2'].skill().round(2)
               n  bias  rmse  urmse   mae    cc    si    r2
observation
c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99

>>> cc['c2'].skill(by='freq:D').round(2)
             n  bias  rmse  urmse   mae    cc    si    r2
2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98
2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN
2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99

Source code in modelskill/comparison/_comparison.py

def skill(
    self,
    by: str | Iterable[str] | None = None,
    metrics: Iterable[str] | Iterable[Callable] | str | Callable | None = None,
    **kwargs: Any,
) -> SkillTable:
    """Skill assessment of model(s)

    Parameters
    ----------
    by : str or List[str], optional
        group by, by default ["model"]

        - by column name
        - by temporal bin of the DateTimeIndex via the freq-argument
        (using pandas pd.Grouper(freq)), e.g.: 'freq:M' = monthly; 'freq:D' daily
        - by the dt accessor of the DateTimeIndex (e.g. 'dt.month') using the
        syntax 'dt:month'. The dt-argument is different from the freq-argument
        in that it gives month-of-year rather than month-of-data.
    metrics : list, optional
        list of modelskill.metrics, by default modelskill.options.metrics.list

    Returns
    -------
    SkillTable
        skill assessment object

    See also
    --------
    sel
        a method for filtering/selecting data

    Examples
    --------
    >>> import modelskill as ms
    >>> cc = ms.match(c2, mod)
    >>> cc['c2'].skill().round(2)
                   n  bias  rmse  urmse   mae    cc    si    r2
    observation
    c2           113 -0.00  0.35   0.35  0.29  0.97  0.12  0.99

    >>> cc['c2'].skill(by='freq:D').round(2)
                 n  bias  rmse  urmse   mae    cc    si    r2
    2017-10-27  72 -0.19  0.31   0.25  0.26  0.48  0.12  0.98
    2017-10-28   0   NaN   NaN    NaN   NaN   NaN   NaN   NaN
    2017-10-29  41  0.33  0.41   0.25  0.36  0.96  0.06  0.99
    """
    metrics = _parse_metric(metrics, directional=self.quantity.is_directional)

    # TODO remove in v1.1
    model, start, end, area = _get_deprecated_args(kwargs)  # type: ignore
    if kwargs != {}:
        raise AttributeError(f"Unknown keyword arguments: {kwargs}")

    cmp = self.sel(
        model=model,
        start=start,
        end=end,
        area=area,
    )
    if cmp.n_points == 0:
        raise ValueError("No data selected for skill assessment")

    by = _parse_groupby(by, n_mod=cmp.n_models, n_qnt=1)

    df = cmp._to_long_dataframe()
    res = _groupby_df(df, by=by, metrics=metrics)
    res["x"] = np.nan if self.gtype == "track" else cmp.x
    res["y"] = np.nan if self.gtype == "track" else cmp.y
    res = self._add_as_col_if_not_in_index(df, skilldf=res)
    return SkillTable(res)

to_dataframe

to_dataframe()

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type	Description
`DataFrame`	data as a pandas DataFrame

Source code in modelskill/comparison/_comparison.py

def to_dataframe(self) -> pd.DataFrame:
    """Convert matched data to pandas DataFrame

    Include x, y coordinates only if gtype=track

    Returns
    -------
    pd.DataFrame
        data as a pandas DataFrame
    """
    if self.gtype == str(GeometryType.POINT):
        # we remove the scalar coordinate variables as they
        # will otherwise be columns in the dataframe
        return self.data.drop_vars(["x", "y", "z"]).to_dataframe()
    elif self.gtype == str(GeometryType.TRACK):
        df = self.data.drop_vars(["z"]).to_dataframe()
        # make sure that x, y cols are first
        cols = ["x", "y"] + [c for c in df.columns if c not in ["x", "y"]]
        return df[cols]
    else:
        raise NotImplementedError(f"Unknown gtype: {self.gtype}")

where

where(cond)

Return a new Comparer with values where cond is True

Parameters:

Name	Type	Description	Default
`cond`	`(bool, ndarray, DataArray)`	This selects the values to return.	required

Returns:

Type	Description
`Comparer`	New Comparer with values where cond is True and other otherwise.

Examples:

>>> c2 = c.where(c.data.Observation > 0)

Source code in modelskill/comparison/_comparison.py

def where(
    self,
    cond: Union[bool, np.ndarray, xr.DataArray],
) -> "Comparer":
    """Return a new Comparer with values where cond is True

    Parameters
    ----------
    cond : bool, np.ndarray, xr.DataArray
        This selects the values to return.

    Returns
    -------
    Comparer
        New Comparer with values where cond is True and other otherwise.

    Examples
    --------
    >>> c2 = c.where(c.data.Observation > 0)
    """
    d = self.data.where(cond, other=np.nan)
    d = d.dropna(dim="time", how="all")
    return Comparer.from_matched_data(d, self.raw_mod_data)

modelskill.comparison._comparer_plotter.ComparerPlotter

Plotter class for Comparer

Examples:

>>> cmp.plot.scatter()
>>> cmp.plot.timeseries()
>>> cmp.plot.hist()
>>> cmp.plot.kde()
>>> cmp.plot.qq()
>>> cmp.plot.box()

Source code in modelskill/comparison/_comparer_plotter.py

class ComparerPlotter:
    """Plotter class for Comparer

    Examples
    --------
    >>> cmp.plot.scatter()
    >>> cmp.plot.timeseries()
    >>> cmp.plot.hist()
    >>> cmp.plot.kde()
    >>> cmp.plot.qq()
    >>> cmp.plot.box()
    """

    def __init__(self, comparer: Comparer) -> None:
        self.comparer = comparer
        self.is_directional = comparer.quantity.is_directional

    def __call__(
        self, *args, **kwargs
    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:
        """Plot scatter plot of modelled vs observed data"""
        return self.scatter(*args, **kwargs)

    def timeseries(
        self,
        *,
        title: str | None = None,
        ylim: Tuple[float, float] | None = None,
        ax=None,
        figsize: Tuple[float, float] | None = None,
        backend: str = "matplotlib",
        **kwargs,
    ):
        """Timeseries plot showing compared data: observation vs modelled

        Parameters
        ----------
        title : str, optional
            plot title, by default None
        ylim : (float, float), optional
            plot range for the model (ymin, ymax), by default None
        ax : matplotlib.axes.Axes, optional
            axes to plot on, by default None
        figsize : (float, float), optional
            figure size, by default None
        backend : str, optional
            use "plotly" (interactive) or "matplotlib" backend,
            by default "matplotlib"
        **kwargs
            other keyword arguments to fig.update_layout (plotly backend)

        Returns
        -------
        matplotlib.axes.Axes or plotly.graph_objects.Figure
        """
        from ._comparison import MOD_COLORS

        cmp = self.comparer

        if title is None:
            title = cmp.name

        if backend == "matplotlib":
            fig, ax = _get_fig_ax(ax, figsize)
            for j in range(cmp.n_models):
                key = cmp.mod_names[j]
                mod = cmp.raw_mod_data[key]._values_as_series
                mod.plot(ax=ax, color=MOD_COLORS[j])

            ax.scatter(
                cmp.time,
                cmp.data[cmp._obs_name].values,
                marker=".",
                color=cmp.data[cmp._obs_name].attrs["color"],
            )
            ax.set_ylabel(cmp._unit_text)
            ax.legend([*cmp.mod_names, cmp._obs_name])
            ax.set_ylim(ylim)
            if self.is_directional:
                _ytick_directional(ax, ylim)
            ax.set_title(title)
            return ax

        elif backend == "plotly":  # pragma: no cover
            import plotly.graph_objects as go  # type: ignore

            mod_scatter_list = []
            for j in range(cmp.n_models):
                key = cmp.mod_names[j]
                mod = cmp.raw_mod_data[key]._values_as_series
                mod_scatter_list.append(
                    go.Scatter(
                        x=mod.index,
                        y=mod.values,
                        name=key,
                        line=dict(color=MOD_COLORS[j]),
                    )
                )

            fig = go.Figure(
                [
                    *mod_scatter_list,
                    go.Scatter(
                        x=cmp.time,
                        y=cmp.data[cmp._obs_name].values,
                        name=cmp._obs_name,
                        mode="markers",
                        marker=dict(color=cmp.data[cmp._obs_name].attrs["color"]),
                    ),
                ]
            )

            fig.update_layout(title=title, yaxis_title=cmp._unit_text, **kwargs)
            fig.update_yaxes(range=ylim)

            return fig
        else:
            raise ValueError(f"Plotting backend: {backend} not supported")

    def hist(
        self,
        bins: int | Sequence = 100,
        *,
        model: str | int | None = None,
        title: str | None = None,
        ax=None,
        figsize: Tuple[float, float] | None = None,
        density: bool = True,
        alpha: float = 0.5,
        **kwargs,
    ):
        """Plot histogram of model data and observations.

        Wraps pandas.DataFrame hist() method.

        Parameters
        ----------
        bins : int, optional
            number of bins, by default 100
        title : str, optional
            plot title, default: [model name] vs [observation name]
        ax : matplotlib.axes.Axes, optional
            axes to plot on, by default None
        figsize : tuple, optional
            figure size, by default None
        density: bool, optional
            If True, draw and return a probability density
        alpha : float, optional
            alpha transparency fraction, by default 0.5
        **kwargs
            other keyword arguments to df.plot.hist()

        Returns
        -------
        matplotlib axes

        See also
        --------
        pandas.Series.plot.hist
        matplotlib.axes.Axes.hist
        """
        cmp = self.comparer

        if model is None:
            mod_names = cmp.mod_names
        else:
            warnings.warn(
                "The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()",
                FutureWarning,
            )
            model_list = [model] if isinstance(model, (str, int)) else model
            mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]

        axes = []
        for mod_name in mod_names:
            ax_mod = self._hist_one_model(
                mod_name=mod_name,
                bins=bins,
                title=title,
                ax=ax,
                figsize=figsize,
                density=density,
                alpha=alpha,
                **kwargs,
            )
            axes.append(ax_mod)

        return axes[0] if len(axes) == 1 else axes

    def _hist_one_model(
        self,
        *,
        mod_name: str,
        bins: int | Sequence | None,
        title: str | None,
        ax,
        figsize: Tuple[float, float] | None,
        density: bool | None,
        alpha: float | None,
        **kwargs,
    ):
        from ._comparison import MOD_COLORS  # TODO move to here

        cmp = self.comparer
        assert mod_name in cmp.mod_names, f"Model {mod_name} not found in comparer"
        mod_idx = _get_idx(mod_name, cmp.mod_names)

        title = f"{mod_name} vs {cmp.name}" if title is None else title

        _, ax = _get_fig_ax(ax, figsize)

        kwargs["alpha"] = alpha
        kwargs["density"] = density
        kwargs["ax"] = ax

        ax = (
            cmp.data[mod_name]
            .to_series()
            .hist(bins=bins, color=MOD_COLORS[mod_idx], **kwargs)
        )

        cmp.data[cmp._obs_name].to_series().hist(
            bins=bins, color=cmp.data[cmp._obs_name].attrs["color"], **kwargs
        )
        ax.legend([mod_name, cmp._obs_name])
        ax.set_title(title)
        ax.set_xlabel(f"{cmp._unit_text}")
        if density:
            ax.set_ylabel("density")
        else:
            ax.set_ylabel("count")

        if self.is_directional:
            _xtick_directional(ax)

        return ax

    def kde(self, ax=None, title=None, figsize=None, **kwargs) -> matplotlib.axes.Axes:
        """Plot kde (kernel density estimates of distributions) of model data and observations.

        Wraps pandas.DataFrame kde() method.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            axes to plot on, by default None
        title : str, optional
            plot title, default: "KDE plot for [observation name]"
        figsize : tuple, optional
            figure size, by default None
        **kwargs
            other keyword arguments to df.plot.kde()

        Returns
        -------
        matplotlib.axes.Axes

        Examples
        --------
        >>> cmp.plot.kde()
        >>> cmp.plot.kde(bw_method=0.3)
        >>> cmp.plot.kde(ax=ax, bw_method='silverman')
        >>> cmp.plot.kde(xlim=[0,None], title="Density plot");

        See also
        --------
        pandas.Series.plot.kde
        """
        cmp = self.comparer

        _, ax = _get_fig_ax(ax, figsize)

        cmp.data.Observation.to_series().plot.kde(
            ax=ax, linestyle="dashed", label="Observation", **kwargs
        )

        for model in cmp.mod_names:
            cmp.data[model].to_series().plot.kde(ax=ax, label=model, **kwargs)

        ax.set_xlabel(cmp._unit_text)  # TODO

        ax.legend()

        # remove y-axis, ticks and label
        ax.yaxis.set_visible(False)
        ax.tick_params(axis="y", which="both", length=0)
        ax.set_ylabel("")
        title = f"KDE plot for {cmp.name}" if title is None else title
        ax.set_title(title)

        # remove box around plot
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["left"].set_visible(False)

        if self.is_directional:
            _xtick_directional(ax)

        return ax

    def qq(
        self,
        quantiles: int | Sequence[float] | None = None,
        *,
        title=None,
        ax=None,
        figsize=None,
        **kwargs,
    ):
        """Make quantile-quantile (q-q) plot of model data and observations.

        Primarily used to compare multiple models.

        Parameters
        ----------
        quantiles: (int, sequence), optional
            number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)
            if int, this is the number of points
            if sequence (list of floats), represents the desired quantiles (from 0 to 1)
        title : str, optional
            plot title, default: "Q-Q plot for [observation name]"
        ax : matplotlib.axes.Axes, optional
            axes to plot on, by default None
        figsize : tuple, optional
            figure size, by default None
        **kwargs
            other keyword arguments to plt.plot()

        Returns
        -------
        matplotlib axes

        Examples
        --------
        >>> cmp.plot.qq()

        """
        cmp = self.comparer

        _, ax = _get_fig_ax(ax, figsize)

        x = cmp.data.Observation.values
        xmin, xmax = x.min(), x.max()
        ymin, ymax = np.inf, -np.inf

        for mod_name in cmp.mod_names:
            y = cmp.data[mod_name].values
            ymin = min([y.min(), ymin])
            ymax = max([y.max(), ymax])
            xq, yq = quantiles_xy(x, y, quantiles)
            ax.plot(
                xq,
                yq,
                ".-",
                label=mod_name,
                zorder=4,
                **kwargs,
            )

        xymin = min([xmin, ymin])
        xymax = max([xmax, ymax])

        # 1:1 line
        ax.plot(
            [xymin, xymax],
            [xymin, xymax],
            label=options.plot.scatter.oneone_line.label,
            c=options.plot.scatter.oneone_line.color,
            zorder=3,
        )

        ax.axis("square")
        ax.set_xlim([xymin, xymax])
        ax.set_ylim([xymin, xymax])
        ax.minorticks_on()
        ax.grid(which="both", axis="both", linewidth="0.2", color="k", alpha=0.6)

        ax.legend()
        ax.set_xlabel("Observation, " + cmp._unit_text)
        ax.set_ylabel("Model, " + cmp._unit_text)
        ax.set_title(title or f"Q-Q plot for {cmp.name}")

        if self.is_directional:
            _xtick_directional(ax)
            _ytick_directional(ax)

        return ax

    def box(self, *, ax=None, title=None, figsize=None, **kwargs):
        """Make a box plot of model data and observations.

        Wraps pandas.DataFrame boxplot() method.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            axes to plot on, by default None
        title : str, optional
            plot title, default: [observation name]
        figsize : tuple, optional
            figure size, by default None
        **kwargs
            other keyword arguments to df.boxplot()

        Returns
        -------
        matplotlib axes

        Examples
        --------
        >>> cmp.plot.box()
        >>> cmp.plot.box(showmeans=True)
        >>> cmp.plot.box(ax=ax, title="Box plot")

        See also
        --------
        pandas.DataFrame.boxplot
        matplotlib.pyplot.boxplot
        """
        cmp = self.comparer

        _, ax = _get_fig_ax(ax, figsize)

        cols = ["Observation"] + cmp.mod_names
        df = cmp.data[cols].to_dataframe()[cols]
        df.boxplot(ax=ax, **kwargs)
        ax.set_ylabel(cmp._unit_text)
        ax.set_title(title or cmp.name)

        if self.is_directional:
            _ytick_directional(ax)

        return ax

    def scatter(
        self,
        *,
        model=None,
        bins: int | float = 120,
        quantiles: int | Sequence[float] | None = None,
        fit_to_quantiles: bool = False,
        show_points: bool | int | float | None = None,
        show_hist: Optional[bool] = None,
        show_density: Optional[bool] = None,
        norm: Optional[colors.Normalize] = None,
        backend: Literal["matplotlib", "plotly"] = "matplotlib",
        figsize: Tuple[float, float] = (8, 8),
        xlim: Optional[Tuple[float, float]] = None,
        ylim: Optional[Tuple[float, float]] = None,
        reg_method: str | bool = "ols",
        title: Optional[str] = None,
        xlabel: Optional[str] = None,
        ylabel: Optional[str] = None,
        skill_table: Optional[Union[str, List[str], bool]] = None,
        ax: Optional[matplotlib.axes.Axes] = None,
        **kwargs,
    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:
        """Scatter plot showing compared data: observation vs modelled
        Optionally, with density histogram.

        Parameters
        ----------
        bins: (int, float, sequence), optional
            bins for the 2D histogram on the background. By default 20 bins.
            if int, represents the number of bins of 2D
            if float, represents the bin size
            if sequence (list of int or float), represents the bin edges
        quantiles: (int, sequence), optional
            number of quantiles for QQ-plot, by default None and will depend
            on the scatter data length (10, 100 or 1000); if int, this is
            the number of points; if sequence (list of floats), represents
            the desired quantiles (from 0 to 1)
        fit_to_quantiles: bool, optional
            by default the regression line is fitted to all data, if True,
            it is fitted to the quantiles which can be useful to represent
            the extremes of the distribution, by default False
        show_points : (bool, int, float), optional
            Should the scatter points be displayed? None means: show all
            points if fewer than 1e4, otherwise show 1e4 sample points,
            by default None. float: fraction of points to show on plot
            from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)
            given, then 'n' points will be displayed, randomly selected
        show_hist : bool, optional
            show the data density as a a 2d histogram, by default None
        show_density: bool, optional
            show the data density as a colormap of the scatter, by default
            None. If both `show_density` and `show_hist` are None, then
            `show_density` is used by default. For binning the data, the
            kword `bins=Float` is used.
        norm : matplotlib.colors norm
            colormap normalization. If None, defaults to
            matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)
        backend : str, optional
            use "plotly" (interactive) or "matplotlib" backend,
            by default "matplotlib"
        figsize : tuple, optional
            width and height of the figure, by default (8, 8)
        xlim : tuple, optional
            plot range for the observation (xmin, xmax), by default None
        ylim : tuple, optional
            plot range for the model (ymin, ymax), by default None
        reg_method : str or bool, optional
            method for determining the regression line
            "ols" : ordinary least squares regression
            "odr" : orthogonal distance regression,
            False : no regression line
            by default "ols"
        title : str, optional
            plot title, by default None
        xlabel : str, optional
            x-label text on plot, by default None
        ylabel : str, optional
            y-label text on plot, by default None
        skill_table : str, List[str], bool, optional
            list of modelskill.metrics or boolean, if True then by default
            modelskill.options.metrics.list. This kword adds a box at the
            right of the scatter plot, by default False
        ax : matplotlib.axes.Axes, optional
            axes to plot on, by default None
        **kwargs
            other keyword arguments to plt.scatter()

        Examples
        ------
        >>> cmp.plot.scatter()
        >>> cmp.plot.scatter(bins=0.2, backend='plotly')
        >>> cmp.plot.scatter(show_points=False, title='no points')
        >>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')
        >>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))
        """

        cmp = self.comparer
        if model is None:
            mod_names = cmp.mod_names
        else:
            warnings.warn(
                "The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()",
                FutureWarning,
            )
            model_list = [model] if isinstance(model, (str, int)) else model
            mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]

        axes = []
        for mod_name in mod_names:
            ax_mod = self._scatter_one_model(
                mod_name=mod_name,
                bins=bins,
                quantiles=quantiles,
                fit_to_quantiles=fit_to_quantiles,
                show_points=show_points,
                show_hist=show_hist,
                show_density=show_density,
                norm=norm,
                backend=backend,
                figsize=figsize,
                xlim=xlim,
                ylim=ylim,
                reg_method=reg_method,
                title=title,
                xlabel=xlabel,
                ylabel=ylabel,
                skill_table=skill_table,
                ax=ax,
                **kwargs,
            )
            axes.append(ax_mod)
        return axes[0] if len(axes) == 1 else axes

    def _scatter_one_model(
        self,
        *,
        mod_name: str,
        bins: int | float,
        quantiles: int | Sequence[float] | None,
        fit_to_quantiles: bool,
        show_points: bool | int | float | None,
        show_hist: Optional[bool],
        show_density: Optional[bool],
        norm: Optional[colors.Normalize],
        backend: Literal["matplotlib", "plotly"],
        figsize: Tuple[float, float],
        xlim: Optional[Tuple[float, float]],
        ylim: Optional[Tuple[float, float]],
        reg_method: str | bool,
        title: Optional[str],
        xlabel: Optional[str],
        ylabel: Optional[str],
        skill_table: Optional[Union[str, List[str], bool]],
        **kwargs,
    ):
        """Scatter plot for one model only"""

        cmp = self.comparer
        cmp_sel_mod = cmp.sel(model=mod_name)
        assert mod_name in cmp.mod_names, f"Model {mod_name} not found in comparer"

        if cmp_sel_mod.n_points == 0:
            raise ValueError("No data found in selection")

        x = cmp_sel_mod.data.Observation.values
        y = cmp_sel_mod.data[mod_name].values

        assert x.ndim == y.ndim == 1, "x and y must be 1D arrays"
        assert x.shape == y.shape, "x and y must have the same shape"

        unit_text = cmp._unit_text
        xlabel = xlabel or f"Observation, {unit_text}"
        ylabel = ylabel or f"Model, {unit_text}"
        title = title or f"{mod_name} vs {cmp.name}"

        skill = None
        skill_score_unit = None

        if skill_table:
            metrics = None if skill_table is True else skill_table
            skill = cmp_sel_mod.skill(metrics=metrics)  # type: ignore
            try:
                skill_score_unit = unit_text.split("[")[1].split("]")[0]
            except IndexError:
                skill_score_unit = ""  # Dimensionless

        if self.is_directional:
            # hide quantiles and regression line
            quantiles = 0
            reg_method = False

        skill_scores = skill.iloc[0].to_dict() if skill is not None else None

        ax = scatter(
            x=x,
            y=y,
            bins=bins,
            quantiles=quantiles,
            fit_to_quantiles=fit_to_quantiles,
            show_points=show_points,
            show_hist=show_hist,
            show_density=show_density,
            norm=norm,
            backend=backend,
            figsize=figsize,
            xlim=xlim,
            ylim=ylim,
            reg_method=reg_method,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel,
            skill_scores=skill_scores,
            skill_score_unit=skill_score_unit,
            **kwargs,
        )

        if backend == "matplotlib" and self.is_directional:
            _xtick_directional(ax, xlim)
            _ytick_directional(ax, ylim)

        return ax

    def taylor(
        self,
        *,
        normalize_std: bool = False,
        figsize: Tuple[float, float] = (7, 7),
        marker: str = "o",
        marker_size: float = 6.0,
        title: str = "Taylor diagram",
    ):
        """Taylor diagram showing model std and correlation to observation
        in a single-quadrant polar plot, with r=std and theta=arccos(cc).

        Parameters
        ----------
        normalize_std : bool, optional
            plot model std normalized with observation std, default False
        figsize : tuple, optional
            width and height of the figure (should be square), by default (7, 7)
        marker : str, optional
            marker type e.g. "x", "*", by default "o"
        marker_size : float, optional
            size of the marker, by default 6
        title : str, optional
            title of the plot, by default "Taylor diagram"

        Returns
        -------
        matplotlib.figure.Figure

        Examples
        ------
        >>> comparer.taylor()
        >>> comparer.taylor(start="2017-10-28", figsize=(5,5))

        References
        ----------
        Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>
        """
        cmp = self.comparer

        # TODO consider if this round-trip  via mtr is necessary to get the std:s
        metrics: List[Callable] = [
            mtr._std_obs,
            mtr._std_mod,
            mtr.cc,
        ]

        sk = cmp.skill(metrics=metrics)

        if sk is None:  # TODO
            return
        df = sk.to_dataframe()
        ref_std = 1.0 if normalize_std else df.iloc[0]["_std_obs"]

        df = df[["_std_obs", "_std_mod", "cc"]].copy()
        df.columns = ["obs_std", "std", "cc"]

        pts = [
            TaylorPoint(
                r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size
            )
            for r in df.itertuples()
        ]

        return taylor_diagram(
            obs_std=ref_std,
            points=pts,
            figsize=figsize,
            obs_text=f"Obs: {cmp.name}",
            normalize_std=normalize_std,
            title=title,
        )

    def residual_hist(
        self, bins=100, title=None, color=None, figsize=None, ax=None, **kwargs
    ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:
        """plot histogram of residual values

        Parameters
        ----------
        bins : int, optional
            specification of bins, by default 100
        title : str, optional
            plot title, default: Residuals, [name]
        color : str, optional
            residual color, by default "#8B8D8E"
        figsize : tuple, optional
            figure size, by default None
        ax : matplotlib.axes.Axes | list[matplotlib.axes.Axes], optional
            axes to plot on, by default None
        **kwargs
            other keyword arguments to plt.hist()

        Returns
        -------
        matplotlib.axes.Axes | list[matplotlib.axes.Axes]
        """
        cmp = self.comparer

        if cmp.n_models == 1:
            return self._residual_hist_one_model(
                bins=bins,
                title=title,
                color=color,
                figsize=figsize,
                ax=ax,
                mod_name=cmp.mod_names[0],
                **kwargs,
            )

        if ax is not None and len(ax) != len(cmp.mod_names):
            raise ValueError("Number of axes must match number of models")

        axs = ax if ax is not None else [None] * len(cmp.mod_names)

        for i, mod_name in enumerate(cmp.mod_names):
            cmp_model = cmp.sel(model=mod_name)
            ax_mod = cmp_model.plot.residual_hist(
                bins=bins,
                title=title,
                color=color,
                figsize=figsize,
                ax=axs[i],
                **kwargs,
            )
            axs[i] = ax_mod

        return axs

    def _residual_hist_one_model(
        self,
        bins=100,
        title=None,
        color=None,
        figsize=None,
        ax=None,
        mod_name=None,
        **kwargs,
    ) -> matplotlib.axes.Axes:
        """Residual histogram for one model only"""
        _, ax = _get_fig_ax(ax, figsize)

        default_color = "#8B8D8E"
        color = default_color if color is None else color
        title = (
            f"Residuals, Observation: {self.comparer.name}, Model: {mod_name}"
            if title is None
            else title
        )
        ax.hist(self.comparer._residual, bins=bins, color=color, **kwargs)
        ax.set_title(title)
        ax.set_xlabel(f"Residuals of {self.comparer._unit_text}")

        if self.is_directional:
            ticks = np.linspace(-180, 180, 9)
            ax.set_xticks(ticks)
            ax.set_xlim(-180, 180)

        return ax

call

__call__(*args, **kwargs)

Plot scatter plot of modelled vs observed data

Source code in modelskill/comparison/_comparer_plotter.py

def __call__(
    self, *args, **kwargs
) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:
    """Plot scatter plot of modelled vs observed data"""
    return self.scatter(*args, **kwargs)

box

box(*, ax=None, title=None, figsize=None, **kwargs)

Make a box plot of model data and observations.

Wraps pandas.DataFrame boxplot() method.

Parameters:

Name	Type	Description	Default
`ax`	`Axes`	axes to plot on, by default None	`None`
`title`	`str`	plot title, default: [observation name]	`None`
`figsize`	`tuple`	figure size, by default None	`None`
`**kwargs`		other keyword arguments to df.boxplot()	`{}`

Returns:

Type	Description
`matplotlib axes`

Examples:

>>> cmp.plot.box()
>>> cmp.plot.box(showmeans=True)
>>> cmp.plot.box(ax=ax, title="Box plot")

See also

pandas.DataFrame.boxplot matplotlib.pyplot.boxplot

Source code in modelskill/comparison/_comparer_plotter.py

def box(self, *, ax=None, title=None, figsize=None, **kwargs):
    """Make a box plot of model data and observations.

    Wraps pandas.DataFrame boxplot() method.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        axes to plot on, by default None
    title : str, optional
        plot title, default: [observation name]
    figsize : tuple, optional
        figure size, by default None
    **kwargs
        other keyword arguments to df.boxplot()

    Returns
    -------
    matplotlib axes

    Examples
    --------
    >>> cmp.plot.box()
    >>> cmp.plot.box(showmeans=True)
    >>> cmp.plot.box(ax=ax, title="Box plot")

    See also
    --------
    pandas.DataFrame.boxplot
    matplotlib.pyplot.boxplot
    """
    cmp = self.comparer

    _, ax = _get_fig_ax(ax, figsize)

    cols = ["Observation"] + cmp.mod_names
    df = cmp.data[cols].to_dataframe()[cols]
    df.boxplot(ax=ax, **kwargs)
    ax.set_ylabel(cmp._unit_text)
    ax.set_title(title or cmp.name)

    if self.is_directional:
        _ytick_directional(ax)

    return ax

hist

hist(bins=100, *, model=None, title=None, ax=None, figsize=None, density=True, alpha=0.5, **kwargs)

Plot histogram of model data and observations.

Wraps pandas.DataFrame hist() method.

Parameters:

Name	Type	Description	Default
`bins`	`int`	number of bins, by default 100	`100`
`title`	`str`	plot title, default: [model name] vs [observation name]	`None`
`ax`	`Axes`	axes to plot on, by default None	`None`
`figsize`	`tuple`	figure size, by default None	`None`
`density`	`bool`	If True, draw and return a probability density	`True`
`alpha`	`float`	alpha transparency fraction, by default 0.5	`0.5`
`**kwargs`		other keyword arguments to df.plot.hist()	`{}`

Returns:

Type	Description
`matplotlib axes`

See also

pandas.Series.plot.hist matplotlib.axes.Axes.hist

Source code in modelskill/comparison/_comparer_plotter.py

def hist(
    self,
    bins: int | Sequence = 100,
    *,
    model: str | int | None = None,
    title: str | None = None,
    ax=None,
    figsize: Tuple[float, float] | None = None,
    density: bool = True,
    alpha: float = 0.5,
    **kwargs,
):
    """Plot histogram of model data and observations.

    Wraps pandas.DataFrame hist() method.

    Parameters
    ----------
    bins : int, optional
        number of bins, by default 100
    title : str, optional
        plot title, default: [model name] vs [observation name]
    ax : matplotlib.axes.Axes, optional
        axes to plot on, by default None
    figsize : tuple, optional
        figure size, by default None
    density: bool, optional
        If True, draw and return a probability density
    alpha : float, optional
        alpha transparency fraction, by default 0.5
    **kwargs
        other keyword arguments to df.plot.hist()

    Returns
    -------
    matplotlib axes

    See also
    --------
    pandas.Series.plot.hist
    matplotlib.axes.Axes.hist
    """
    cmp = self.comparer

    if model is None:
        mod_names = cmp.mod_names
    else:
        warnings.warn(
            "The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.hist()",
            FutureWarning,
        )
        model_list = [model] if isinstance(model, (str, int)) else model
        mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]

    axes = []
    for mod_name in mod_names:
        ax_mod = self._hist_one_model(
            mod_name=mod_name,
            bins=bins,
            title=title,
            ax=ax,
            figsize=figsize,
            density=density,
            alpha=alpha,
            **kwargs,
        )
        axes.append(ax_mod)

    return axes[0] if len(axes) == 1 else axes

kde

kde(ax=None, title=None, figsize=None, **kwargs)

Plot kde (kernel density estimates of distributions) of model data and observations.

Wraps pandas.DataFrame kde() method.

Parameters:

Name	Type	Description	Default
`ax`	`Axes`	axes to plot on, by default None	`None`
`title`	`str`	plot title, default: "KDE plot for [observation name]"	`None`
`figsize`	`tuple`	figure size, by default None	`None`
`**kwargs`		other keyword arguments to df.plot.kde()	`{}`

Returns:

Type	Description
`Axes`

Examples:

>>> cmp.plot.kde()
>>> cmp.plot.kde(bw_method=0.3)
>>> cmp.plot.kde(ax=ax, bw_method='silverman')
>>> cmp.plot.kde(xlim=[0,None], title="Density plot");

See also

pandas.Series.plot.kde

Source code in modelskill/comparison/_comparer_plotter.py

def kde(self, ax=None, title=None, figsize=None, **kwargs) -> matplotlib.axes.Axes:
    """Plot kde (kernel density estimates of distributions) of model data and observations.

    Wraps pandas.DataFrame kde() method.

    Parameters
    ----------
    ax : matplotlib.axes.Axes, optional
        axes to plot on, by default None
    title : str, optional
        plot title, default: "KDE plot for [observation name]"
    figsize : tuple, optional
        figure size, by default None
    **kwargs
        other keyword arguments to df.plot.kde()

    Returns
    -------
    matplotlib.axes.Axes

    Examples
    --------
    >>> cmp.plot.kde()
    >>> cmp.plot.kde(bw_method=0.3)
    >>> cmp.plot.kde(ax=ax, bw_method='silverman')
    >>> cmp.plot.kde(xlim=[0,None], title="Density plot");

    See also
    --------
    pandas.Series.plot.kde
    """
    cmp = self.comparer

    _, ax = _get_fig_ax(ax, figsize)

    cmp.data.Observation.to_series().plot.kde(
        ax=ax, linestyle="dashed", label="Observation", **kwargs
    )

    for model in cmp.mod_names:
        cmp.data[model].to_series().plot.kde(ax=ax, label=model, **kwargs)

    ax.set_xlabel(cmp._unit_text)  # TODO

    ax.legend()

    # remove y-axis, ticks and label
    ax.yaxis.set_visible(False)
    ax.tick_params(axis="y", which="both", length=0)
    ax.set_ylabel("")
    title = f"KDE plot for {cmp.name}" if title is None else title
    ax.set_title(title)

    # remove box around plot
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    if self.is_directional:
        _xtick_directional(ax)

    return ax

qq

qq(quantiles=None, *, title=None, ax=None, figsize=None, **kwargs)

Make quantile-quantile (q-q) plot of model data and observations.

Primarily used to compare multiple models.

Parameters:

Name	Type	Description	Default
`quantiles`	`int \| Sequence[float] \| None`	number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000) if int, this is the number of points if sequence (list of floats), represents the desired quantiles (from 0 to 1)	`None`
`title`	`str`	plot title, default: "Q-Q plot for [observation name]"	`None`
`ax`	`Axes`	axes to plot on, by default None	`None`
`figsize`	`tuple`	figure size, by default None	`None`
`**kwargs`		other keyword arguments to plt.plot()	`{}`

Returns:

Type	Description
`matplotlib axes`

Examples:

>>> cmp.plot.qq()

Source code in modelskill/comparison/_comparer_plotter.py

def qq(
    self,
    quantiles: int | Sequence[float] | None = None,
    *,
    title=None,
    ax=None,
    figsize=None,
    **kwargs,
):
    """Make quantile-quantile (q-q) plot of model data and observations.

    Primarily used to compare multiple models.

    Parameters
    ----------
    quantiles: (int, sequence), optional
        number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000)
        if int, this is the number of points
        if sequence (list of floats), represents the desired quantiles (from 0 to 1)
    title : str, optional
        plot title, default: "Q-Q plot for [observation name]"
    ax : matplotlib.axes.Axes, optional
        axes to plot on, by default None
    figsize : tuple, optional
        figure size, by default None
    **kwargs
        other keyword arguments to plt.plot()

    Returns
    -------
    matplotlib axes

    Examples
    --------
    >>> cmp.plot.qq()

    """
    cmp = self.comparer

    _, ax = _get_fig_ax(ax, figsize)

    x = cmp.data.Observation.values
    xmin, xmax = x.min(), x.max()
    ymin, ymax = np.inf, -np.inf

    for mod_name in cmp.mod_names:
        y = cmp.data[mod_name].values
        ymin = min([y.min(), ymin])
        ymax = max([y.max(), ymax])
        xq, yq = quantiles_xy(x, y, quantiles)
        ax.plot(
            xq,
            yq,
            ".-",
            label=mod_name,
            zorder=4,
            **kwargs,
        )

    xymin = min([xmin, ymin])
    xymax = max([xmax, ymax])

    # 1:1 line
    ax.plot(
        [xymin, xymax],
        [xymin, xymax],
        label=options.plot.scatter.oneone_line.label,
        c=options.plot.scatter.oneone_line.color,
        zorder=3,
    )

    ax.axis("square")
    ax.set_xlim([xymin, xymax])
    ax.set_ylim([xymin, xymax])
    ax.minorticks_on()
    ax.grid(which="both", axis="both", linewidth="0.2", color="k", alpha=0.6)

    ax.legend()
    ax.set_xlabel("Observation, " + cmp._unit_text)
    ax.set_ylabel("Model, " + cmp._unit_text)
    ax.set_title(title or f"Q-Q plot for {cmp.name}")

    if self.is_directional:
        _xtick_directional(ax)
        _ytick_directional(ax)

    return ax

residual_hist

residual_hist(bins=100, title=None, color=None, figsize=None, ax=None, **kwargs)

plot histogram of residual values

Parameters:

Name	Type	Description	Default
`bins`	`int`	specification of bins, by default 100	`100`
`title`	`str`	plot title, default: Residuals, [name]	`None`
`color`	`str`	residual color, by default "#8B8D8E"	`None`
`figsize`	`tuple`	figure size, by default None	`None`
`ax`	`Axes \| list[Axes]`	axes to plot on, by default None	`None`
`**kwargs`		other keyword arguments to plt.hist()	`{}`

Returns:

Type	Description
`Axes \| list[Axes]`

Source code in modelskill/comparison/_comparer_plotter.py

def residual_hist(
    self, bins=100, title=None, color=None, figsize=None, ax=None, **kwargs
) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:
    """plot histogram of residual values

    Parameters
    ----------
    bins : int, optional
        specification of bins, by default 100
    title : str, optional
        plot title, default: Residuals, [name]
    color : str, optional
        residual color, by default "#8B8D8E"
    figsize : tuple, optional
        figure size, by default None
    ax : matplotlib.axes.Axes | list[matplotlib.axes.Axes], optional
        axes to plot on, by default None
    **kwargs
        other keyword arguments to plt.hist()

    Returns
    -------
    matplotlib.axes.Axes | list[matplotlib.axes.Axes]
    """
    cmp = self.comparer

    if cmp.n_models == 1:
        return self._residual_hist_one_model(
            bins=bins,
            title=title,
            color=color,
            figsize=figsize,
            ax=ax,
            mod_name=cmp.mod_names[0],
            **kwargs,
        )

    if ax is not None and len(ax) != len(cmp.mod_names):
        raise ValueError("Number of axes must match number of models")

    axs = ax if ax is not None else [None] * len(cmp.mod_names)

    for i, mod_name in enumerate(cmp.mod_names):
        cmp_model = cmp.sel(model=mod_name)
        ax_mod = cmp_model.plot.residual_hist(
            bins=bins,
            title=title,
            color=color,
            figsize=figsize,
            ax=axs[i],
            **kwargs,
        )
        axs[i] = ax_mod

    return axs

scatter

scatter(*, model=None, bins=120, quantiles=None, fit_to_quantiles=False, show_points=None, show_hist=None, show_density=None, norm=None, backend='matplotlib', figsize=(8, 8), xlim=None, ylim=None, reg_method='ols', title=None, xlabel=None, ylabel=None, skill_table=None, ax=None, **kwargs)

Scatter plot showing compared data: observation vs modelled Optionally, with density histogram.

Parameters:

Name	Type	Description	Default
`bins`	`int \| float`	bins for the 2D histogram on the background. By default 20 bins. if int, represents the number of bins of 2D if float, represents the bin size if sequence (list of int or float), represents the bin edges	`120`
`quantiles`	`int \| Sequence[float] \| None`	number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000); if int, this is the number of points; if sequence (list of floats), represents the desired quantiles (from 0 to 1)	`None`
`fit_to_quantiles`	`bool`	by default the regression line is fitted to all data, if True, it is fitted to the quantiles which can be useful to represent the extremes of the distribution, by default False	`False`
`show_points`	`(bool, int, float)`	Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. float: fraction of points to show on plot from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int) given, then 'n' points will be displayed, randomly selected	`None`
`show_hist`	`bool`	show the data density as a a 2d histogram, by default None	`None`
`show_density`	`Optional[bool]`	show the data density as a colormap of the scatter, by default None. If both `show_density` and `show_hist` are None, then `show_density` is used by default. For binning the data, the kword `bins=Float` is used.	`None`
`norm`	`matplotlib.colors norm`	colormap normalization. If None, defaults to matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)	`None`
`backend`	`str`	use "plotly" (interactive) or "matplotlib" backend, by default "matplotlib"	`'matplotlib'`
`figsize`	`tuple`	width and height of the figure, by default (8, 8)	`(8, 8)`
`xlim`	`tuple`	plot range for the observation (xmin, xmax), by default None	`None`
`ylim`	`tuple`	plot range for the model (ymin, ymax), by default None	`None`
`reg_method`	`str or bool`	method for determining the regression line "ols" : ordinary least squares regression "odr" : orthogonal distance regression, False : no regression line by default "ols"	`'ols'`
`title`	`str`	plot title, by default None	`None`
`xlabel`	`str`	x-label text on plot, by default None	`None`
`ylabel`	`str`	y-label text on plot, by default None	`None`
`skill_table`	`(str, List[str], bool)`	list of modelskill.metrics or boolean, if True then by default modelskill.options.metrics.list. This kword adds a box at the right of the scatter plot, by default False	`None`
`ax`	`Axes`	axes to plot on, by default None	`None`
`**kwargs`		other keyword arguments to plt.scatter()	`{}`

Examples:

>>> cmp.plot.scatter()
>>> cmp.plot.scatter(bins=0.2, backend='plotly')
>>> cmp.plot.scatter(show_points=False, title='no points')
>>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')
>>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))

Source code in modelskill/comparison/_comparer_plotter.py

def scatter(
    self,
    *,
    model=None,
    bins: int | float = 120,
    quantiles: int | Sequence[float] | None = None,
    fit_to_quantiles: bool = False,
    show_points: bool | int | float | None = None,
    show_hist: Optional[bool] = None,
    show_density: Optional[bool] = None,
    norm: Optional[colors.Normalize] = None,
    backend: Literal["matplotlib", "plotly"] = "matplotlib",
    figsize: Tuple[float, float] = (8, 8),
    xlim: Optional[Tuple[float, float]] = None,
    ylim: Optional[Tuple[float, float]] = None,
    reg_method: str | bool = "ols",
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    skill_table: Optional[Union[str, List[str], bool]] = None,
    ax: Optional[matplotlib.axes.Axes] = None,
    **kwargs,
) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]:
    """Scatter plot showing compared data: observation vs modelled
    Optionally, with density histogram.

    Parameters
    ----------
    bins: (int, float, sequence), optional
        bins for the 2D histogram on the background. By default 20 bins.
        if int, represents the number of bins of 2D
        if float, represents the bin size
        if sequence (list of int or float), represents the bin edges
    quantiles: (int, sequence), optional
        number of quantiles for QQ-plot, by default None and will depend
        on the scatter data length (10, 100 or 1000); if int, this is
        the number of points; if sequence (list of floats), represents
        the desired quantiles (from 0 to 1)
    fit_to_quantiles: bool, optional
        by default the regression line is fitted to all data, if True,
        it is fitted to the quantiles which can be useful to represent
        the extremes of the distribution, by default False
    show_points : (bool, int, float), optional
        Should the scatter points be displayed? None means: show all
        points if fewer than 1e4, otherwise show 1e4 sample points,
        by default None. float: fraction of points to show on plot
        from 0 to 1. e.g. 0.5 shows 50% of the points. int: if 'n' (int)
        given, then 'n' points will be displayed, randomly selected
    show_hist : bool, optional
        show the data density as a a 2d histogram, by default None
    show_density: bool, optional
        show the data density as a colormap of the scatter, by default
        None. If both `show_density` and `show_hist` are None, then
        `show_density` is used by default. For binning the data, the
        kword `bins=Float` is used.
    norm : matplotlib.colors norm
        colormap normalization. If None, defaults to
        matplotlib.colors.PowerNorm(vmin=1, gamma=0.5)
    backend : str, optional
        use "plotly" (interactive) or "matplotlib" backend,
        by default "matplotlib"
    figsize : tuple, optional
        width and height of the figure, by default (8, 8)
    xlim : tuple, optional
        plot range for the observation (xmin, xmax), by default None
    ylim : tuple, optional
        plot range for the model (ymin, ymax), by default None
    reg_method : str or bool, optional
        method for determining the regression line
        "ols" : ordinary least squares regression
        "odr" : orthogonal distance regression,
        False : no regression line
        by default "ols"
    title : str, optional
        plot title, by default None
    xlabel : str, optional
        x-label text on plot, by default None
    ylabel : str, optional
        y-label text on plot, by default None
    skill_table : str, List[str], bool, optional
        list of modelskill.metrics or boolean, if True then by default
        modelskill.options.metrics.list. This kword adds a box at the
        right of the scatter plot, by default False
    ax : matplotlib.axes.Axes, optional
        axes to plot on, by default None
    **kwargs
        other keyword arguments to plt.scatter()

    Examples
    ------
    >>> cmp.plot.scatter()
    >>> cmp.plot.scatter(bins=0.2, backend='plotly')
    >>> cmp.plot.scatter(show_points=False, title='no points')
    >>> cmp.plot.scatter(xlabel='all observations', ylabel='my model')
    >>> cmp.sel(model='HKZN_v2').plot.scatter(figsize=(10, 10))
    """

    cmp = self.comparer
    if model is None:
        mod_names = cmp.mod_names
    else:
        warnings.warn(
            "The 'model' keyword is deprecated! Instead, filter comparer before plotting cmp.sel(model=...).plot.scatter()",
            FutureWarning,
        )
        model_list = [model] if isinstance(model, (str, int)) else model
        mod_names = [cmp.mod_names[_get_idx(m, cmp.mod_names)] for m in model_list]

    axes = []
    for mod_name in mod_names:
        ax_mod = self._scatter_one_model(
            mod_name=mod_name,
            bins=bins,
            quantiles=quantiles,
            fit_to_quantiles=fit_to_quantiles,
            show_points=show_points,
            show_hist=show_hist,
            show_density=show_density,
            norm=norm,
            backend=backend,
            figsize=figsize,
            xlim=xlim,
            ylim=ylim,
            reg_method=reg_method,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel,
            skill_table=skill_table,
            ax=ax,
            **kwargs,
        )
        axes.append(ax_mod)
    return axes[0] if len(axes) == 1 else axes

taylor

taylor(*, normalize_std=False, figsize=(7, 7), marker='o', marker_size=6.0, title='Taylor diagram')

Taylor diagram showing model std and correlation to observation in a single-quadrant polar plot, with r=std and theta=arccos(cc).

Parameters:

Name	Type	Description	Default
`normalize_std`	`bool`	plot model std normalized with observation std, default False	`False`
`figsize`	`tuple`	width and height of the figure (should be square), by default (7, 7)	`(7, 7)`
`marker`	`str`	marker type e.g. "x", "*", by default "o"	`'o'`
`marker_size`	`float`	size of the marker, by default 6	`6.0`
`title`	`str`	title of the plot, by default "Taylor diagram"	`'Taylor diagram'`

Returns:

Type	Description
`Figure`

Examples:

>>> comparer.taylor()
>>> comparer.taylor(start="2017-10-28", figsize=(5,5))

References

Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin yannick.copin@laposte.net

Source code in modelskill/comparison/_comparer_plotter.py

def taylor(
    self,
    *,
    normalize_std: bool = False,
    figsize: Tuple[float, float] = (7, 7),
    marker: str = "o",
    marker_size: float = 6.0,
    title: str = "Taylor diagram",
):
    """Taylor diagram showing model std and correlation to observation
    in a single-quadrant polar plot, with r=std and theta=arccos(cc).

    Parameters
    ----------
    normalize_std : bool, optional
        plot model std normalized with observation std, default False
    figsize : tuple, optional
        width and height of the figure (should be square), by default (7, 7)
    marker : str, optional
        marker type e.g. "x", "*", by default "o"
    marker_size : float, optional
        size of the marker, by default 6
    title : str, optional
        title of the plot, by default "Taylor diagram"

    Returns
    -------
    matplotlib.figure.Figure

    Examples
    ------
    >>> comparer.taylor()
    >>> comparer.taylor(start="2017-10-28", figsize=(5,5))

    References
    ----------
    Copin, Y. (2018). https://gist.github.com/ycopin/3342888, Yannick Copin <yannick.copin@laposte.net>
    """
    cmp = self.comparer

    # TODO consider if this round-trip  via mtr is necessary to get the std:s
    metrics: List[Callable] = [
        mtr._std_obs,
        mtr._std_mod,
        mtr.cc,
    ]

    sk = cmp.skill(metrics=metrics)

    if sk is None:  # TODO
        return
    df = sk.to_dataframe()
    ref_std = 1.0 if normalize_std else df.iloc[0]["_std_obs"]

    df = df[["_std_obs", "_std_mod", "cc"]].copy()
    df.columns = ["obs_std", "std", "cc"]

    pts = [
        TaylorPoint(
            r.Index, r.obs_std, r.std, r.cc, marker=marker, marker_size=marker_size
        )
        for r in df.itertuples()
    ]

    return taylor_diagram(
        obs_std=ref_std,
        points=pts,
        figsize=figsize,
        obs_text=f"Obs: {cmp.name}",
        normalize_std=normalize_std,
        title=title,
    )

timeseries

timeseries(*, title=None, ylim=None, ax=None, figsize=None, backend='matplotlib', **kwargs)

Timeseries plot showing compared data: observation vs modelled

Parameters:

Name	Type	Description	Default
`title`	`str`	plot title, by default None	`None`
`ylim`	`(float, float)`	plot range for the model (ymin, ymax), by default None	`None`
`ax`	`Axes`	axes to plot on, by default None	`None`
`figsize`	`(float, float)`	figure size, by default None	`None`
`backend`	`str`	use "plotly" (interactive) or "matplotlib" backend, by default "matplotlib"	`'matplotlib'`
`**kwargs`		other keyword arguments to fig.update_layout (plotly backend)	`{}`

Returns:

Type	Description
`Axes or Figure`

Source code in modelskill/comparison/_comparer_plotter.py

def timeseries(
    self,
    *,
    title: str | None = None,
    ylim: Tuple[float, float] | None = None,
    ax=None,
    figsize: Tuple[float, float] | None = None,
    backend: str = "matplotlib",
    **kwargs,
):
    """Timeseries plot showing compared data: observation vs modelled

    Parameters
    ----------
    title : str, optional
        plot title, by default None
    ylim : (float, float), optional
        plot range for the model (ymin, ymax), by default None
    ax : matplotlib.axes.Axes, optional
        axes to plot on, by default None
    figsize : (float, float), optional
        figure size, by default None
    backend : str, optional
        use "plotly" (interactive) or "matplotlib" backend,
        by default "matplotlib"
    **kwargs
        other keyword arguments to fig.update_layout (plotly backend)

    Returns
    -------
    matplotlib.axes.Axes or plotly.graph_objects.Figure
    """
    from ._comparison import MOD_COLORS

    cmp = self.comparer

    if title is None:
        title = cmp.name

    if backend == "matplotlib":
        fig, ax = _get_fig_ax(ax, figsize)
        for j in range(cmp.n_models):
            key = cmp.mod_names[j]
            mod = cmp.raw_mod_data[key]._values_as_series
            mod.plot(ax=ax, color=MOD_COLORS[j])

        ax.scatter(
            cmp.time,
            cmp.data[cmp._obs_name].values,
            marker=".",
            color=cmp.data[cmp._obs_name].attrs["color"],
        )
        ax.set_ylabel(cmp._unit_text)
        ax.legend([*cmp.mod_names, cmp._obs_name])
        ax.set_ylim(ylim)
        if self.is_directional:
            _ytick_directional(ax, ylim)
        ax.set_title(title)
        return ax

    elif backend == "plotly":  # pragma: no cover
        import plotly.graph_objects as go  # type: ignore

        mod_scatter_list = []
        for j in range(cmp.n_models):
            key = cmp.mod_names[j]
            mod = cmp.raw_mod_data[key]._values_as_series
            mod_scatter_list.append(
                go.Scatter(
                    x=mod.index,
                    y=mod.values,
                    name=key,
                    line=dict(color=MOD_COLORS[j]),
                )
            )

        fig = go.Figure(
            [
                *mod_scatter_list,
                go.Scatter(
                    x=cmp.time,
                    y=cmp.data[cmp._obs_name].values,
                    name=cmp._obs_name,
                    mode="markers",
                    marker=dict(color=cmp.data[cmp._obs_name].attrs["color"]),
                ),
            ]
        )

        fig.update_layout(title=title, yaxis_title=cmp._unit_text, **kwargs)
        fig.update_yaxes(range=ylim)

        return fig
    else:
        raise ValueError(f"Plotting backend: {backend} not supported")

Comparer

modelskill.Comparer

attrs property writable

aux_names property

gtype property

mod_names property

n_models property

n_points property

name property writable

plot instance-attribute

quantity property writable

time property

weight property writable

x property

y property

z property

from_matched_data staticmethod

gridded_skill

load staticmethod

query

rename

save

score

sel

skill

to_dataframe

where

modelskill.comparison._comparer_plotter.ComparerPlotter

__call__

box

hist

kde

qq

residual_hist

scatter

taylor

timeseries

attrs `property` `writable`

aux_names `property`

gtype `property`

mod_names `property`

n_models `property`

n_points `property`

name `property` `writable`

plot `instance-attribute`

quantity `property` `writable`

time `property`

weight `property` `writable`

x `property`

y `property`

z `property`

from_matched_data `staticmethod`

load `staticmethod`

call