PointModelResult

modelskill.PointModelResult

Bases: TimeSeries, Alignable

Construct a PointModelResult from a 0d data source: dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series or xarray.Dataset/DataArray

Parameters:

Name	Type	Description	Default
`data`	`(str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray)`	filename (.dfs0 or .nc) or object with the data	required
`name`	`Optional[str]`	The name of the model result, by default None (will be set to file name or item name)	`None`
`x`	`float`	first coordinate of point position, inferred from data if not given, else None	`None`
`y`	`float`	second coordinate of point position, inferred from data if not given, else None	`None`
`z`	`float`	third coordinate of point position, inferred from data if not given, else None	`None`
`item`	`str \| int \| None`	If multiple items/arrays are present in the input an item must be given (as either an index or a string), by default None	`None`
`quantity`	`Quantity`	Model quantity, for MIKE files this is inferred from the EUM information	`None`
`aux_items`	`Optional[list[int \| str]]`	Auxiliary items, by default None	`None`

Source code in modelskill/model/point.py

class PointModelResult(TimeSeries, Alignable):
    """Construct a PointModelResult from a 0d data source:
    dfs0 file, mikeio.Dataset/DataArray, pandas.DataFrame/Series
    or xarray.Dataset/DataArray

    Parameters
    ----------
    data : str, Path, mikeio.Dataset, mikeio.DataArray, pd.DataFrame, pd.Series, xr.Dataset or xr.DataArray
        filename (.dfs0 or .nc) or object with the data
    name : Optional[str], optional
        The name of the model result,
        by default None (will be set to file name or item name)
    x : float, optional
        first coordinate of point position, inferred from data if not given, else None
    y : float, optional
        second coordinate of point position, inferred from data if not given, else None
    z : float, optional
        third coordinate of point position, inferred from data if not given, else None
    item : str | int | None, optional
        If multiple items/arrays are present in the input an item
        must be given (as either an index or a string), by default None
    quantity : Quantity, optional
        Model quantity, for MIKE files this is inferred from the EUM information
    aux_items : Optional[Sequence[int | str]], optional
        Auxiliary items, by default None
    """

    def __init__(
        self,
        data: PointType,
        *,
        name: Optional[str] = None,
        x: Optional[float] = None,
        y: Optional[float] = None,
        z: Optional[float] = None,
        item: str | int | None = None,
        quantity: Optional[Quantity] = None,
        aux_items: Optional[Sequence[int | str]] = None,
    ) -> None:
        # Input that does not pass the validation check is parsed first;
        # input that passes is used as-is (the keyword arguments are then unused)
        if not self._is_input_validated(data):
            data = _parse_point_input(
                data,
                name=name,
                item=item,
                quantity=quantity,
                aux_items=aux_items,
                x=x,
                y=y,
                z=z,
            )

        assert isinstance(data, xr.Dataset)

        # mark the first data variable as model data
        data_var = str(list(data.data_vars)[0])
        data[data_var].attrs["kind"] = "model"
        super().__init__(data=data)

    def extract(
        self, obs: PointObservation, spatial_method: Optional[str] = None
    ) -> PointModelResult:
        """Return the model result matching a point observation

        A point model result is already a single time series, so nothing is
        extracted: the object itself is returned.

        Parameters
        ----------
        obs : PointObservation
            The observation to match
        spatial_method : Optional[str], optional
            Must be None; spatial interpolation is not supported for
            point model results

        Returns
        -------
        PointModelResult
            This object, unchanged

        Raises
        ------
        ValueError
            If obs is not a PointObservation
        NotImplementedError
            If spatial_method is given
        """
        if not isinstance(obs, PointObservation):
            raise ValueError(f"obs must be a PointObservation not {type(obs)}")
        if spatial_method is not None:
            raise NotImplementedError(
                "spatial interpolation not possible when matching point model results with point observations"
            )
        return self

    def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResult:
        """
        Interpolate model result to the time of the observation

        wrapper around xarray.Dataset.interp()

        Parameters
        ----------
        observation : Observation
            The observation to interpolate to
        **kwargs
            Additional keyword arguments passed on to align()
            (e.g. max_gap) and from there to xarray.Dataset.interp

        Returns
        -------
        PointModelResult
            Interpolated model result
        """
        ds = self.align(observation, **kwargs)
        return PointModelResult(ds)

    def align(
        self,
        observation: Observation,
        *,
        max_gap: float | None = None,
        **kwargs: Any,
    ) -> xr.Dataset:
        """Interpolate the model data to the observation times

        Parameters
        ----------
        observation : Observation
            The observation whose time index is the interpolation target
        max_gap : float | None, optional
            Maximum gap (in seconds) between the two model time steps
            surrounding an observation time; interpolated values across
            longer gaps are removed. By default None (no gap filtering)
        **kwargs
            Additional keyword arguments passed to xarray.Dataset.interp

        Returns
        -------
        xr.Dataset
            Model data interpolated to the observation times
        """
        new_time = observation.time

        # drop missing values first so that they are interpolated over
        dati = self.data.dropna("time").interp(
            time=new_time, assume_sorted=True, **kwargs
        )

        pmr = PointModelResult(dati)
        if max_gap is not None:
            # gaps are measured on the original model time axis (self.time)
            pmr = pmr._remove_model_gaps(mod_index=self.time, max_gap=max_gap)
        return pmr.data

    def _remove_model_gaps(
        self,
        mod_index: pd.DatetimeIndex,
        max_gap: float | None = None,
    ) -> PointModelResult:
        """Remove model gaps longer than max_gap from TimeSeries

        Parameters
        ----------
        mod_index : pd.DatetimeIndex
            Time index of the original (un-interpolated) model data
        max_gap : float | None, optional
            Maximum allowed gap in seconds, by default None (nothing is removed)

        Returns
        -------
        PointModelResult
            Time series restricted to times inside gaps of at most max_gap
        """
        if max_gap is None:
            # pd.Timedelta(None, "s") is NaT and every `dt <= NaT` comparison
            # is False, which would silently discard all time steps
            return self
        max_gap_delta = pd.Timedelta(max_gap, "s")
        valid_times = self._get_valid_times(mod_index, max_gap_delta)
        ds = self.data.sel(time=valid_times)
        return PointModelResult(ds)

    def _get_valid_times(
        self, mod_index: pd.DatetimeIndex, max_gap: pd.Timedelta
    ) -> pd.DatetimeIndex:
        """Used only by _remove_model_gaps

        Returns the times of this time series that lie between two
        consecutive times of mod_index that are at most max_gap apart.
        """
        obs_index = self.time
        # init dataframe of available timesteps and their index
        df = pd.DataFrame(index=mod_index)
        df["idx"] = range(len(df))

        # for query times get available left and right index of source times
        # (time-interpolating the integer position gives a fractional position;
        # query times outside the source range stay NaN and are dropped)
        df = (
            df.reindex(df.index.union(obs_index))
            .interpolate(method="time", limit_area="inside")
            .reindex(obs_index)
            .dropna()
        )
        df["idxa"] = np.floor(df.idx).astype(int)
        df["idxb"] = np.ceil(df.idx).astype(int)

        # time of left and right source times and time delta
        df["ta"] = mod_index[df.idxa]
        df["tb"] = mod_index[df.idxb]
        df["dt"] = df.tb - df.ta

        # valid query times where time delta is less than max_gap
        valid_idx = df.dt <= max_gap
        return df[valid_idx].index

gtype `property`

gtype

Geometry type

n_points `property`

n_points

Number of data points

name `property` `writable`

name

Name of time series (value item name)

plot `instance-attribute`

plot = plotter(self)

Plot using the TimeSeriesPlotter

Examples:

>>> obj.plot.timeseries()
>>> obj.plot.hist()

quantity `property` `writable`

quantity

Quantity of time series

time `property`

time

Time index

values `property`

values

Values as numpy array

x `property` `writable`

x-coordinate

y `property` `writable`

y-coordinate

equals

equals(other)

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py

def equals(self, other: TimeSeries) -> bool:
    """Return True if the data of this TimeSeries equals the data of other"""
    own_data = self.data
    other_data = other.data
    return own_data.equals(other_data)

interp_time

interp_time(observation, **kwargs)

Interpolate model result to the time of the observation

wrapper around xarray.Dataset.interp()

Parameters:

Name	Type	Description	Default
`observation`	`Observation`	The observation to interpolate to	required
`**kwargs`	`Any`	Additional keyword arguments passed to xarray.interp	`{}`

Returns:

Type	Description
`PointModelResult`	Interpolated model result

Source code in modelskill/model/point.py

def interp_time(self, observation: Observation, **kwargs: Any) -> PointModelResult:
    """
    Interpolate model result to the time of the observation

    Delegates to ``self.align`` and wraps the returned data in a
    new PointModelResult.

    Parameters
    ----------
    observation : Observation
        The observation to interpolate to
    **kwargs
        Additional keyword arguments forwarded to ``self.align``

    Returns
    -------
    PointModelResult
        Interpolated model result
    """
    return PointModelResult(self.align(observation, **kwargs))

sel

sel(**kwargs)

Select data by label

Source code in modelskill/timeseries/_timeseries.py

def sel(self: T, **kwargs: Any) -> T:
    """Select data by label

    The keyword arguments are forwarded to ``self.data.sel`` and the
    selection is wrapped in a new object of the same class.
    """
    selected = self.data.sel(**kwargs)
    return self.__class__(selected)

to_dataframe

to_dataframe()

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type	Description
`DataFrame`	data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py

def to_dataframe(self) -> pd.DataFrame:
    """Convert the data to a pandas DataFrame

    For point geometry the x, y and z coordinate variables are dropped;
    for track geometry only z is dropped and x, y become the first columns.

    Returns
    -------
    pd.DataFrame
        data as a pandas DataFrame

    Raises
    ------
    NotImplementedError
        If the geometry type is neither point nor track
    """
    if self.gtype == str(GeometryType.POINT):
        # the scalar coordinate variables would otherwise
        # end up as columns in the dataframe
        without_coords = self.data.drop_vars(["x", "y", "z"])
        return without_coords.to_dataframe()

    if self.gtype == str(GeometryType.TRACK):
        frame = self.data.drop_vars(["z"]).to_dataframe()
        # x, y go first; all remaining columns keep their order
        leading = ["x", "y"]
        trailing = [col for col in frame.columns if col not in leading]
        return frame[leading + trailing]

    raise NotImplementedError(f"Unknown gtype: {self.gtype}")

trim

trim(start_time=None, end_time=None, buffer='1s')

Trim observation data to a given time interval

Parameters:

Name	Type	Description	Default
`start_time`	`Timestamp`	start time	`None`
`end_time`	`Timestamp`	end time	`None`
`buffer`	`str`	buffer time around start and end time, by default "1s"	`'1s'`

Source code in modelskill/timeseries/_timeseries.py

def trim(
    self: T,
    start_time: Optional[pd.Timestamp] = None,
    end_time: Optional[pd.Timestamp] = None,
    buffer: str = "1s",
) -> T:
    """Trim time series data to a given time interval

    Parameters
    ----------
    start_time : pd.Timestamp, optional
        start time, by default None (no lower bound)
    end_time : pd.Timestamp, optional
        end time, by default None (no upper bound)
    buffer : str, optional
        buffer time around start and end time, by default "1s"

    Returns
    -------
    Object of the same class holding only the data within the interval

    Raises
    ------
    ValueError
        If no data is left after trimming
    """
    # Expand time interval with buffer. A bound that is None stays None
    # (open-ended slice): pd.Timestamp(None) would be NaT and a NaT bound
    # selects no data at all.
    pad = pd.Timedelta(buffer)
    if start_time is not None:
        start_time = pd.Timestamp(start_time) - pad
    if end_time is not None:
        end_time = pd.Timestamp(end_time) + pad

    data = self.data.sel(time=slice(start_time, end_time))
    if len(data.time) == 0:
        raise ValueError(
            f"No data left after trimming to {start_time} - {end_time}"
        )
    return self.__class__(data)

modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py

class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):
    """Time series and histogram plots of a time series object"""

    def __init__(self, ts) -> None:
        # the time series object whose values, name, color and quantity are plotted
        self._ts = ts

    def __call__(self, **kwargs):
        """Plot the time series (the default plot type)

        Parameters
        ----------
        **kwargs
            keyword arguments passed to timeseries()

        Returns
        -------
        matplotlib axes
        """
        # default to timeseries plot; return the axes so callers can customize it
        return self.timeseries(**kwargs)

    def timeseries(
        self, title=None, color=None, marker=".", linestyle="None", **kwargs
    ):
        """Plot timeseries

        Wraps the plot() method of the values series.

        Parameters
        ----------
        title : str, optional
            plot title, default: name of the time series
        color : str, optional
            plot color, by default the color of the time series
        marker : str, optional
            plot marker, by default '.'
        linestyle : str, optional
            line style, by default 'None' (the string, not the None object)
        **kwargs
            other keyword arguments to plot()

        Returns
        -------
        matplotlib axes
        """
        kwargs["color"] = self._ts._color if color is None else color
        ax = self._ts._values_as_series.plot(
            marker=marker, linestyle=linestyle, **kwargs
        )

        title = self._ts.name if title is None else title
        ax.set_title(title)

        ax.set_ylabel(str(self._ts.quantity))
        return ax

    def hist(self, bins=100, title=None, color=None, **kwargs):
        """Plot histogram of timeseries values

        Wraps the hist() method of the values series.

        Parameters
        ----------
        bins : int, optional
            specification of bins, by default 100
        title : str, optional
            plot title, default: name of the time series
        color : str, optional
            plot color, by default the color of the time series
        **kwargs
            other keyword arguments to hist()

        Returns
        -------
        matplotlib axes
        """
        title = self._ts.name if title is None else title

        kwargs["color"] = self._ts._color if color is None else color

        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)
        ax.set_title(title)
        ax.set_xlabel(str(self._ts.quantity))
        return ax

hist

hist(bins=100, title=None, color=None, **kwargs)

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name	Type	Description	Default
`bins`	`int`	specification of bins, by default 100	`100`
`title`	`str`	plot title, default: observation name	`None`
`color`	`str`	plot color, by default "#d62728"	`None`
`**kwargs`		other keyword arguments to df.hist()	`{}`

Returns:

Type	Description
`matplotlib axes`

Source code in modelskill/timeseries/_plotter.py

def hist(self, bins=100, title=None, color=None, **kwargs):
    """Plot histogram of timeseries values

    Calls hist() on the values series of the time series and labels
    the resulting axes with the title and the quantity.

    Parameters
    ----------
    bins : int, optional
        specification of bins, by default 100
    title : str, optional
        plot title, default: name of the time series
    color : str, optional
        plot color, default: color of the time series
    **kwargs
        other keyword arguments to hist()

    Returns
    -------
    matplotlib axes
    """
    ts = self._ts

    if title is None:
        title = ts.name
    if color is None:
        color = ts._color
    kwargs["color"] = color

    ax = ts._values_as_series.hist(bins=bins, **kwargs)
    ax.set_title(title)
    ax.set_xlabel(str(ts.quantity))
    return ax

timeseries

timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name	Type	Description	Default
`title`	`str`	plot title, default: [name]	`None`
`color`	`str`	plot color, by default '#d62728'	`None`
`marker`	`str`	plot marker, by default '.'	`'.'`
`linestyle`	`str`	line style, by default 'None' (the string, i.e. markers only with no connecting line)	`'None'`
`**kwargs`		other keyword arguments to df.plot()	`{}`

Source code in modelskill/timeseries/_plotter.py

def timeseries(
    self, title=None, color=None, marker=".", linestyle="None", **kwargs
):
    """Plot timeseries

    Calls plot() on the values series of the time series and labels
    the resulting axes with the title and the quantity.

    Parameters
    ----------
    title : str, optional
        plot title, default: name of the time series
    color : str, optional
        plot color, default: color of the time series
    marker : str, optional
        plot marker, by default '.'
    linestyle : str, optional
        line style, by default 'None' (the string)
    **kwargs
        other keyword arguments to plot()
    """
    ts = self._ts

    if color is None:
        color = ts._color
    plot_kwargs = {**kwargs, "color": color}

    ax = ts._values_as_series.plot(
        marker=marker, linestyle=linestyle, **plot_kwargs
    )

    if title is None:
        title = ts.name
    ax.set_title(title)

    ax.set_ylabel(str(ts.quantity))
    return ax

PointModelResult

modelskill.PointModelResult

gtype property

n_points property

name property writable

plot instance-attribute

quantity property writable

time property

values property

x property writable

y property writable

equals

interp_time

sel

to_dataframe

trim

modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter

hist

timeseries

gtype `property`

n_points `property`

name `property` `writable`

plot `instance-attribute`

quantity `property` `writable`

time `property`

values `property`

x `property` `writable`

y `property` `writable`