TrackObservation

modelskill.TrackObservation

Bases: Observation

Class for observation with locations moving in space, e.g. satellite altimetry

In addition to the datetime of each observation point, the data must also contain x and y coordinates.

Create TrackObservation from dfs0 or DataFrame

Parameters:

Name	Type	Description	Default
`data`	`(str, Path, mikeio.Dataset, pd.DataFrame, xr.Dataset)`	path to dfs0 file or object with track data	required
`item`	`(str, int)`	item name or index of values, by default None; if data contains more than one item, item must be given	`None`
`name`	`str`	user-defined name for easy identification in plots etc, by default file basename	`None`
`x_item`	`(str, int)`	item name or index of x-coordinate, by default 0	`0`
`y_item`	`(str, int)`	item name or index of y-coordinate, by default 1	`1`
`keep_duplicates`	`(str, bool)`	strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates): "first" to keep first occurrence, "last" to keep last occurrence, False to drop all duplicates, "offset" to add milliseconds to consecutive duplicates, by default "first"	`'first'`
`quantity`	`Quantity`	The quantity of the observation, for validation with model results. For MIKE dfs files this is inferred from the EUM information	`None`
`aux_items`	`list`	list of names or indices of auxiliary items, by default None	`None`
`attrs`	`dict`	additional attributes to be added to the data, by default None	`None`
`weight`	`float`	weighting factor for skill scores, by default 1.0	`1.0`

Examples:

>>> import modelskill as ms
>>> o1 = ms.TrackObservation("track.dfs0", item=2, name="c2")

>>> o1 = ms.TrackObservation("track.dfs0", item="wind_speed", name="c2")

>>> o1 = ms.TrackObservation("lon_after_lat.dfs0", item="wl", x_item=1, y_item=0)

>>> o1 = ms.TrackObservation("track_wl.dfs0", item="wl", x_item="lon", y_item="lat")

>>> df = pd.DataFrame(
...         {
...             "t": pd.date_range("2010-01-01", freq="10s", periods=n),
...             "x": np.linspace(0, 10, n),
...             "y": np.linspace(45000, 45100, n),
...             "swh": [0.1, 0.3, 0.4, 0.5, 0.3],
...         }
... )
>>> df = df.set_index("t")
>>> df
                    x        y  swh
t
2010-01-01 00:00:00   0.0  45000.0  0.1
2010-01-01 00:00:10   2.5  45025.0  0.3
2010-01-01 00:00:20   5.0  45050.0  0.4
2010-01-01 00:00:30   7.5  45075.0  0.5
2010-01-01 00:00:40  10.0  45100.0  0.3
>>> t1 = TrackObservation(df, name="fake")
>>> t1.n_points
5
>>> t1.values
array([0.1, 0.3, 0.4, 0.5, 0.3])
>>> t1.time
DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',
           '2010-01-01 00:00:20', '2010-01-01 00:00:30',
           '2010-01-01 00:00:40'],
          dtype='datetime64[ns]', name='t', freq=None)
>>> t1.x
array([ 0. ,  2.5,  5. ,  7.5, 10. ])
>>> t1.y
array([45000., 45025., 45050., 45075., 45100.])

Source code in modelskill/obs.py

class TrackObservation(Observation):
    """Class for observation with locations moving in space, e.g. satellite altimetry

    In addition to the datetime of each observation point, the data must
    also contain x and y coordinates.

    Create TrackObservation from dfs0 or DataFrame

    Parameters
    ----------
    data : (str, Path, mikeio.Dataset, pd.DataFrame, xr.Dataset)
        path to dfs0 file or object with track data
    item : (str, int), optional
        item name or index of values, by default None;
        if data contains more than one item, item must be given
    name : str, optional
        user-defined name for easy identification in plots etc, by default file basename
    x_item : (str, int), optional
        item name or index of x-coordinate, by default 0
    y_item : (str, int), optional
        item name or index of y-coordinate, by default 1
    keep_duplicates : (str, bool), optional
        strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates):
        "first" to keep first occurrence, "last" to keep last occurrence,
        False to drop all duplicates, "offset" to add milliseconds to
        consecutive duplicates, by default "first"
    offset_duplicates : float, optional
        deprecated, use ``keep_duplicates`` instead; passing a value other
        than the default 0.001 emits a FutureWarning when the input is parsed
    quantity : Quantity, optional
        The quantity of the observation, for validation with model results.
        For MIKE dfs files this is inferred from the EUM information
    aux_items : list, optional
        list of names or indices of auxiliary items, by default None
    attrs : dict, optional
        additional attributes to be added to the data, by default None
    weight : float, optional
        weighting factor for skill scores, by default 1.0

    Notes
    -----
    All arguments except ``data`` are keyword-only.

    Examples
    --------
    >>> import modelskill as ms
    >>> o1 = ms.TrackObservation("track.dfs0", item=2, name="c2")

    >>> o1 = ms.TrackObservation("track.dfs0", item="wind_speed", name="c2")

    >>> o1 = ms.TrackObservation("lon_after_lat.dfs0", item="wl", x_item=1, y_item=0)

    >>> o1 = ms.TrackObservation("track_wl.dfs0", item="wl", x_item="lon", y_item="lat")

    >>> import numpy as np
    >>> import pandas as pd
    >>> n = 5
    >>> df = pd.DataFrame(
    ...         {
    ...             "t": pd.date_range("2010-01-01", freq="10s", periods=n),
    ...             "x": np.linspace(0, 10, n),
    ...             "y": np.linspace(45000, 45100, n),
    ...             "swh": [0.1, 0.3, 0.4, 0.5, 0.3],
    ...         }
    ... )
    >>> df = df.set_index("t")
    >>> df
                        x        y  swh
    t
    2010-01-01 00:00:00   0.0  45000.0  0.1
    2010-01-01 00:00:10   2.5  45025.0  0.3
    2010-01-01 00:00:20   5.0  45050.0  0.4
    2010-01-01 00:00:30   7.5  45075.0  0.5
    2010-01-01 00:00:40  10.0  45100.0  0.3
    >>> t1 = ms.TrackObservation(df, name="fake")
    >>> t1.n_points
    5
    >>> t1.values
    array([0.1, 0.3, 0.4, 0.5, 0.3])
    >>> t1.time
    DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',
               '2010-01-01 00:00:20', '2010-01-01 00:00:30',
               '2010-01-01 00:00:40'],
              dtype='datetime64[ns]', name='t', freq=None)
    >>> t1.x
    array([ 0. ,  2.5,  5. ,  7.5, 10. ])
    >>> t1.y
    array([45000., 45025., 45050., 45075., 45100.])

    """

    def __init__(
        self,
        data: TrackType,
        *,
        item: Optional[int | str] = None,
        name: Optional[str] = None,
        weight: float = 1.0,
        x_item: Optional[int | str] = 0,
        y_item: Optional[int | str] = 1,
        keep_duplicates: bool | str = "first",
        offset_duplicates: float = 0.001,
        quantity: Optional[Quantity] = None,
        aux_items: Optional[list[int | str]] = None,
        attrs: Optional[dict] = None,
    ) -> None:
        # Input that is already validated skips parsing entirely
        if not self._is_input_validated(data):
            # A non-default value means the caller used the deprecated argument
            if offset_duplicates != 0.001:
                warnings.warn(
                    "The 'offset_duplicates' argument is deprecated, use 'keep_duplicates' argument.",
                    FutureWarning,
                    # attribute the warning to the caller, not to this module
                    stacklevel=2,
                )
            data = _parse_track_input(
                data=data,
                name=name,
                item=item,
                quantity=quantity,
                x_item=x_item,
                y_item=y_item,
                keep_duplicates=keep_duplicates,
                offset_duplicates=offset_duplicates,
                aux_items=aux_items,
            )
        # From here on data must be an xr.Dataset (pre-validated or parsed above)
        assert isinstance(data, xr.Dataset)
        super().__init__(data=data, weight=weight, attrs=attrs)

attrs `property` `writable`

attrs

Attributes of the observation

gtype `property`

gtype

Geometry type

n_points `property`

n_points

Number of data points

name `property` `writable`

name

Name of time series (value item name)

plot `instance-attribute`

plot = plotter(self)

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()
>>> obj.plot.hist()

quantity `property` `writable`

quantity

Quantity of time series

time `property`

time

Time index

values `property`

values

Values as numpy array

weight `property` `writable`

weight

Weighting factor for skill scores

x `property` `writable`

x-coordinate

y `property` `writable`

y-coordinate

equals

equals(other)

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py

def equals(self, other: TimeSeries) -> bool:
    """Compare this time series with another one

    Parameters
    ----------
    other : TimeSeries
        the time series to compare with

    Returns
    -------
    bool
        result of ``self.data.equals(other.data)``
    """
    own_data = self.data
    other_data = other.data
    return own_data.equals(other_data)

sel

sel(**kwargs)

Select data by label

Source code in modelskill/timeseries/_timeseries.py

def sel(self: T, **kwargs: Any) -> T:
    """Select a subset of the data by label

    Parameters
    ----------
    **kwargs
        forwarded unchanged to ``self.data.sel()``

    Returns
    -------
    new object of the same class wrapping the selected data
    """
    subset = self.data.sel(**kwargs)
    cls = self.__class__
    return cls(subset)

to_dataframe

to_dataframe()

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type	Description
`DataFrame`	data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py

def to_dataframe(self) -> pd.DataFrame:
    """Convert the time series data to a pandas DataFrame

    The x and y coordinates are kept (as the first two columns) only
    if gtype=track; for gtype=point they are dropped together with z.

    Returns
    -------
    pd.DataFrame
        data as a pandas DataFrame

    Raises
    ------
    NotImplementedError
        if gtype is neither point nor track
    """
    geometry = self.gtype

    if geometry == str(GeometryType.POINT):
        # scalar coordinate variables would otherwise end up
        # as columns in the dataframe
        scalar_coords = ["x", "y", "z"]
        return self.data.drop_vars(scalar_coords).to_dataframe()

    if geometry != str(GeometryType.TRACK):
        raise NotImplementedError(f"Unknown gtype: {self.gtype}")

    frame = self.data.drop_vars(["z"]).to_dataframe()
    # x and y go first, remaining columns keep their order
    leading = ["x", "y"]
    trailing = [col for col in frame.columns if col not in leading]
    return frame[leading + trailing]

trim

trim(start_time=None, end_time=None, buffer='1s')

Trim observation data to a given time interval

Parameters:

Name	Type	Description	Default
`start_time`	`Timestamp`	start time	`None`
`end_time`	`Timestamp`	end time	`None`
`buffer`	`str`	buffer time around start and end time, by default "1s"	`'1s'`

Source code in modelskill/timeseries/_timeseries.py

def trim(
    self: T,
    start_time: Optional[pd.Timestamp] = None,
    end_time: Optional[pd.Timestamp] = None,
    buffer: str = "1s",
) -> T:
    """Trim observation data to a given time interval

    Parameters
    ----------
    start_time : pd.Timestamp, optional
        start time, by default None (no lower bound)
    end_time : pd.Timestamp, optional
        end time, by default None (no upper bound)
    buffer : str, optional
        buffer time around start and end time, by default "1s"

    Returns
    -------
    new object of the same class holding only the data inside the
    (buffered) interval

    Raises
    ------
    ValueError
        if no data is left after trimming
    """
    pad = pd.Timedelta(buffer)

    # A bound of None stays None so the slice is open-ended on that side;
    # pd.Timestamp(None) is NaT, which is not a usable slice bound.
    lower = None if start_time is None else pd.Timestamp(start_time) - pad
    upper = None if end_time is None else pd.Timestamp(end_time) + pad

    data = self.data.sel(time=slice(lower, upper))
    if len(data.time) == 0:
        raise ValueError(
            f"No data left after trimming to {lower} - {upper}"
        )
    return self.__class__(data)

modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py

class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):
    """Plotter for a single time series

    Calling the plotter directly is a shorthand for :meth:`timeseries`.

    Parameters
    ----------
    ts
        time series object to plot; its ``name``, ``quantity``, ``_color``
        and ``_values_as_series`` attributes are used
    """

    def __init__(self, ts) -> None:
        self._ts = ts

    def __call__(self, **kwargs):
        """Plot timeseries (default plot)

        Parameters
        ----------
        **kwargs
            keyword arguments passed on to :meth:`timeseries`

        Returns
        -------
        the axes returned by :meth:`timeseries`
        """
        # default to timeseries plot; hand the axes back to the caller
        return self.timeseries(**kwargs)

    def timeseries(
        self, title=None, color=None, marker=".", linestyle="None", **kwargs
    ):
        """Plot timeseries

        Wraps the pandas plot() method.

        Parameters
        ----------
        title : str, optional
            plot title, default: [name]
        color : str, optional
            plot color, by default the color stored on the time series
        marker : str, optional
            plot marker, by default '.'
        linestyle : str, optional
            line style, by default the string "None"
        **kwargs
            other keyword arguments to plot()

        Returns
        -------
        the axes object returned by plot()
        """
        # an explicit color argument overrides the time series' own color
        kwargs["color"] = self._ts._color if color is None else color
        ax = self._ts._values_as_series.plot(
            marker=marker, linestyle=linestyle, **kwargs
        )

        title = self._ts.name if title is None else title
        ax.set_title(title)

        ax.set_ylabel(str(self._ts.quantity))
        return ax

    def hist(self, bins=100, title=None, color=None, **kwargs):
        """Plot histogram of timeseries values

        Wraps the pandas hist() method.

        Parameters
        ----------
        bins : int, optional
            specification of bins, by default 100
        title : str, optional
            plot title, default: observation name
        color : str, optional
            plot color, by default the color stored on the time series
        **kwargs
            other keyword arguments to hist()

        Returns
        -------
        matplotlib axes
        """
        title = self._ts.name if title is None else title

        # an explicit color argument overrides the time series' own color
        kwargs["color"] = self._ts._color if color is None else color

        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)
        ax.set_title(title)
        ax.set_xlabel(str(self._ts.quantity))
        return ax

hist

hist(bins=100, title=None, color=None, **kwargs)

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name	Type	Description	Default
`bins`	`int`	specification of bins, by default 100	`100`
`title`	`str`	plot title, default: observation name	`None`
`color`	`str`	plot color, by default "#d62728"	`None`
`**kwargs`		other keyword arguments to df.hist()	`{}`

Returns:

Type	Description
`matplotlib axes`

Source code in modelskill/timeseries/_plotter.py

def hist(self, bins=100, title=None, color=None, **kwargs):
    """Draw a histogram of the time series values

    Wraps the pandas hist() method.

    Parameters
    ----------
    bins : int, optional
        specification of bins, by default 100
    title : str, optional
        plot title, default: name of the time series
    color : str, optional
        plot color, by default the color stored on the time series
    **kwargs
        other keyword arguments to hist()

    Returns
    -------
    matplotlib axes
    """
    ts = self._ts

    if title is None:
        title = ts.name
    if color is None:
        color = ts._color
    kwargs["color"] = color

    axes = ts._values_as_series.hist(bins=bins, **kwargs)
    axes.set_title(title)
    # the quantity labels the value axis
    axes.set_xlabel(str(ts.quantity))
    return axes

timeseries

timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name	Type	Description	Default
`title`	`str`	plot title, default: [name]	`None`
`color`	`str`	plot color, by default '#d62728'	`None`
`marker`	`str`	plot marker, by default '.'	`'.'`
`linestyle`	`str`	line style, by default the string "None" (markers only, no connecting line)	`'None'`
`**kwargs`		other keyword arguments to df.plot()	`{}`

Source code in modelskill/timeseries/_plotter.py

def timeseries(
    self, title=None, color=None, marker=".", linestyle="None", **kwargs
):
    """Draw the time series values against time

    Wraps the pandas plot() method.

    Parameters
    ----------
    title : str, optional
        plot title, default: name of the time series
    color : str, optional
        plot color, by default the color stored on the time series
    marker : str, optional
        plot marker, by default '.'
    linestyle : str, optional
        line style, by default the string "None"
    **kwargs
        other keyword arguments to plot()

    Returns
    -------
    the axes object returned by plot()
    """
    ts = self._ts

    if color is None:
        color = ts._color
    kwargs["color"] = color

    axes = ts._values_as_series.plot(
        marker=marker, linestyle=linestyle, **kwargs
    )

    if title is None:
        title = ts.name
    axes.set_title(title)

    # the quantity labels the value axis
    axes.set_ylabel(str(ts.quantity))
    return axes

TrackObservation

modelskill.TrackObservation

attrs property writable

gtype property

n_points property

name property writable

plot instance-attribute

quantity property writable

time property

values property

weight property writable

x property writable

y property writable

equals

sel

to_dataframe

trim

modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter

hist

timeseries

attrs `property` `writable`

gtype `property`

n_points `property`

name `property` `writable`

plot `instance-attribute`

quantity `property` `writable`

time `property`

values `property`

weight `property` `writable`

x `property` `writable`

y `property` `writable`