Skip to content

TrackObservation

modelskill.TrackObservation

Bases: Observation

Class for observation with locations moving in space, e.g. satellite altimetry

In addition to the datetime of each observation point, the data must also contain the x and y coordinates of that point.

Create TrackObservation from dfs0 or DataFrame

Parameters:

Name Type Description Default
data (str, Path, mikeio.Dataset, pd.DataFrame, xr.Dataset)

path to dfs0 file or object with track data

required
item (str, int)

item name or index of values, by default None; if data contains more than one item, item must be given

None
name str

user-defined name for easy identification in plots etc, by default file basename

None
x_item (str, int)

item name or index of x-coordinate, by default 0

0
y_item (str, int)

item name or index of y-coordinate, by default 1

1
keep_duplicates (str, bool)

strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates): "first" to keep first occurrence, "last" to keep last occurrence, False to drop all duplicates, "offset" to add milliseconds to consecutive duplicates, by default "first"

'first'
quantity Quantity

The quantity of the observation, for validation with model results. For MIKE dfs files this is inferred from the EUM information.

None
aux_items list

list of names or indices of auxiliary items, by default None

None
attrs dict

additional attributes to be added to the data, by default None

None
weight float

weighting factor for skill scores, by default 1.0

1.0

Examples:

>>> import modelskill as ms
>>> o1 = ms.TrackObservation("track.dfs0", item=2, name="c2")
>>> o1 = ms.TrackObservation("track.dfs0", item="wind_speed", name="c2")
>>> o1 = ms.TrackObservation("lon_after_lat.dfs0", item="wl", x_item=1, y_item=0)
>>> o1 = ms.TrackObservation("track_wl.dfs0", item="wl", x_item="lon", y_item="lat")
>>> n = 5
>>> df = pd.DataFrame(
...         {
...             "t": pd.date_range("2010-01-01", freq="10s", periods=n),
...             "x": np.linspace(0, 10, n),
...             "y": np.linspace(45000, 45100, n),
...             "swh": [0.1, 0.3, 0.4, 0.5, 0.3],
...         }
... )
>>> df = df.set_index("t")
>>> df
                    x        y  swh
t
2010-01-01 00:00:00   0.0  45000.0  0.1
2010-01-01 00:00:10   2.5  45025.0  0.3
2010-01-01 00:00:20   5.0  45050.0  0.4
2010-01-01 00:00:30   7.5  45075.0  0.5
2010-01-01 00:00:40  10.0  45100.0  0.3
>>> t1 = TrackObservation(df, name="fake")
>>> t1.n_points
5
>>> t1.values
array([0.1, 0.3, 0.4, 0.5, 0.3])
>>> t1.time
DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',
           '2010-01-01 00:00:20', '2010-01-01 00:00:30',
           '2010-01-01 00:00:40'],
          dtype='datetime64[ns]', name='t', freq=None)
>>> t1.x
array([ 0. ,  2.5,  5. ,  7.5, 10. ])
>>> t1.y
array([45000., 45025., 45050., 45075., 45100.])
Source code in modelskill/obs.py
class TrackObservation(Observation):
    """Class for observation with locations moving in space, e.g. satellite altimetry

    In addition to the datetime of each observation point, the data must
    also contain the x and y coordinates of that point.

    Create TrackObservation from dfs0 or DataFrame

    Parameters
    ----------
    data : (str, Path, mikeio.Dataset, pd.DataFrame, xr.Dataset)
        path to dfs0 file or object with track data
    item : (str, int), optional
        item name or index of values, by default None;
        if data contains more than one item, item must be given
    name : str, optional
        user-defined name for easy identification in plots etc, by default file basename
    x_item : (str, int), optional
        item name or index of x-coordinate, by default 0
    y_item : (str, int), optional
        item name or index of y-coordinate, by default 1
    keep_duplicates : (str, bool), optional
        strategy for handling duplicate timestamps (xarray.Dataset.drop_duplicates):
        "first" to keep first occurrence, "last" to keep last occurrence,
        False to drop all duplicates, "offset" to add milliseconds to
        consecutive duplicates, by default "first"
    offset_duplicates : float, optional
        Deprecated, use ``keep_duplicates`` instead. A ``FutureWarning`` is
        issued when a value other than the default 0.001 is given. The value
        is forwarded unchanged to the track input parser.
    quantity : Quantity, optional
        The quantity of the observation, for validation with model results.
        For MIKE dfs files this is inferred from the EUM information.
    aux_items : list, optional
        list of names or indices of auxiliary items, by default None
    attrs : dict, optional
        additional attributes to be added to the data, by default None
    weight : float, optional
        weighting factor for skill scores, by default 1.0

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import modelskill as ms
    >>> o1 = ms.TrackObservation("track.dfs0", item=2, name="c2")

    >>> o1 = ms.TrackObservation("track.dfs0", item="wind_speed", name="c2")

    >>> o1 = ms.TrackObservation("lon_after_lat.dfs0", item="wl", x_item=1, y_item=0)

    >>> o1 = ms.TrackObservation("track_wl.dfs0", item="wl", x_item="lon", y_item="lat")

    >>> n = 5
    >>> df = pd.DataFrame(
    ...         {
    ...             "t": pd.date_range("2010-01-01", freq="10s", periods=n),
    ...             "x": np.linspace(0, 10, n),
    ...             "y": np.linspace(45000, 45100, n),
    ...             "swh": [0.1, 0.3, 0.4, 0.5, 0.3],
    ...         }
    ... )
    >>> df = df.set_index("t")
    >>> df
                        x        y  swh
    t
    2010-01-01 00:00:00   0.0  45000.0  0.1
    2010-01-01 00:00:10   2.5  45025.0  0.3
    2010-01-01 00:00:20   5.0  45050.0  0.4
    2010-01-01 00:00:30   7.5  45075.0  0.5
    2010-01-01 00:00:40  10.0  45100.0  0.3
    >>> t1 = ms.TrackObservation(df, name="fake")
    >>> t1.n_points
    5
    >>> t1.values
    array([0.1, 0.3, 0.4, 0.5, 0.3])
    >>> t1.time
    DatetimeIndex(['2010-01-01 00:00:00', '2010-01-01 00:00:10',
               '2010-01-01 00:00:20', '2010-01-01 00:00:30',
               '2010-01-01 00:00:40'],
              dtype='datetime64[ns]', name='t', freq=None)
    >>> t1.x
    array([ 0. ,  2.5,  5. ,  7.5, 10. ])
    >>> t1.y
    array([45000., 45025., 45050., 45075., 45100.])

    """

    def __init__(
        self,
        data: TrackType,
        *,
        item: Optional[int | str] = None,
        name: Optional[str] = None,
        weight: float = 1.0,
        x_item: Optional[int | str] = 0,
        y_item: Optional[int | str] = 1,
        keep_duplicates: bool | str = "first",
        offset_duplicates: float = 0.001,
        quantity: Optional[Quantity] = None,
        aux_items: Optional[list[int | str]] = None,
        attrs: Optional[dict] = None,
    ) -> None:
        # Input that is already validated is passed straight to the base
        # class; everything else is parsed first.
        if not self._is_input_validated(data):
            # A non-default value means the caller explicitly used the
            # deprecated argument.
            if offset_duplicates != 0.001:
                warnings.warn(
                    "The 'offset_duplicates' argument is deprecated, use 'keep_duplicates' argument.",
                    FutureWarning,
                    # attribute the warning to the calling code rather than
                    # to this constructor
                    stacklevel=2,
                )
            data = _parse_track_input(
                data=data,
                name=name,
                item=item,
                quantity=quantity,
                x_item=x_item,
                y_item=y_item,
                keep_duplicates=keep_duplicates,
                offset_duplicates=offset_duplicates,
                aux_items=aux_items,
            )
        # Both paths are expected to yield an xarray Dataset at this point.
        assert isinstance(data, xr.Dataset)
        super().__init__(data=data, weight=weight, attrs=attrs)

attrs property writable

attrs

Attributes of the observation

gtype property

gtype

Geometry type

n_points property

n_points

Number of data points

name property writable

name

Name of time series (value item name)

plot instance-attribute

plot = plotter(self)

Plot using the ComparerPlotter

Examples:

>>> obj.plot.timeseries()
>>> obj.plot.hist()

quantity property writable

quantity

Quantity of time series

time property

time

Time index

values property

values

Values as numpy array

weight property writable

weight

Weighting factor for skill scores

x property writable

x

x-coordinate

y property writable

y

y-coordinate

equals

equals(other)

Check if two TimeSeries are equal

Source code in modelskill/timeseries/_timeseries.py
def equals(self, other: TimeSeries) -> bool:
    """Return whether the data of this object equals the data of ``other``.

    The comparison is delegated to the ``equals`` method of ``self.data``.
    """
    mine, theirs = self.data, other.data
    return mine.equals(theirs)

sel

sel(**kwargs)

Select data by label

Source code in modelskill/timeseries/_timeseries.py
def sel(self: T, **kwargs: Any) -> T:
    """Return a new object of the same class holding the selected data.

    All keyword arguments are forwarded to ``self.data.sel``.
    """
    subset = self.data.sel(**kwargs)
    return self.__class__(subset)

to_dataframe

to_dataframe()

Convert matched data to pandas DataFrame

Include x, y coordinates only if gtype=track

Returns:

Type Description
DataFrame

data as a pandas DataFrame

Source code in modelskill/timeseries/_timeseries.py
def to_dataframe(self) -> pd.DataFrame:
    """Return the data as a pandas DataFrame

    For point geometry the x, y and z coordinate variables are dropped;
    for track geometry only z is dropped and x, y become the leading columns.

    Returns
    -------
    pd.DataFrame
        data as a pandas DataFrame

    Raises
    ------
    NotImplementedError
        if gtype is neither point nor track
    """
    gtype = self.gtype

    if gtype == str(GeometryType.POINT):
        # scalar coordinate variables would otherwise end up as
        # columns in the dataframe, so they are dropped first
        without_coords = self.data.drop_vars(["x", "y", "z"])
        return without_coords.to_dataframe()

    if gtype == str(GeometryType.TRACK):
        frame = self.data.drop_vars(["z"]).to_dataframe()
        # x and y go first, remaining columns keep their order
        leading = ["x", "y"]
        trailing = [col for col in frame.columns if col not in leading]
        return frame[leading + trailing]

    raise NotImplementedError(f"Unknown gtype: {self.gtype}")

trim

trim(start_time=None, end_time=None, buffer='1s')

Trim observation data to a given time interval

Parameters:

Name Type Description Default
start_time Timestamp

start time

None
end_time Timestamp

end time

None
buffer str

buffer time around start and end time, by default "1s"

'1s'
Source code in modelskill/timeseries/_timeseries.py
def trim(
    self: T,
    start_time: Optional[pd.Timestamp] = None,
    end_time: Optional[pd.Timestamp] = None,
    buffer: str = "1s",
) -> T:
    """Trim observation data to a given time interval

    Parameters
    ----------
    start_time : pd.Timestamp, optional
        start time, by default None (no lower bound)
    end_time : pd.Timestamp, optional
        end time, by default None (no upper bound)
    buffer : str, optional
        buffer time around start and end time, by default "1s"

    Returns
    -------
    object of the same class as ``self``
        new object holding only the data inside the buffered interval

    Raises
    ------
    ValueError
        if no data is left after trimming
    """
    pad = pd.Timedelta(buffer)

    # Expand the given bounds with the buffer. A bound that is None is left
    # as None so the slice stays open on that side; pd.Timestamp(None) would
    # produce NaT, which is not a usable slice bound.
    if start_time is not None:
        start_time = pd.Timestamp(start_time) - pad
    if end_time is not None:
        end_time = pd.Timestamp(end_time) + pad

    data = self.data.sel(time=slice(start_time, end_time))
    if len(data.time) == 0:
        raise ValueError(
            f"No data left after trimming to {start_time} - {end_time}"
        )
    return self.__class__(data)

modelskill.timeseries._plotter.MatplotlibTimeSeriesPlotter

Bases: TimeSeriesPlotter

Source code in modelskill/timeseries/_plotter.py
class MatplotlibTimeSeriesPlotter(TimeSeriesPlotter):
    """Plotter that draws a time series via the plotting methods of its values.

    The wrapped time series object is expected to provide the attributes
    read by the methods below: ``name``, ``quantity``, ``_color`` and
    ``_values_as_series``.
    """

    def __init__(self, ts) -> None:
        self._ts = ts

    def __call__(self, **kwargs):
        """Plot the time series; shorthand for :meth:`timeseries`.

        Parameters
        ----------
        **kwargs
            keyword arguments passed on to :meth:`timeseries`

        Returns
        -------
        the axes object returned by :meth:`timeseries`
        """
        # default to timeseries plot; hand the axes back to the caller so
        # that calling the plotter behaves like calling timeseries()
        return self.timeseries(**kwargs)

    def timeseries(
        self, title=None, color=None, marker=".", linestyle="None", **kwargs
    ):
        """Plot timeseries

        Delegates to the ``plot()`` method of the time series values
        (``_values_as_series``, presumably a pandas Series).

        Parameters
        ----------
        title : str, optional
            plot title, default: name of the time series
        color : str, optional
            plot color, by default the color of the time series (``_color``)
        marker : str, optional
            plot marker, by default '.'
        linestyle : str, optional
            line style, by default the string "None"
        **kwargs
            other keyword arguments to the underlying plot() call

        Returns
        -------
        the axes object returned by the underlying plot() call
        """
        kwargs["color"] = self._ts._color if color is None else color
        ax = self._ts._values_as_series.plot(
            marker=marker, linestyle=linestyle, **kwargs
        )

        title = self._ts.name if title is None else title
        ax.set_title(title)

        # the quantity labels the value axis
        ax.set_ylabel(str(self._ts.quantity))
        return ax

    def hist(self, bins=100, title=None, color=None, **kwargs):
        """Plot histogram of timeseries values

        Delegates to the ``hist()`` method of the time series values
        (``_values_as_series``, presumably a pandas Series).

        Parameters
        ----------
        bins : int, optional
            specification of bins, by default 100
        title : str, optional
            plot title, default: name of the time series
        color : str, optional
            plot color, by default the color of the time series (``_color``)
        **kwargs
            other keyword arguments to the underlying hist() call

        Returns
        -------
        the axes object returned by the underlying hist() call
        """
        title = self._ts.name if title is None else title

        kwargs["color"] = self._ts._color if color is None else color

        ax = self._ts._values_as_series.hist(bins=bins, **kwargs)
        ax.set_title(title)
        # for a histogram the quantity labels the x-axis
        ax.set_xlabel(str(self._ts.quantity))
        return ax

hist

hist(bins=100, title=None, color=None, **kwargs)

Plot histogram of timeseries values

Wraps pandas.DataFrame hist() method.

Parameters:

Name Type Description Default
bins int

specification of bins, by default 100

100
title str

plot title, default: observation name

None
color str

plot color, by default "#d62728"

None
**kwargs

other keyword arguments to df.hist()

{}

Returns:

Type Description
matplotlib axes
Source code in modelskill/timeseries/_plotter.py
def hist(self, bins=100, title=None, color=None, **kwargs):
    """Draw a histogram of the time series values

    Delegates to the ``hist()`` method of the time series values.

    Parameters
    ----------
    bins : int, optional
        specification of bins, by default 100
    title : str, optional
        plot title, default: name of the time series
    color : str, optional
        plot color, default: color of the time series
    **kwargs
        other keyword arguments to the underlying hist() call

    Returns
    -------
    matplotlib axes
    """
    ts = self._ts

    if title is None:
        title = ts.name

    if color is None:
        color = ts._color
    kwargs["color"] = color

    axes = ts._values_as_series.hist(bins=bins, **kwargs)
    axes.set_title(title)
    axes.set_xlabel(str(ts.quantity))
    return axes

timeseries

timeseries(title=None, color=None, marker='.', linestyle='None', **kwargs)

Plot timeseries

Wraps pandas.DataFrame plot() method.

Parameters:

Name Type Description Default
title str

plot title, default: [name]

None
color str

plot color, by default '#d62728'

None
marker str

plot marker, by default '.'

'.'
linestyle str

line style, by default the string "None"

'None'
**kwargs

other keyword arguments to df.plot()

{}
Source code in modelskill/timeseries/_plotter.py
def timeseries(
    self, title=None, color=None, marker=".", linestyle="None", **kwargs
):
    """Draw the time series values against time

    Delegates to the ``plot()`` method of the time series values.

    Parameters
    ----------
    title : str, optional
        plot title, default: name of the time series
    color : str, optional
        plot color, default: color of the time series
    marker : str, optional
        plot marker, by default '.'
    linestyle : str, optional
        line style, by default the string "None"
    **kwargs
        other keyword arguments to the underlying plot() call
    """
    ts = self._ts

    if color is None:
        color = ts._color
    kwargs["color"] = color

    axes = ts._values_as_series.plot(
        marker=marker, linestyle=linestyle, **kwargs
    )

    if title is None:
        title = ts.name
    axes.set_title(title)

    axes.set_ylabel(str(ts.quantity))
    return axes