Hydrology example from the Vistula catchment in Poland

The Vistula catchment is the largest catchment in Poland, with an area of 194,424 km2. This notebook shows how a hydrological model can evaluated using ModelSkill.

from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import modelskill as ms

fldr = Path("../data/Vistula")
df = pd.read_csv(fldr / "stations.csv", index_col=0)
df

	Station	Long	Lat	Area
Id
6458010	VISTULA (TCZEW)	18.80556	54.08722	193922.9
6458500	VISTULA (WARSAW (WARSZAWA))	21.03250	52.24750	84945.1
6458753	PILICA (PRZEDBORZ)	19.87528	51.08944	2550.1
6458715	WIEPRZ (LUBARTOW)	22.64361	51.49806	6389.8
6458713	WIEPRZ (KRASNYSTAW)	23.17667	50.98528	3010.2
6458520	KAMIENNA (KUNOW)	21.27889	50.96194	1110.4
6458460	VISTULA (SANDOMIERZ)	21.74611	50.67250	31809.9
6458450	VISTULA (SZCZUCIN)	21.07722	50.32694	23869.3
6458406	DUNAJEC (NOWY SACZ)	20.68722	49.62722	4337.4
6158100	POPRAD (CHMELNICA)	20.73023	49.28918	1262.0
6458950	WISLOK (TRYNCZA)	22.54722	50.16222	3523.6
6458863	PISA (PTAKI)	21.79250	53.39306	3575.7
6458805	NAREW (SURAZ)	22.95500	52.94889	3425.3
6458924	LIWIEC (LOCHOW)	21.67833	52.51000	2471.4
6458555	KRZNA (MALOWA GORA)	23.46750	52.10361	3041.9

def get_comparer(df, mods, id):
    """Get a Comparer object for a given model and station id"""
    q = ms.Quantity(name="Discharge", unit="m3/s")

    # create Observation object
    fp = fldr / mods[0] / f"{id}.csv"
    dfd = pd.read_csv(fp, index_col=0, parse_dates=True)
    o = ms.PointObservation(dfd, item="Qobs", name=df.loc[id].Station,
                            x=df.loc[id].Long, y=df.loc[id].Lat, quantity=q)
    
    # create ModelResult objects
    mm = []
    for m in mods:
        fp = fldr / m / f"{id}.csv"
        dfd = pd.read_csv(fp, index_col=0, parse_dates=True)
        mm.append(ms.PointModelResult(dfd, item="Qsim", name=m, quantity=q))
    
    return ms.match(obs=o, mod=mm)

Compare a single observation with two model results

cmp = get_comparer(df, ["sim1","sim2"], df.index[0])
cmp

<Comparer>
Quantity: Discharge [m3/s]
Observation: VISTULA (TCZEW), n_points=3653
Model(s):
0: sim1
1: sim2

Plots

timeseries, scatter, boxplot, hist, kde, qq, taylor

cmp.plot.timeseries();

cmp.sel(model="sim1").plot.scatter();

Summary statistics

# set default metrics
ms.options.metrics.list = ["kge", "cc"]

cmp.skill().round(3)

		n	kge	cc
model	observation
sim1	VISTULA (TCZEW)	3653	0.617	0.794
sim2	VISTULA (TCZEW)	3653	0.809	0.829

Statistics aggregated by month

cmp.data["month"] = cmp.time.to_series().dt.month

cmp.skill(by=["model","month"]) #["kge"].plot.bar();

		observation	n	kge	cc
model	month
sim1	1	VISTULA (TCZEW)	310	0.385138	0.440905
	2	VISTULA (TCZEW)	283	0.578280	0.803975
	3	VISTULA (TCZEW)	310	0.454622	0.730561
	4	VISTULA (TCZEW)	300	0.509017	0.834517
	5	VISTULA (TCZEW)	310	0.575944	0.697614
	6	VISTULA (TCZEW)	300	0.344820	0.493330
	7	VISTULA (TCZEW)	310	0.052140	0.298390
	8	VISTULA (TCZEW)	310	0.219232	0.667376
	9	VISTULA (TCZEW)	300	0.376622	0.552289
	10	VISTULA (TCZEW)	310	0.626824	0.717061
	11	VISTULA (TCZEW)	300	0.643888	0.741710
	12	VISTULA (TCZEW)	310	0.288263	0.452674
sim2	1	VISTULA (TCZEW)	310	0.580579	0.622109
	2	VISTULA (TCZEW)	283	0.844779	0.887547
	3	VISTULA (TCZEW)	310	0.669936	0.767161
	4	VISTULA (TCZEW)	300	0.764530	0.809858
	5	VISTULA (TCZEW)	310	0.487875	0.655435
	6	VISTULA (TCZEW)	300	0.496136	0.695364
	7	VISTULA (TCZEW)	310	0.235510	0.540092
	8	VISTULA (TCZEW)	310	0.269188	0.773781
	9	VISTULA (TCZEW)	300	0.526274	0.741995
	10	VISTULA (TCZEW)	310	0.719504	0.863399
	11	VISTULA (TCZEW)	300	0.721616	0.840079
	12	VISTULA (TCZEW)	310	0.506460	0.560932

cmp.skill(by=["model","month"])["kge"].plot.line()
plt.xlabel("Month")
plt.xticks(np.arange(1,13), ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]);

Compare multiple observations with two model results

# loop through all stations in df and create a Comparer for each
cmps = []
for id in df.index:
   try:
      cmps.append(get_comparer(df, ["sim1","sim2"], id))
   except ValueError as e:
      pass
cc = ms.ComparerCollection(cmps)
cc

<ComparerCollection>
Comparers:
0: VISTULA (TCZEW) - Discharge [m3/s]
1: PILICA (PRZEDBORZ) - Discharge [m3/s]
2: WIEPRZ (LUBARTOW) - Discharge [m3/s]
3: WIEPRZ (KRASNYSTAW) - Discharge [m3/s]
4: KAMIENNA (KUNOW) - Discharge [m3/s]
5: VISTULA (SANDOMIERZ) - Discharge [m3/s]
6: VISTULA (SZCZUCIN) - Discharge [m3/s]
7: DUNAJEC (NOWY SACZ) - Discharge [m3/s]
8: POPRAD (CHMELNICA) - Discharge [m3/s]
9: WISLOK (TRYNCZA) - Discharge [m3/s]
10: PISA (PTAKI) - Discharge [m3/s]
11: NAREW (SURAZ) - Discharge [m3/s]
12: LIWIEC (LOCHOW) - Discharge [m3/s]
13: KRZNA (MALOWA GORA) - Discharge [m3/s]

cc.skill(by=["model","observation"], metrics="kge")["kge"].plot.barh();

# Average skill over all stations, weighted by sqrt(area)
area = df.set_index("Station").loc[cc.obs_names].Area
cc.mean_skill(weights=np.sqrt(area)).round(3)

	n	kge	cc
model
sim1	51142	0.504	0.709
sim2	51142	0.586	0.717