The Vistula catchment is the largest catchment in Poland, with an area of 194,424 km2. This notebook shows how a hydrological model can evaluated using ModelSkill.
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import modelskill as ms
fldr = Path("../data/Vistula" )
df = pd.read_csv(fldr / "stations.csv" , index_col= 0 )
df
Id
6458010
VISTULA (TCZEW)
18.80556
54.08722
193922.9
6458500
VISTULA (WARSAW (WARSZAWA))
21.03250
52.24750
84945.1
6458753
PILICA (PRZEDBORZ)
19.87528
51.08944
2550.1
6458715
WIEPRZ (LUBARTOW)
22.64361
51.49806
6389.8
6458713
WIEPRZ (KRASNYSTAW)
23.17667
50.98528
3010.2
6458520
KAMIENNA (KUNOW)
21.27889
50.96194
1110.4
6458460
VISTULA (SANDOMIERZ)
21.74611
50.67250
31809.9
6458450
VISTULA (SZCZUCIN)
21.07722
50.32694
23869.3
6458406
DUNAJEC (NOWY SACZ)
20.68722
49.62722
4337.4
6158100
POPRAD (CHMELNICA)
20.73023
49.28918
1262.0
6458950
WISLOK (TRYNCZA)
22.54722
50.16222
3523.6
6458863
PISA (PTAKI)
21.79250
53.39306
3575.7
6458805
NAREW (SURAZ)
22.95500
52.94889
3425.3
6458924
LIWIEC (LOCHOW)
21.67833
52.51000
2471.4
6458555
KRZNA (MALOWA GORA)
23.46750
52.10361
3041.9
def get_comparer(df, mods, id ):
"""Get a Comparer object for a given model and station id"""
q = ms.Quantity(name= "Discharge" , unit= "m3/s" )
# create Observation object
fp = fldr / mods[0 ] / f" { id } .csv"
dfd = pd.read_csv(fp, index_col= 0 , parse_dates= True )
o = ms.PointObservation(dfd, item= "Qobs" , name= df.loc[id ].Station,
x= df.loc[id ].Long, y= df.loc[id ].Lat, quantity= q)
# create ModelResult objects
mm = []
for m in mods:
fp = fldr / m / f" { id } .csv"
dfd = pd.read_csv(fp, index_col= 0 , parse_dates= True )
mm.append(ms.PointModelResult(dfd, item= "Qsim" , name= m, quantity= q))
return ms.match(obs= o, mod= mm)
Compare a single observation with two model results
cmp = get_comparer(df, ["sim1" ,"sim2" ], df.index[0 ])
cmp
<Comparer>
Quantity: Discharge [m3/s]
Observation: VISTULA (TCZEW), n_points=3653
Model(s):
0: sim1
1: sim2
Plots
timeseries, scatter, boxplot, hist, kde, qq, taylor
cmp .sel(model= "sim1" ).plot.scatter();
Summary statistics
# set default metrics
ms.options.metrics.list = ["kge" , "cc" ]
model
observation
sim1
VISTULA (TCZEW)
3653
0.617
0.794
sim2
VISTULA (TCZEW)
3653
0.809
0.829
Statistics aggregated by month
cmp .data["month" ] = cmp .time.to_series().dt.month
cmp .skill(by= ["model" ,"month" ]) #["kge"].plot.bar();
model
month
sim1
1
VISTULA (TCZEW)
310
0.385138
0.440905
2
VISTULA (TCZEW)
283
0.578280
0.803975
3
VISTULA (TCZEW)
310
0.454622
0.730561
4
VISTULA (TCZEW)
300
0.509017
0.834517
5
VISTULA (TCZEW)
310
0.575944
0.697614
6
VISTULA (TCZEW)
300
0.344820
0.493330
7
VISTULA (TCZEW)
310
0.052140
0.298390
8
VISTULA (TCZEW)
310
0.219232
0.667376
9
VISTULA (TCZEW)
300
0.376622
0.552289
10
VISTULA (TCZEW)
310
0.626824
0.717061
11
VISTULA (TCZEW)
300
0.643888
0.741710
12
VISTULA (TCZEW)
310
0.288263
0.452674
sim2
1
VISTULA (TCZEW)
310
0.580579
0.622109
2
VISTULA (TCZEW)
283
0.844779
0.887547
3
VISTULA (TCZEW)
310
0.669936
0.767161
4
VISTULA (TCZEW)
300
0.764530
0.809858
5
VISTULA (TCZEW)
310
0.487875
0.655435
6
VISTULA (TCZEW)
300
0.496136
0.695364
7
VISTULA (TCZEW)
310
0.235510
0.540092
8
VISTULA (TCZEW)
310
0.269188
0.773781
9
VISTULA (TCZEW)
300
0.526274
0.741995
10
VISTULA (TCZEW)
310
0.719504
0.863399
11
VISTULA (TCZEW)
300
0.721616
0.840079
12
VISTULA (TCZEW)
310
0.506460
0.560932
cmp .skill(by= ["model" ,"month" ])["kge" ].plot.line()
plt.xlabel("Month" )
plt.xticks(np.arange(1 ,13 ), ["Jan" ,"Feb" ,"Mar" ,"Apr" ,"May" ,"Jun" ,"Jul" ,"Aug" ,"Sep" ,"Oct" ,"Nov" ,"Dec" ]);
Compare multiple observations with two model results
# loop through all stations in df and create a Comparer for each
cmps = []
for id in df.index:
try :
cmps.append(get_comparer(df, ["sim1" ,"sim2" ], id ))
except ValueError as e:
pass
cc = ms.ComparerCollection(cmps)
cc
<ComparerCollection>
Comparers:
0: VISTULA (TCZEW) - Discharge [m3/s]
1: PILICA (PRZEDBORZ) - Discharge [m3/s]
2: WIEPRZ (LUBARTOW) - Discharge [m3/s]
3: WIEPRZ (KRASNYSTAW) - Discharge [m3/s]
4: KAMIENNA (KUNOW) - Discharge [m3/s]
5: VISTULA (SANDOMIERZ) - Discharge [m3/s]
6: VISTULA (SZCZUCIN) - Discharge [m3/s]
7: DUNAJEC (NOWY SACZ) - Discharge [m3/s]
8: POPRAD (CHMELNICA) - Discharge [m3/s]
9: WISLOK (TRYNCZA) - Discharge [m3/s]
10: PISA (PTAKI) - Discharge [m3/s]
11: NAREW (SURAZ) - Discharge [m3/s]
12: LIWIEC (LOCHOW) - Discharge [m3/s]
13: KRZNA (MALOWA GORA) - Discharge [m3/s]
cc.skill(by= ["model" ,"observation" ], metrics= "kge" )["kge" ].plot.barh();
# Average skill over all stations, weighted by sqrt(area)
area = df.set_index("Station" ).loc[cc.obs_names].Area
cc.mean_skill(weights= np.sqrt(area)).round (3 )
model
sim1
51142
0.504
0.709
sim2
51142
0.586
0.717