{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Article for Journal of Open Source Software\n", "\n", "*Martin Vonk (2025)*\n", "\n", "This notebook replicates the results presented in the article submitted to the Journal of Open Source Software ([JOSS](https://joss.theoj.org/)). The article can be found here: Vonk, M. A. (2025). SPEI: A Python package for calculating and visualizing drought indices. Journal of Open Source Software, 10(111), 8454. [doi.org/10.21105/joss.08454](https://doi.org/10.21105/joss.08454)\n", "\n", "\n", "JOSS is a developer-friendly, open-access academic journal (ISSN 2475-9066) dedicated to research software packages and features a formal peer-review process. The pre-review and review of the SPEI package are publicly available in issues [openjournals/joss-reviews#8430](https://github.com/openjournals/joss-reviews/issues/8430) and [openjournals/joss-reviews#8454](https://github.com/openjournals/joss-reviews/issues/8454), respectively." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# dependencies\n", "from typing import Literal\n", "\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import scipy.stats as sps\n", "from cycler import cycler\n", "from matplotlib import patheffects\n", "from scipy.stats._survival import EmpiricalDistributionFunction\n", "\n", "import spei as si\n", "\n", "# matplotlib settings\n", "plt.rcParams.update(\n", " {\n", " \"axes.prop_cycle\": cycler(\n", " color=[\n", " \"#3f90da\",\n", " \"#ffa90e\",\n", " \"#bd1f01\",\n", " \"#94a4a2\",\n", " \"#832db6\",\n", " \"#a96b59\",\n", " \"#e76300\",\n", " \"#b9ac70\",\n", " \"#717581\",\n", " \"#92dadd\",\n", " ]\n", " ),\n", " \"axes.titlesize\": 7.0,\n", " \"axes.labelsize\": 7.0,\n", " \"xtick.labelsize\": 6.0,\n", " \"ytick.labelsize\": 6.0,\n", " \"legend.fontsize\": 7.0,\n", " \"legend.framealpha\": 1.0,\n", " }\n", ")\n", "\n", "\n", "# helper functions\n", "def axes_indicator(\n", " ax: plt.Axes,\n", " letter: str,\n", " x: float,\n", " y: float,\n", " ha: Literal[\"left\", \"right\"],\n", " va: Literal[\"top\", \"bottom\"],\n", "):\n", " \"\"\"Add an indicator to the axes.\"\"\"\n", " ax.annotate(\n", " f\"({letter})\",\n", " xy=(x, y),\n", " xycoords=\"axes fraction\",\n", " fontsize=mpl.rcParams[\"axes.titlesize\"],\n", " horizontalalignment=ha,\n", " verticalalignment=va,\n", " path_effects=[\n", " patheffects.Stroke(linewidth=1, foreground=\"white\"),\n", " patheffects.Normal(),\n", " ],\n", " )\n", "\n", "\n", "def plot_ecdf(\n", " ax: plt.Axes,\n", " data: pd.Series,\n", " ecdf: EmpiricalDistributionFunction,\n", " s: float,\n", " color: str,\n", " label: str,\n", " cdf: pd.Series | None = None,\n", " **kwargs,\n", ") -> None:\n", " data = data.drop_duplicates()\n", " ax.scatter(\n", " data,\n", " ecdf.probabilities,\n", " s=s,\n", " facecolor=color,\n", " label=label,\n", " **kwargs,\n", " )\n", " if cdf is not None:\n", " for idata, icdf, iecdf in zip(data, cdf, ecdf.probabilities):\n", " ax.plot(\n", " [idata, idata],\n", " [iecdf, icdf],\n", " color=color,\n", " linewidth=0.5,\n", " **kwargs,\n", " )\n", " return ecdf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"data/CABAUW.csv\", index_col=0, parse_dates=True)\n", "prec = df[\"prec\"]\n", "evap = df[\"evap\"]\n", "surplusd = prec - evap\n", "surplus = surplusd.resample(\"MS\").sum()\n", "head = df[\"head\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# highlight specific month\n", "month = 3\n", "ts = pd.Timestamp(\"2000-{:02d}-01\".format(month))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig, axd = plt.subplot_mosaic(\n", " [[\"meteo\"], [\"sp\"]], figsize=(7.0, 3.2), sharex=True, layout=\"constrained\"\n", ")\n", "\n", "axd[\"meteo\"].plot(prec.index, prec, linewidth=0.8, color=\"C0\")\n", "axd[\"meteo\"].plot(evap.index, evap, linewidth=0.8, color=\"C6\")\n", "axd[\"meteo\"].plot([], [], color=\"C0\", label=\"Precipitation\")\n", "axd[\"meteo\"].plot([], [], color=\"C6\", label=\"Potential Evaporation\")\n", "\n", "axd[\"meteo\"].legend(loc=(0, 1), ncol=2, frameon=False, columnspacing=1.0)\n", "axd[\"meteo\"].set_ylabel(\"Flux (mm/day)\")\n", "\n", "axd[\"meteo\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(10))\n", "axd[\"meteo\"].yaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))\n", "axd[\"meteo\"].set_ylim(bottom=0.0)\n", "axes_indicator(axd[\"meteo\"], letter=\"a\", x=0.005, y=0.97, ha=\"left\", va=\"top\")\n", "\n", "axd[\"sp\"].plot(\n", " surplus.index,\n", " surplus.values,\n", " color=\"C3\",\n", " linewidth=1.0,\n", " marker=\".\",\n", " markersize=2.0,\n", " label=\"Monthly Surplus (Precipitation minus Evaporation)\",\n", ")\n", "mid = surplus.index.month == ts.month\n", "axd[\"sp\"].scatter(\n", " surplus.index[mid], # + pd.Timedelta(days=15),\n", " surplus.values[mid],\n", " color=\"C2\",\n", " s=5.0,\n", " zorder=2,\n", " label=f\"Data points {ts.strftime('%B')}\",\n", ")\n", "axd[\"sp\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(50))\n", "axd[\"sp\"].yaxis.set_minor_locator(mpl.ticker.MultipleLocator(25))\n", "axd[\"sp\"].xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", "axd[\"sp\"].xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", "axd[\"sp\"].set_xlim(surplus.index[0], surplus.index[-1])\n", "axd[\"sp\"].set_ylabel(\"Precipitation\\nsurplus (mm)\")\n", "axd[\"sp\"].legend(loc=(0, 1), frameon=False, ncol=2)\n", "axes_indicator(axd[\"sp\"], letter=\"b\", x=0.005, y=0.97, ha=\"left\", va=\"top\")\n", "\n", "axd[\"sp\"].set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", "\n", "# fig.savefig(\"../../paper/figures/monthly_precipitation_surplus.png\", dpi=300, bbox_inches=\"tight\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Standardized Index Procedure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Fit Distribution" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dist = sps.fisk\n", "sispei = si.SI(\n", " series=surplus,\n", " dist=dist,\n", " timescale=1,\n", " # fit_freq=\"MS\",\n", ")\n", "sispei.fit_distribution()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Equiprobability Transform" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fit_dist = sispei._dist_dict[ts]\n", "data = fit_dist.data.sort_values()\n", "cdf = fit_dist.cdf().loc[data.index]\n", "ecdf = sps.ecdf(data).cdf\n", "\n", "zscores = np.arange(-3.0, 3.1, 0.1)\n", "norm_cdf = sps.norm.cdf(zscores, loc=0.0, scale=1.0)\n", "norm_cdf_transformed = sps.norm.ppf(cdf.values, loc=0.0, scale=1.0)\n", "\n", "fig, axd = plt.subplot_mosaic(\n", " [[\"cdf\", \"norm\"]],\n", " figsize=(7.0, 3),\n", " width_ratios=[1.5, 1.0],\n", " sharey=True,\n", " layout=\"tight\",\n", ")\n", "plot_ecdf(\n", " ax=axd[\"cdf\"],\n", " data=data,\n", " cdf=cdf,\n", " ecdf=ecdf,\n", " s=10.0,\n", " color=\"C2\",\n", " label=f\"Data points {ts.strftime('%B')}\",\n", " zorder=3,\n", ")\n", "\n", "bin = 5.0\n", "bins = np.arange(data.min() // bin * bin, data.max() + bin, bin)\n", "axd[\"cdf\"].plot(\n", " bins,\n", " fit_dist.dist.cdf(bins, *fit_dist.pars, loc=fit_dist.loc, scale=fit_dist.scale),\n", " label=f\"Fitted {dist.name} distribution\",\n", " color=\"C0\",\n", ")\n", "\n", "axd[\"cdf\"].legend(loc=\"upper left\")\n", "axd[\"cdf\"].set_xlim(np.min(bins), np.max(bins))\n", "axd[\"cdf\"].xaxis.set_minor_locator(mpl.ticker.MultipleLocator(bin))\n", "axd[\"cdf\"].xaxis.set_major_locator(mpl.ticker.MultipleLocator(bin * 2))\n", "axd[\"cdf\"].set_ylim(0.0, 1.0)\n", "axd[\"cdf\"].yaxis.set_major_locator(mpl.ticker.MultipleLocator(0.1))\n", "axd[\"cdf\"].yaxis.set_major_formatter(mpl.ticker.PercentFormatter(1.0))\n", "axd[\"cdf\"].set_xlabel(\"Precipitation surplus (mm)\")\n", "axd[\"cdf\"].set_ylabel(\"Cumulative probability\")\n", "axes_indicator(axd[\"cdf\"], \"a\", 0.99, 0.02, ha=\"right\", va=\"bottom\")\n", "\n", "axd[\"norm\"].plot(\n", " zscores, norm_cdf, label=\"Standardized\\nnormal distribution\", color=\"C4\", zorder=3\n", ")\n", "axd[\"norm\"].scatter(\n", " norm_cdf_transformed,\n", " cdf.values,\n", " s=10.0,\n", " label=f\"Projected points\\n{dist.name} distribution\",\n", " color=\"C0\",\n", " zorder=2,\n", ")\n", "axd[\"norm\"].legend(loc=\"upper left\")\n", "axd[\"norm\"].set_xlim(np.min(zscores), np.max(zscores))\n", "axd[\"norm\"].set_xlabel(\"Z-score / SPEI value\")\n", "\n", "# visualize specific data point\n", "idx = data.index[20]\n", "cdf_idx = cdf.at[idx]\n", "ppf_idx = sps.norm.ppf(cdf_idx)\n", "print(\n", " f\"Data index: {idx.strftime('%Y')}, Data value: {data.loc[idx]:0.2f} CDF: {cdf_idx:0.1%}, PPF: {ppf_idx:0.4f}\"\n", ")\n", "axd[\"cdf\"].plot(\n", " [data.loc[idx], data.loc[idx], np.max(data)],\n", " [0.0, cdf_idx, cdf_idx],\n", " color=\"k\",\n", " linestyle=\"--\",\n", " linewidth=1.0,\n", " zorder=0,\n", ")\n", "axd[\"norm\"].plot(\n", " [np.min(zscores), ppf_idx, ppf_idx],\n", " [\n", " cdf_idx,\n", " cdf_idx,\n", " 0.0,\n", " ],\n", " color=\"k\",\n", " linestyle=\"--\",\n", " linewidth=1.0,\n", " zorder=0,\n", ")\n", "axes_indicator(axd[\"norm\"], \"b\", 0.99, 0.02, ha=\"right\", va=\"bottom\")\n", "\n", "# fig.savefig(\"../../paper/figures/surplus_fit_cdf.png\", dpi=300, bbox_inches=\"tight\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Results\n", "\n", "#### Time Series" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "spei1 = sispei.norm_ppf()\n", "\n", "ax = si.plot.si(spei1, figsize=(7.0, 2.0), layout=\"tight\")\n", "# ax.xaxis.set_minor_locator(mpl.dates.MonthLocator())\n", "ax.xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", "ax.xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", "ax.legend(labels=[\"SPEI-1\"], loc=(0, 1), frameon=False)\n", "ax.set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", "ax.set_ylabel(\"Z-score\")\n", "\n", "# ax.get_figure().savefig(\"../../paper/figures/spei1.png\", dpi=300, bbox_inches=\"tight\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Heatmap" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "speis = [\n", " spei1.rename(\"1\"),\n", " si.spei(surplus, timescale=3).rename(\"3\"),\n", " si.spei(surplus, timescale=6).rename(\"6\"),\n", " si.spei(surplus, timescale=9).rename(\"9\"),\n", " si.spei(surplus, timescale=12).rename(\"12\"),\n", " si.spei(surplus, timescale=24).rename(\"24\"),\n", "]\n", "f, ax = plt.subplots(figsize=(7.0, 2.0))\n", "si.plot.heatmap(speis, cmap=\"vik_r\", vmin=-3, vmax=3, add_category=False, ax=ax)\n", "ax.set_ylabel(\"Time scale (months)\")\n", "f.axes[-1].set_ylabel(\"Z-score\")\n", "ax.xaxis.set_minor_locator(mpl.dates.YearLocator(1))\n", "ax.xaxis.set_major_locator(mpl.dates.YearLocator(2))\n", "ax.set_xlim(pd.Timestamp(\"1990\"), pd.Timestamp(\"2020\"))\n", "\n", "# ax.get_figure().savefig(\"../../paper/figures/spei_heatmap.png\", dpi=300, bbox_inches=\"tight\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Threshold" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "perc = sps.norm.cdf(-1.0) # same as zscore -1.0\n", "thres = sispei.ppf(perc).rename(f\"Threshold {perc:0.0%} percentile\")\n", "fig, ax = plt.subplots(figsize=(7.0, 2.0), layout=\"tight\")\n", "ax = si.plot.threshold(\n", " surplus,\n", " thres,\n", " ax=ax,\n", " **dict(\n", " color=\"C3\",\n", " linewidth=1.0,\n", " marker=\".\",\n", " markersize=2.0,\n", " label=\"Monthly Surplus (Precipitation minus Evaporation)\",\n", " ),\n", ")\n", "ax.set_xlim(pd.Timestamp(\"2003\"), pd.Timestamp(\"2019\"))\n", "ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(50))\n", "ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(25))\n", "ax.xaxis.set_major_locator(mpl.dates.YearLocator(1))\n", "ax.xaxis.set_minor_locator(mpl.dates.MonthLocator([4, 7, 10]))\n", "ax.set_ylabel(\"Precipitation\\nsurplus (mm)\")\n", "ax.legend(ncol=3, loc=(0, 1), frameon=False)\n", "\n", "# fig.savefig(\"../../paper/figures/threshold.png\", dpi=300, bbox_inches=\"tight\")" ] } ], "metadata": { "kernelspec": { "display_name": "SPEI", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 2 }