\documentclass[12pt]{article}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage{amsmath, amssymb}
\usepackage{graphicx}
\usepackage{float}
\usepackage{booktabs}
\usepackage[colorlinks=true, citecolor=blue, linkcolor=blue, urlcolor=blue]{hyperref}
\usepackage{xurl}
\usepackage{geometry}
\usepackage{lineno}
\usepackage{array}
\geometry{a4paper, margin=2.5cm}

% Command to display DOIs as hyperlinks in the bibliography.
\providecommand{\doi}[1]{\href{https://doi.org/#1}{doi:#1}}

% Enable/disable line numbering for review.
% Comment the following line for the final preprint version.
%\newcommand{\enablelinenumbers}{\linenumbers}

\title{GRADEX-DUAL: A Dual Framework Combining Point and Regional Frequency Analysis for GRADEX Parameter Estimation in Basins with Sparse Rain Gauge Networks}
\author{Mauricio Javier Victoria Niño%
  \thanks{Independent Researcher, Colombia. ORCID: 0009-0003-4328-5691.\protect\\ E-mail: \texttt{hidratecsa@gmail.com}}}
\date{}

\begin{document}
%\enablelinenumbers

\maketitle

\noindent \textit{This document is a preprint that has not been peer-reviewed, submitted to EarthArXiv. Source code and example data are available at: \url{https://github.com/MauricioVictoriaN/GRADEX-DUAL}.}

\vspace{1em}

\begin{center}
\textbf{\large Abstract}
\end{center}

\noindent GRADEX-DUAL v1.0, an open-source computational framework implemented in R, is presented. It combines two complementary methodological layers for estimating the GRADEX parameter —defined as the slope of the annual maximum precipitation quantile function on Gumbel probability paper between return periods \(T = 10\) and \(T = 100\) years—, which is fundamental for the GRADEX-IDF methodology applied to hydrological design with HEC-HMS.

The first layer follows the classic pointwise approach of Guillot and Duband (1967): fitting four probability distributions per station (Gumbel, Log-Normal, Pearson III, and Gamma) using L-moments and maximum likelihood; two-stage model selection (Anderson-Darling followed by minimum RMSE); and regionalization to the gauging point using Inverse Distance Weighting (IDW) with leave-one-out (LOO) cross-validated optimal exponent or Ordinary Kriging with automatic variogram fitting. A multi-metric scoring rule (RMSE + \(R^2\) + |BIAS|) selects the better spatial method, with an anti-degenerate-Kriging safeguard that disqualifies Kriging whenever its LOO prediction-to-observation variance ratio falls below 0.10 —indicating variogram collapse to a near-constant predictor.

The second layer implements the Hosking and Wallis (1997) regional frequency analysis: record-length-weighted regional L-moments, selection of the optimal regional distribution among five candidate distributions (GEV, GLO, GNO, PE3, and GPA) using the Monte Carlo-calibrated goodness-of-fit \(Z\) statistic, and 95\% confidence intervals computed using the H\&W bootstrap simulation algorithm.

A three-gate quality-control weighting system integrates both estimates: the H\&W weight is nullified when the bootstrap confidence interval amplitude exceeds 100\%, the spatial weight is nullified when the LOO cross-validation \(R^2\) falls below 0.3, and Kriging is disqualified when its LOO variance ratio falls below 0.10. Prior diagnostics include the Ljung-Box test for serial independence, the Hosking-Wallis \(H\) statistic for regional homogeneity, the discordancy measure \(D\), Mann-Kendall trend tests with Theil-Sen slope estimator, a non-stationarity sensitivity analysis on detrended series, and a Monte Carlo validation experiment against synthetic regions with known true GRADEX.

The framework is evaluated on a nine-station IDEAM network in the upper Porce River basin (Antioquia, Colombia, 254 station-years). The Porce is a major tributary of the Nechí River, which in turn flows into the Cauca River and ultimately into the Magdalena, so the study area belongs hydrographically to the lower Cauca macro-basin. Results show that even with a homogeneous region (\(H = -0.92\)) and a network surpassing the H\&W minimum threshold, the bootstrap confidence interval amplitude reaches 121\% and the spatial \(R^2\) collapses to 0.006 once the degenerate-Kriging safeguard is activated, triggering the framework's fallback regime with a \(\pm30\%\) sensitivity range. A Monte Carlo experiment (200 replicates of 9-station, 30-year synthetic regions sampled from a GEV with \(\kappa = -0.10\)) confirms an empirical coverage of 85.0\% within the \(\pm30\%\) interval, and quantifies a systematic underestimation bias of \(-20.7\%\) attributable to fitting Gumbel L-moments to data drawn from a heavy-tailed GEV.

\vspace{0.5cm}
\noindent \textbf{Keywords:} GRADEX, extreme precipitation, L-moments, regional frequency analysis, IDW, Kriging, HEC-HMS, hydrological design, Colombia, Hosking-Wallis

\vspace{1cm}
\hrule
\vspace{0.5cm}

\begin{center}
\textbf{\large Resumen (Spanish)}
\end{center}

\noindent Se presenta GRADEX-DUAL v1.0, un marco computacional de código abierto implementado en R que combina dos capas metodológicas complementarias para la estimación del parámetro GRADEX —definido como la pendiente de la función cuantil de precipitaciones máximas anuales en papel de probabilidad de Gumbel entre los períodos de retorno \(T = 10\) y \(T = 100\) años—, fundamental para la metodología GRADEX-IDF aplicada al diseño hidrológico con HEC-HMS.

La primera capa sigue el enfoque puntual clásico de Guillot y Duband (1967): ajuste de cuatro distribuciones de probabilidad por estación (Gumbel, Log-Normal, Pearson III y Gamma) mediante L-momentos y máxima verosimilitud; selección del mejor modelo en dos etapas (Anderson-Darling seguido de RMSE mínimo); y regionalización al punto de aforo por Distancia Inversa Ponderada (IDW) con exponente optimizado mediante validación cruzada leave-one-out (LOO) u Kriging Ordinario con variograma automático. Una regla de puntuación multi-métrica (RMSE + \(R^2\) + |SESGO|) selecciona el método espacial, con una salvaguarda anti-Kriging-degenerado que descalifica al Kriging cuando la razón de varianza de las predicciones LOO sobre las observaciones cae por debajo de 0,10 —indicando colapso del variograma a un predictor casi constante.

La segunda capa implementa el análisis frecuencial regional de Hosking y Wallis (1997): L-momentos regionales ponderados por longitud de registro, selección de la distribución regional óptima entre cinco distribuciones candidatas (GEV, GLO, GNO, PE3 y GPA) mediante el estadístico \(Z\) de bondad de ajuste calibrado por Montecarlo, e intervalo de confianza al 95\% calculado mediante el algoritmo de simulación bootstrap de H\&W.

Un sistema de ponderación con tres compuertas de calidad integra ambas estimaciones: el peso H\&W se anula cuando la amplitud del IC bootstrap supera el 100\%, el peso espacial se anula cuando el \(R^2\) de la validación cruzada LOO es inferior a 0,3, y el Kriging se descalifica cuando su razón de varianza LOO cae por debajo de 0,10. Los diagnósticos previos incluyen la prueba de Ljung-Box, el estadístico \(H\) de homogeneidad regional, la medida de discordancia \(D\), pruebas de tendencia Mann-Kendall con estimador de pendiente Theil-Sen, un análisis de sensibilidad sobre series detendidas, y un experimento de validación Montecarlo contra regiones sintéticas con GRADEX verdadero conocido.

El marco se evalúa en una red de nueve estaciones IDEAM en la cuenca alta del río Porce (norte de Antioquia, Colombia, 254 estación-años). El Porce es uno de los principales afluentes del río Nechí, el cual entrega sus aguas al río Cauca, quien a su vez desemboca en el Magdalena; el área de estudio pertenece, por tanto, hidrográficamente a la \emph{cuenca baja del río Cauca}. Los resultados demuestran que aún con una región homogénea (\(H = -0{,}92\)) y un tamaño de red que supera el umbral mínimo H\&W, la amplitud del IC bootstrap alcanza el 121\% y el \(R^2\) espacial colapsa a 0,006 una vez activada la salvaguarda anti-Kriging-degenerado, lo que activa el régimen de respaldo del marco con un rango de sensibilidad del \(\pm 30\%\). Un experimento Montecarlo (200 réplicas de regiones sintéticas de 9 estaciones × 30 años muestreadas desde una GEV con \(\kappa = -0{,}10\)) confirma una cobertura empírica del 85,0\% dentro del intervalo \(\pm 30\%\), y cuantifica un sesgo sistemático de subestimación del \(-20{,}7\%\) atribuible al ajuste de L-momentos de Gumbel a datos provenientes de una GEV de cola pesada.

\vspace{0.5cm}
\noindent \textbf{Palabras clave:} GRADEX, precipitación extrema, L-momentos, análisis frecuencial regional, IDW, Kriging, HEC-HMS, diseño hidrológico, Colombia, Hosking--Wallis

\vspace{0.5cm}
\hrule
\vspace{0.5cm}

\section{Introduction}
\subsection{Context and motivation}
The estimation of extreme precipitation for the hydrological design of infrastructure —dams, bridges, sewers, and flood defenses— constitutes one of the most challenging problems in water resources engineering, particularly in regions with sparse rain gauge networks and short record series \cite{stedinger1993}. In Colombia, the density of rain gauge stations is significantly lower than the recommendations of the World Meteorological Organization \cite{omm2008}, which suggests one station per 250–500 km\(^2\) in mountainous regions.

The GRADEX method (GRAdient of EXtreme values), originally proposed by \cite{guillot1967} and subsequently formalized by \cite{naghettini1995} and \cite{galea1997}, offers a physically based approach for extrapolating the design precipitation beyond the available observation period. The GRADEX parameter —defined as the slope of the annual maximum precipitation quantile function plotted on Gumbel probability paper between return periods \(T = 10\) and \(T = 100\) years— encapsulates the asymptotic behavior of the distribution tail and provides a direct link to design flood estimation in the GRADEX-IDF methodology \cite{naghettini1995}.

However, the classic pointwise approach accumulates two sources of uncertainty: distribution parameter estimation at each station and spatial interpolation error. When networks are small —a frequent situation in developing countries— these uncertainties can render the final value unreliable, without any explicit quantification or systematic quality control.

Regional frequency analysis, systematized by Hosking and Wallis~\cite{hosking1997} (hereinafter H\&W), addresses the first source of uncertainty by combining information from hydrologically homogeneous stations. By standardizing the series to a dimensionless form using the index-flood concept and fitting a single regional distribution to the pooled L-moments, the method drastically reduces the variance of parameter estimation, especially at stations with short records. The goodness-of-fit \(Z\) statistic provides a formal, Monte Carlo-calibrated criterion for selecting the regional distribution family, and the H\&W bootstrap simulation algorithm allows deriving a confidence interval with correct nominal coverage by simultaneously propagating several uncertainty sources.

Despite these advantages, a direct comparison between the classic pointwise GRADEX and the H\&W regional approach has not been systematically implemented in a single automated framework with quality gates for HEC-HMS applications. This article presents GRADEX-DUAL v1.0, an open-source R package that fills this methodological and operational gap, and additionally provides a Monte Carlo validation experiment that empirically quantifies the bias and coverage of the framework against synthetic regions with known true GRADEX.

\subsection{Study objectives}
The specific objectives of this work are:
\begin{enumerate}
\item To implement a dual estimation framework that combines pointwise GRADEX \cite{guillot1967} with regional frequency analysis by L-moments \cite{hosking1997} in a single R workflow.
\item To develop an automatic weighting system with three quality gates —H\&W bootstrap CI amplitude, spatial cross-validation \(R^2\), and anti-degenerate-Kriging variance ratio— that penalizes unreliable estimates.
\item To provide a complete set of prior diagnostics that verify the fundamental assumptions of frequency analysis: serial independence, regional homogeneity, absence of discordant stations, and temporal stationarity.
\item To implement a Monte Carlo validation experiment that empirically quantifies the bias, coverage, and RMSE of the framework against synthetic regions with known true GRADEX.
\item To generate publication-quality outputs: maps, cross-validation plots, and a structured Excel workbook with 17 sheets.
\item To evaluate the framework on a representative case study —a nine-station IDEAM network in the upper Porce River basin (northern Antioquia, Colombia), belonging to the Porce–Nechí–Cauca–Magdalena hydrographic system, with 254 station-years— under minimum network conditions recommended by H\&W.
\end{enumerate}

\section{Materials and Methods}
\subsection{Theoretical foundation of the GRADEX parameter}
The GRADEX parameter is defined as the slope of the quantile function \(Q(F)\) in the Gumbel reduced variate plane \(y = -\ln(-\ln F)\) over the range of high return periods \cite{guillot1967}:
\[
\text{GRADEX} = \frac{Q(F_2) - Q(F_1)}{y_2 - y_1}, \tag{1}
\]
where \(F_k = 1 - 1/T_k\), with \(T_1 = 10\) and \(T_2 = 100\) years. The corresponding Gumbel reduced variates are:
\[
y_1 = -\ln(-\ln(1 - 1/10)) = 2.250367, \qquad y_2 = -\ln(-\ln(1 - 1/100)) = 4.600149, \tag{2}
\]
such that \(\Delta y = y_2 - y_1 = 2.349782\) is constant and independent of the fitted distribution.

For the Gumbel distribution (EV1), the GRADEX coincides exactly with the scale parameter \(\beta\). For distributions other than Gumbel (Log-Normal, Pearson III, GEV), the quantile function is curved on Gumbel paper, so equation (1) computes a local chord slope between \(T = 10\) and \(T = 100\) years. This approximation, widely used in applied hydrology \cite{galea1997}, may underestimate the true tail slope for heavy-tailed distributions at return periods exceeding \(T_2\).

\subsection{Probability distributions}
Table~\ref{table:distributions} summarizes the distributions fitted in each layer of the framework and their main properties.

\begin{table}[H]
\centering
\caption{Probability distributions in GRADEX-DUAL. GEV = Generalized Extreme Value; GLO = Generalized Logistic; GNO = Generalized Normal (equivalent to three-parameter Log-Normal, LN-3p); PE3 = Pearson Type III; GPA = Generalized Pareto; ML = maximum likelihood. The two-parameter Log-Normal distribution is fitted by ML in the pointwise layer for consistency with the standard implementation of the \texttt{MASS} package in R; the remaining pointwise distributions and all regional distributions are fitted by L-moments.}
\begin{tabular}{llll}
\toprule
Distribution & Layer & Estimation & Parameters \\ 
\midrule
Gumbel (EV1) & Pointwise & L-moments & \(\mu\) (location), \(\beta\) (scale) \\
Log-Normal (LN-2p) & Pointwise & ML & \(\mu\) (meanlog), \(\sigma\) (sdlog) \\
Pearson III & Pointwise & L-moments & \(\mu\), \(\sigma\), \(\gamma\) (skewness) \\
Gamma & Pointwise & L-moments & \(\alpha\) (shape), \(\beta\) (scale) \\
GEV & Regional & L-moments & \(\mu\), \(\alpha\), \(\kappa\) (shape) \\
GLO & Regional & L-moments & \(\mu\), \(\alpha\), \(\kappa\) \\
GNO (LN-3p) & Regional & L-moments & \(\mu\), \(\alpha\), \(\kappa\) \\
PE3 & Regional & L-moments & \(\mu\), \(\sigma\), \(\gamma\) \\
GPA & Regional & L-moments & \(\mu\), \(\alpha\), \(\kappa\) \\
\bottomrule
\end{tabular}
\label{table:distributions}
\end{table}

\subsection{Estimation by L-moments}
L-moments \cite{hosking1990} are linear combinations of order statistics that offer greater robustness against outliers compared to conventional product moments. The \(r\)-th L-moment is defined as:
\[
\lambda_r = \sum_k p_{r,k}^* \beta_k, \quad \text{where } \beta_k = \mathbb{E}[X \cdot F(X)^k]. \tag{3}
\]
and \(p_{r,k}^*\) are the coefficients of the shifted Legendre polynomial (see \cite{hosking1990} for details). The dimensionless L-moment ratios are: L-CV (\(\tau_2 = \lambda_2/\lambda_1\)), L-skewness (\(\tau_3 = \lambda_3/\lambda_2\)), and L-kurtosis (\(\tau_4 = \lambda_4/\lambda_2\)).

\subsection{Regional L-moment algorithm (H\&W, eq. 4.3)}
In the H\&W layer, each series is first standardized by the station mean (index-flood step):
\[
x_{i,j} = \frac{X_{i,j}}{\mu_i}, \quad j = 1, \dots, n_i. \tag{4}
\]
Regional L-moment ratios are calculated as record-length-weighted averages:
\[
\tau_r^R = \frac{\sum_i n_i \tau_r^{(i)}}{\sum_i n_i}, \tag{5}
\]
where \(\tau_r^{(i)}\) is the \(r\)-th L-moment ratio at station \(i\), \(n_i\) is the record length, and \(N = \sum_i n_i\) is the total station-years.

\subsection{Goodness-of-fit \(Z\) statistic (H\&W, eq. 5.6)}
For each candidate distribution \(\mathcal{D}\), the theoretical L-kurtosis \(\tau_4^{\mathcal{D}}\) is calculated from the parameters fitted to \([1, \tau_2^R, \tau_3^R]\). The \(Z\) statistic measures the discrepancy between the theoretical and observed regional L-kurtosis in units of the Monte Carlo standard deviation:
\[
Z^{\mathcal{D}} = \frac{\tau_4^{\mathcal{D}} - \tau_4^R}{\sigma_4}. \tag{6}
\]
where \(\sigma_4\) is estimated by simulating \(N_{\text{sim}} = 500\) homogeneous synthetic regions from a four-parameter kappa distribution fitted to the regional L-moments. The distribution with the smallest \(|Z^{\mathcal{D}}|\) among those satisfying \(|Z^{\mathcal{D}}| < 1.64\) (two-sided 10\% significance) is selected.

\subsection{Regional bootstrap confidence interval}
\label{sec:bootstrap}
The 95\% confidence interval is computed using the bootstrap simulation algorithm described in Section 6.3 of \cite{hosking1997}. For each of \(n_{\text{boot}}\) replicates, the following five steps are executed:

\begin{enumerate}
\item Simulate a homogeneous synthetic region by drawing independent samples of sizes \(n_1, \dots, n_k\) from the four-parameter kappa distribution fitted to the observed regional L-moments.
\item Re-estimate the regional L-moment ratios \(\tau_2^R\) and \(\tau_3^R\) from the synthetic region.
\item Re-fit the selected regional distribution to the synthetic regional L-moments.
\item Calculate GRADEX at each synthetic station using equation (1).
\item Re-interpolate the GRADEX values to the gauging point using IDW.
\end{enumerate}

The 2.5 and 97.5 percentiles of the bootstrap distribution constitute the 95\% confidence interval.

\subsection{Spatial interpolation methods}
\subsubsection{Inverse Distance Weighting (IDW)}
The IDW estimator at the ungauged point \(x_0\) is:
\[
\hat{z}(x_0) = \frac{\sum_i w_i(x_0) z_i}{\sum_i w_i(x_0)}, \quad w_i = d_i^{-p}, \tag{7}
\]
where \(z_i\) is the GRADEX at station \(i\), \(d_i\) is the Euclidean distance from station \(i\) to \(x_0\), and \(p\) is the decay exponent. The optimal exponent is determined by minimizing the LOO cross-validation RMSE over the set of candidate values \(p \in \{1.0, 1.5, 2.0, 3.0\}\).

\subsubsection{Ordinary Kriging}
Ordinary Kriging produces the Best Linear Unbiased Predictor (BLUP) by solving the system:
\[
\begin{bmatrix}
\gamma(h_{11}) & \cdots & \gamma(h_{1n}) & 1 \\
\vdots & \ddots & \vdots & \vdots \\
\gamma(h_{n1}) & \cdots & \gamma(h_{nn}) & 1 \\
1 & \cdots & 1 & 0
\end{bmatrix}
\begin{bmatrix}
\lambda_1 \\ \vdots \\ \lambda_n \\ \mu
\end{bmatrix}
=
\begin{bmatrix}
\gamma(h_{10}) \\ \vdots \\ \gamma(h_{n0}) \\ 1
\end{bmatrix}, \tag{8}
\]
where \(\gamma(h)\) is the semivariogram at lag \(h\), automatically fitted using the \texttt{automap} package \cite{hiemstra2009}.

\subsubsection{Multi-metric selection of the spatial method}
The interpolation method is selected using a three-criterion scoring system evaluated with LOO cross-validation. Each criterion awards one point to the best-performing method: RMSE (lower wins), \(R^2\) (higher wins), and |BIAS| (lower absolute value wins). Ties are resolved by lower RMSE.

\subsection{Weighting system with quality gates}
The final GRADEX recommendation integrates both layers through a weighting scheme with three explicit gates.

\subsubsection{Gate A: H\&W bootstrap CI amplitude}
The relative amplitude of the bootstrap CI is defined as:
\[
A = \frac{\text{CI}_{\text{upper}} - \text{CI}_{\text{lower}}}{\text{GRADEX}_{\text{HW}}} \times 100\%. \tag{9}
\]
A linear penalty factor is applied to the H\&W weight: 
\[\text{ci\_factor} = \max(0, 1 - A/100).\] 
When \(A \geq 100\%\), \(\text{ci\_factor} = 0\) and the H\&W weight is nullified regardless of any other metric.

\subsubsection{Gate B: spatial cross-validation \(R^2\)}
When the LOO cross-validation \(R^2\) of the selected spatial method is below 0.3, the spatial weight is nullified.

\subsubsection{Gate C: anti-degenerate-Kriging variance ratio}
When the variogram approaches pure nugget (\(N/S \to 1\)), Ordinary Kriging collapses toward the regional mean: all LOO predictions concentrate near a constant value regardless of station location. Under this regime, \(R^2\) and |BIAS| can indicate spuriously favorable metrics for Kriging while RMSE remains unimproved over IDW. To detect this pathological case, the variance ratio is defined as:
\[
r = \frac{\mathrm{Var}\bigl(\hat{z}_{\text{LOO}}\bigr)}{\mathrm{Var}\bigl(z_{\text{obs}}\bigr)}. \tag{10}
\]
When \(r < 0.10\), Kriging predictions span less than 10\% of the observed variance and the method is \emph{disqualified} from multi-metric selection regardless of its score; IDW is used instead. The 0.10 threshold is conservative: a healthy Kriging on real spatial structure typically produces \(r > 0.40\).

\subsubsection{Composite weighting}
When both methods pass their respective gates, the composite weights are calculated as:
\[
w_{\text{spatial}} = \frac{R^2}{R^2 + \text{ci\_factor} \cdot \max\left(0, \frac{2 - |Z|}{2}\right)}, \quad w_{\text{HW}} = 1 - w_{\text{spatial}}. \tag{11}
\]

\subsection{Prior diagnostics}
\subsubsection{Serial independence: Ljung-Box test}
The Ljung-Box portmanteau test \cite{ljung1978} evaluates the null hypothesis that the first \(L\) autocorrelation coefficients are jointly zero:
\[
Q_{LB} = n(n+2) \sum_{k=1}^{L} \frac{\hat{\rho}_k^2}{n-k} \sim \chi^2(L). \tag{12}
\]
The number of lags is set as \(L = \min(\text{LJUNG\_BOX\_LAGS}, \lfloor n/4 \rfloor)\).

\subsubsection{Regional homogeneity: \(H\) statistic}
The \(H\) statistic of \cite{hosking1997} compares the observed dispersion of L-CV values with that expected under perfect homogeneity:
\[
V_{\text{obs}} = \left[ \frac{\sum_i n_i (\tau_2^{(i)} - \tau_2^R)^2}{\sum_i n_i} \right]^{1/2}, \quad H = \frac{V_{\text{obs}} - \mu_V}{\sigma_V}. \tag{13}
\]
The conventional interpretation is: \(H < 1\) = acceptably homogeneous; \(1 \leq H < 2\) = possibly heterogeneous; \(H \geq 2\) = definitely heterogeneous.

\subsubsection{Discordancy measure \(D\)}
The discordancy measure identifies stations whose L-moment ratios differ markedly from the regional average:
\[
D_i = \frac{N_{\text{st}}}{3} \mathbf{u}_i^\top \mathbf{A}^{-1} \mathbf{u}_i, \quad \mathbf{u}_i = [\tau_2, \tau_3, \tau_4]^\top - \bar{\mathbf{u}}. \tag{14}
\]
Stations with \(D_i > 3.0\) are classified as discordant and it is recommended that they be reviewed before inclusion in the regional analysis.

\subsubsection{Temporal trend: Mann--Kendall and Theil--Sen slope}
The non-parametric Mann--Kendall test evaluates the null hypothesis of no monotonic trend in a time series using the statistic:
\[
S = \sum_{i=1}^{n-1} \sum_{j=i+1}^{n} \mathrm{sgn}(x_j - x_i), \tag{15}
\]
and the trend magnitude is robustly estimated using the median Theil--Sen slope:
\[
\beta_{\text{Sen}} = \mathrm{med}_{i<j}\left\{ \frac{x_j - x_i}{t_j - t_i} \right\}. \tag{16}
\]

\subsubsection{Detrended sensitivity analysis}
When any station exhibits a significant trend, the classical frequency analysis violates the stationarity assumption. To quantify the impact of this bias, GRADEX-DUAL computes detrended series by subtracting the Sen trend:
\[
\tilde{x}_{i,t} = x_{i,t} - \beta_{\text{Sen}}^{(i)} \cdot (t - \bar{t}_i), \tag{17}
\]
and re-estimates the GRADEX per station on the residual series. The percentage difference relative to the stationary GRADEX is reported as a sensitivity measure: a maximum difference exceeding 15\% triggers an explicit warning in the framework output.

\subsubsection{Monte Carlo validation of the framework}
To empirically quantify the bias, RMSE, and coverage of the framework, GRADEX-DUAL includes an optional Monte Carlo validation experiment. In each replicate, a synthetic region is simulated with: (i) \(K\) stations whose locations are random within a 50 km × 50 km square; (ii) index-flood factors \(\mu_i \sim \mathcal{N}(\mu_\mu, \sigma_\mu^2)\) per station; and (iii) annual series of length \(n_{\text{yrs}}\) sampled from a GEV distribution with parameters \((\mu_i, \alpha_i, \kappa)\) such that the target regional L-CV remains constant. The \(\mathrm{GRADEX}_{\text{true}}\) is computed analytically from the known parameters:
\[
\mathrm{GRADEX}_{\text{true}} = \frac{q_{100} - q_{10}}{y_{100} - y_{10}}, \tag{18}
\]
and compared with the GRADEX estimated by the framework. The reported metrics are: bias (mean and median in mm and \%), RMSE, P95 of absolute relative bias, and coverage of the \(\pm 30\%\) sensitivity interval.

\subsection{Software architecture}
GRADEX-DUAL v1.0 is structured into 20 sections in a single R script. Table~\ref{table:functions} presents the inventory of the main functions.

\begin{table}[H]
\centering
\caption{Inventory of main GRADEX-DUAL v1.0 functions}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lp{0.6\textwidth}}
\toprule
Function & Description \\
\midrule
\texttt{configure\_workspace()} & Working directory and global parameters. \\
\texttt{load\_precipitation\_data()} & Reading and validation of the input Excel file. \\
\texttt{compute\_station\_gradex()} & Fitting of 4 distributions; two-stage selection; per-station bootstrap CI. \\
\texttt{run\_independence\_tests()} & Ljung-Box serial independence test. \\
\texttt{run\_homogeneity\_tests()} & H\&W \(H\) statistic and discordancy measure \(D\). \\
\texttt{theil\_sen\_slope()}, \texttt{run\_trend\_analysis()} & Sen slope (Theil--Sen) and Mann-Kendall test. \\
\texttt{run\_detrended\_sensitivity()} & GRADEX re-estimation on Sen-detrended series. \\
\texttt{optimise\_idw\_power()} & Grid search LOO-CV for optimal IDW exponent. \\
\texttt{run\_cross\_validation\_both()} & LOO-CV for IDW and Kriging; multi-metric selection with anti-degenerate-Kriging safeguard. \\
\texttt{compute\_regional\_lmom\_hw()} & Record-length-weighted regional L-moments (H\&W eq. 4.3). \\
\texttt{select\_hw\_distribution()} & Distribution selection by \(Z\) statistic (H\&W eq. 5.6). \\
\texttt{bootstrap\_hw\_ci()} & H\&W bootstrap CI (Section 6.3). \\
\texttt{compute\_weighted\_recommendation()} & Weighting with three quality gates. \\
\texttt{run\_mc\_validation()} & Monte Carlo validation against known true GRADEX. \\
\texttt{export\_results\_excel()} & Excel workbook with 17 result sheets. \\
\bottomrule
\end{tabular}%
}
\label{table:functions}
\end{table}

\subsection{Configurable parameters}
Table~\ref{table:parameters} lists the main parameters configurable by the user in Section 1 of the script.

\begin{table}[H]
\centering
\caption{Configurable parameters in GRADEX-DUAL v1.0}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lll}
\toprule
Parameter & Default value & Description \\
\midrule
\texttt{LON\_GAUGE} / \texttt{LAT\_GAUGE} & — & Coordinates of the gauging point (WGS84). \\
\texttt{MIN\_YEARS} & 10 & Minimum record length (years). \\
\texttt{WARN\_YEARS} & 30 & Warning threshold for Q100 (years). \\
\texttt{CONFIDENCE\_LEVEL} & 0.95 & Confidence level for all CIs. \\
\texttt{N\_BOOTSTRAP} & 2000 & Bootstrap replicates for the regional CI. \\
\texttt{IDW\_POWER\_GRID} & \{1; 1.5; 2; 3\} & Candidate IDW exponents. \\
\texttt{HW\_NSIM} & 500 & Monte Carlo simulations for \(\sigma_4\) and bootstrap CI. \\
\texttt{KRIGING\_NUGGET\_SILL\_MAX} & 0.5 & N/S threshold to indicate strong spatial structure. \\
\texttt{RUN\_MC\_VALIDATION} & \texttt{TRUE} & Activates Monte Carlo validation of the framework. \\
\texttt{MC\_N\_REPLICATES} & 200 & Monte Carlo replicates for validation. \\
\texttt{RANDOM\_SEED} & 2024 & Seed for reproducibility. \\
\bottomrule
\end{tabular}%
}
\label{table:parameters}
\end{table}

\section{Results}
\subsection{Case study: Upper Porce River Basin, Antioquia, Colombia}
\label{sec:case}
The framework was evaluated with a network of \textbf{nine IDEAM rain gauge stations} in the upper Porce River basin (northern Antioquia department, Colombia), a region characterized by complex Andean topography with a strong orographic effect on precipitation. The Porce River originates in the northern highlands of Antioquia and constitutes one of the main tributaries of the Nechí River, which discharges its waters into the Cauca River downstream of the Antioquia canyon; the study area therefore belongs, in macro-regional hydrographic terms, to the \textbf{lower Cauca River basin}, within the Cauca–Magdalena system. The stations cover the Santa Rosa de Osos – Yarumal – Belmira – Carolina del Príncipe sector, with elevations between 1,450 and 2,700 m a.s.l. and record lengths of 25 to 30 years, totaling 254 station-years. The target gauging point is located at longitude \(-75.48^\circ\) and latitude \(6.65^\circ\) (WGS84 system), near the municipality of Santa Rosa de Osos, within the polygon covered by the network.

\begin{table}[H]
\centering
\caption{Characteristics of the nine IDEAM rain gauge stations of the case study.}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lcccc}
\toprule
Station & Longitude (°) & Latitude (°) & Elevation (m a.s.l.) & Record (years) \\
\midrule
El Vergel              & \(-75.523\) & \(6.628\) & 2,700 & 30 \\
San Andrés             & \(-75.674\) & \(6.812\) & 1,450 & 30 \\
El Chaquiro            & \(-75.485\) & \(6.541\) & 2,580 & 28 \\
Santa Rosa de Osos     & \(-75.461\) & \(6.640\) & 2,550 & 30 \\
Yarumal                & \(-75.418\) & \(6.964\) & 2,300 & 27 \\
San Pedro de los Milagros & \(-75.557\) & \(6.459\) & 2,475 & 30 \\
Don Matías             & \(-75.396\) & \(6.486\) & 2,200 & 26 \\
Entrerríos             & \(-75.518\) & \(6.567\) & 2,300 & 28 \\
Carolina del Príncipe  & \(-75.282\) & \(6.726\) & 1,750 & 25 \\
\bottomrule
\end{tabular}%
}
\label{table:stations}
\end{table}

\noindent \textbf{Note on the data:} the series were synthetically calibrated to the documented regional climatology (mean annual maximum precipitations between 92 and 143 mm, L-CV \(\approx 0.25\)–\(0.32\), regional GEV distribution with \(\kappa \approx -0.10\)). This calibration preserves the statistical properties of real IDEAM networks and allows reproducing the experiment with the documented random seed.

\subsection{Temporal trend analysis and non-stationarity}
Figure~\ref{fig:trend} presents the Mann--Kendall trend analysis for the annual maximum precipitation series of the nine stations, with the robust Theil--Sen slope superimposed on each series. This analysis is fundamental to verify the stationarity assumption required by traditional frequency analysis \cite{stedinger1993, khaliq2006}.

\begin{figure}[H]
\centering
\includegraphics[width=\textwidth]{analisis_tendencia.png}
\caption{Mann--Kendall trend analysis of the annual maximum precipitation series for the nine IDEAM stations of the case study. The thick lines represent the robust Theil--Sen slope per station, consistent with the values reported in Table~\ref{table:trend}.}
\label{fig:trend}
\end{figure}

The detailed results of the test are summarized in Table~\ref{table:trend}.

\begin{table}[H]
\centering
\caption{Results of the Mann--Kendall trend test for the nine stations of the case study. Slope estimated by the Theil--Sen method.}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lcccc}
\toprule
\textbf{Station} & \textbf{Kendall's \(\tau\)} & \textbf{\(p\)-value} & \textbf{Sen slope (mm/yr)} & \textbf{Direction} \\
\midrule
Santa Rosa de Osos     & \(+0.101\)  & \(0.443\) & \(+0.396\) & Increasing \\
Yarumal                & \(-0.194\)  & \(0.162\) & \(-0.753\) & Decreasing \\
El Chaquiro            & \(-0.048\)  & \(0.737\) & \(-0.212\) & Decreasing \\
San Pedro de los Milagros & \(-0.039\) & \(0.775\) & \(-0.226\) & Decreasing \\
Don Matías             & \(+0.093\)  & \(0.523\) & \(+0.512\) & Increasing \\
Entrerríos             & \(-0.143\)  & \(0.295\) & \(-0.412\) & Decreasing \\
Carolina del Príncipe  & \(-0.084\)  & \(0.575\) & \(-0.361\) & Decreasing \\
El Vergel              & \(-0.009\)  & \(0.957\) & \(-0.073\) & Decreasing \\
San Andrés             & \(-0.058\)  & \(0.668\) & \(-0.406\) & Decreasing \\
\bottomrule
\end{tabular}%
}
\label{table:trend}
\end{table}

\noindent \textbf{Key result:} \emph{none of the nine stations exhibits a statistically significant trend at the \(\alpha = 0.05\) level} (all \(p\)-values exceed the 0.10 threshold, being classified as \textit{Not significant}). The Sen slopes range within a narrow interval between \(-0.75\) and \(+0.51\) mm yr\(^{-1}\), values that in the context of maximum precipitations with regional means between 92 and 143 mm represent change rates of less than 0.5\%/yr. This result satisfies the stationarity assumption required by the subsequent frequency analysis.

\subsubsection{Detrended sensitivity analysis}
Although no trend is significant, the sensitivity analysis on the Sen-detrended series (Table~\ref{table:sensitivity}) reveals relative differences that in some stations exceed 15\%, which triggers the methodological warning of the framework. This demonstrates that non-significant trends can nonetheless appreciably affect extreme quantile estimates in small samples, and constitutes an additional argument for reporting results with a wide sensitivity range.

\begin{table}[H]
\centering
\caption{Sensitivity analysis: pointwise GRADEX on original vs. Sen-detrended series. Negative \(\Delta\) indicates underestimation upon detrending (the trend inflated the GRADEX).}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lcccc}
\toprule
\textbf{Station} & \textbf{Stationary (mm)} & \textbf{Detrended (mm)} & \textbf{\(\Delta\) (mm)} & \textbf{\(\Delta\%\)} \\
\midrule
Santa Rosa de Osos     & 18.6 & 18.4 & \(-0.2\) & \(-1.1\%\) \\
Yarumal                & 25.2 & 25.2 & \(0.0\)  & \(0.0\%\)  \\
El Chaquiro            & 17.4 & 13.9 & \(-3.5\) & \(-20.1\%\) \\
San Pedro de los Milagros & 10.8 & 10.6 & \(-0.2\) & \(-1.9\%\) \\
Don Matías             & 24.9 & 23.4 & \(-1.5\) & \(-6.0\%\) \\
Entrerríos             & 16.5 & 10.9 & \(-5.6\) & \(-34.0\%\) \\
Carolina del Príncipe  & 33.0 & 32.4 & \(-0.6\) & \(-1.8\%\) \\
El Vergel              & 30.7 & 30.7 & \(0.0\)  & \(0.0\%\)  \\
San Andrés             & 27.7 & 27.6 & \(-0.1\) & \(-0.4\%\) \\
\midrule
\multicolumn{4}{l}{\textbf{Max \(|\Delta\%|\)} = 34.0\% (Entrerríos)}    & \multicolumn{1}{r}{\textbf{Mean} = 7.3\%} \\
\bottomrule
\end{tabular}%
}
\label{table:sensitivity}
\end{table}

\subsection{Cross-validation and spatial method selection}
\label{sec:cv}
LOO cross-validation is executed on both spatial methods (IDW and Ordinary Kriging), and the final selection is based on a multi-metric rule supplemented by the anti-degenerate-Kriging safeguard of Gate C.

\subsubsection{Variogram fitting}
The automatic variogram fitting (\texttt{automap} package) selects the spherical model (Sph) with a range of 7,061 m and a nugget/sill ratio of 0.981 —close to pure nugget. This N/S value indicates an almost total absence of detectable spatial structure at the network scale.

\subsubsection{Detection of degenerate Kriging}
Although the preliminary multi-metric scoring apparently favored Kriging (it wins in \(R^2\) and |BIAS|), the Gate C safeguard detects the pathological regime:
\[
r = \frac{\mathrm{Var}(\hat{z}_{\text{LOO,Kriging}})}{\mathrm{Var}(z_{\text{obs}})} = 0.0146 \;<\; 0.10. \tag{19}
\]
Kriging predictions span less than 1.5\% of the observed variance (Figure~\ref{fig:cv_kriging}, right panel), confirming the variogram collapse to a near-constant predictor. The system then disqualifies Kriging and selects IDW (\(p_{\text{opt}} = 2.0\)) as the spatial method.

\begin{figure}[H]
\centering
\includegraphics[width=0.95\textwidth]{validacion_cruzada_idw.png}
\caption{LOO cross-validation for IDW (\(p_{\text{opt}} = 2.0\)) over the nine-station network. \(R^2 = 0.006\), RMSE = 7.46 mm, |BIAS| = 1.47 mm, MAE = 6.42 mm. The scattered cloud and the nearly horizontal green regression line show that IDW does not capture the real spatial structure of GRADEX in this basin, which triggers Gate B (\(R^2 < 0.3\)) in the weighting phase.}
\label{fig:cv_idw}
\end{figure}

\begin{figure}[H]
\centering
\includegraphics[width=0.95\textwidth]{validacion_cruzada_kriging.png}
\caption{LOO cross-validation for Ordinary Kriging (Sph variogram, N/S = 0.98). The apparent \(R^2\) of 0.953 is an \emph{artifact} of the collapsed variance of the predictions (range between 21.5 and 24.3 mm for observations ranging from 10.8 to 33.0 mm): the negative slope of the green line indicates anti-correlation with respect to the ideal \(y=x\) line (dashed red). The anti-degenerate-Kriging safeguard (Gate C, eq.~10) detects this pattern and disqualifies Kriging.}
\label{fig:cv_kriging}
\end{figure}

Table~\ref{table:cv_comparison} summarizes the comparative metrics of both methods.

\begin{table}[H]
\centering
\caption{Comparison of LOO-CV metrics between IDW and Kriging for the case study.}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lccc}
\toprule
\textbf{Metric} & \textbf{IDW} (\(p = 2.0\)) & \textbf{Kriging} (Sph) & \textbf{Best} \\
\midrule
RMSE (mm) & 7.46 & 7.75 & IDW \\
\(R^2\) & 0.006 & 0.953 & Kriging (artifact) \\
\(|\)BIAS\(|\) (mm) & 1.47 & 0.07 & Kriging (artifact) \\
MAE (mm) & 6.42 & 6.87 & IDW \\
Variance ratio \(r\) & — & 0.0146 & — \\
\textbf{Diagnosis} & — & \multicolumn{2}{c}{\textbf{Kriging DEGENERATE} (\(r < 0.10\)) — DISQUALIFIED} \\
\midrule
\multicolumn{4}{l}{\textbf{Selected method:} IDW} \\
\bottomrule
\end{tabular}%
}
\label{table:cv_comparison}
\end{table}

\noindent \textbf{Statistical warning:} with only \(n = 9\) stations, the LOO cross-validation \(R^2\) is subject to high uncertainty. RMSE relative to the regional mean (\(\text{RMSE}/\bar{z} = 7.46/20.4 \approx 36.6\%\)) constitutes a more informative complementary metric.

\subsection{Interpolated GRADEX map}
Figure~\ref{fig:map} presents the GRADEX map interpolated using the selected method (IDW with \(p_{\text{opt}} = 2.0\)) in the MAGNA-SIRGAS / CTM12 coordinate system (EPSG:9377). A pronounced spatial gradient is observed, with values ranging from approximately 11 mm in the San Pedro de los Milagros sector (south of the network) to 33 mm in Carolina del Príncipe (east of the network). The gauging point (red triangle) is located in a transition zone with an estimated pointwise GRADEX of \textbf{20.40 mm}.

\begin{figure}[H]
\centering
\makebox[\textwidth][c]{\includegraphics[width=1.25\textwidth]{mapa_gradex_final.png}}
\caption{GRADEX interpolation map using IDW (exponent \(p_{\text{opt}} = 2.0\)) over the nine-station IDEAM network. The interpolated surface shows the spatial gradient between the minimum value (San Pedro de los Milagros, 10.8 mm) and the maximum value (Carolina del Príncipe, 33.0 mm). The gauging point (red triangle) is located in the transition zone with a pointwise GRADEX of 20.40 mm and 95\% CI: [18.0; 27.6] mm. Coordinate system: MAGNA-SIRGAS / CTM12 (EPSG:9377).}
\label{fig:map}
\end{figure}

Figure~\ref{fig:map_comp} presents the comparative panel IDW vs. Ordinary Kriging, visually demonstrating the pathological Kriging regime detected by Gate C: while the IDW map reproduces the real spatial gradient between stations, the Kriging map collapses to an almost constant value (range 22.6–22.9 mm throughout the domain).

\begin{figure}[H]
\centering
\makebox[\textwidth][c]{\includegraphics[width=1.25\textwidth]{mapa_comparacion_metodos.png}}
\caption{Comparative panel IDW (left, \emph{selected}) vs.\ Ordinary Kriging (right) over the nine-station network. The Kriging map shows the degenerate regime: the color scale covers barely 0.3 mm of range (22.6–22.9 mm) compared to the 22 mm range of IDW, evidencing the variogram collapse to a constant predictor. The anti-degenerate-Kriging safeguard (eq.~10) detects this pattern through the variance ratio \(r = 0.0146\).}
\label{fig:map_comp}
\end{figure}

\subsection{Diagnostic and estimation results}
Table~\ref{table:results} summarizes the main diagnostic and estimation results of the case study, incorporating the information visualized in the preceding figures.

\begin{table}[H]
\centering
\caption{Summary of results for the case study in the upper Porce River basin (9 IDEAM stations, 254 station-years).}
\resizebox{\textwidth}{!}{%
\small
\begin{tabular}{l p{0.55\textwidth}}
\toprule
\textbf{Diagnosis / Estimation} & \textbf{Result} \\
\midrule
Number of stations & 9 \\
Total station-years & 254 \\
Network classification (H\&W) & ACCEPTABLE (8–14 stations) \\
Serial independence (Ljung--Box) & \(9/9\) stations support i.i.d. (\(p > 0.05\)) \\
Regional homogeneity (\(H\)) & \(-0.918\) — HOMOGENEOUS (\(H < 1\)) \\
Maximum discordancy \(D\) & \(D_{\max} = 2.057\) (El Vergel) — none discordant \\
Mann--Kendall trend & No station with significant trend (\(p > 0.10\)) \\
Detrended sensitivity \(|\Delta|_{\max}\) & 34.0\% (Entrerríos) — warning triggered \\
\midrule
\multicolumn{2}{l}{\textbf{Layer 1: Pointwise estimation + spatial interpolation}} \\
Pointwise GRADEX IDW (\(p = 2.0\)) & \textbf{20.40 mm} \\
IDW cross-validation \(R^2\) & 0.006 — INADEQUATE (\(< 0.3\)) \\
Cross-validation RMSE / MAE / |BIAS| & 7.46 / 6.42 / 1.47 mm \\
Kriging variogram (Sph) & N/S = 0.981, range = 7,061 m \\
Kriging cross-validation \(R^2\) (apparent) & 0.953 — \emph{artifact} of collapsed variogram \\
Kriging variance ratio \(r\) & 0.0146 — DEGENERATE (\(< 0.10\)) \\
Gate C triggered & YES — Kriging disqualified \\
\midrule
\multicolumn{2}{l}{\textbf{Layer 2: H\&W regional frequency analysis}} \\
Regional L-CV & 0.1533 \\
Regional L-skewness & 0.1469 \\
Regional L-kurtosis & 0.1478 \\
\(\sigma_4\) (Monte Carlo, n=500) & 0.02193 \\
H\&W regional distribution (\(Z\) test) & GEV, \(|Z| = 0.28\) — PASSES \\
\(Z\) test power & MODERATE (200–500 sta-yrs) \\
H\&W GRADEX at gauging point & 22.38 mm \\
H\&W 95\% bootstrap CI & [10.72; 37.80] mm \\
Bootstrap CI amplitude & \textbf{121.0\%} — UNRELIABLE (\(\geq 100\%\)) \\
\midrule
\multicolumn{2}{l}{\textbf{Gate system and final recommendation}} \\
Gate A (CI amplitude) & TRIGGERED — H\&W weight = 0\% \\
Gate B (\(R^2 < 0.3\)) & TRIGGERED — spatial weight = 0\% \\
Active regime & \textit{Fallback} — both methods unreliable \\
Recommended GRADEX (base value) & \textbf{20.40 mm} (pointwise IDW) \\
Sensitivity range (\(\pm 30\%\)) & [14.28; 26.52] mm \\
\bottomrule
\end{tabular}%
}
\label{table:results}
\end{table}

\subsection{Monte Carlo validation of the framework}
\label{sec:mc}
To empirically quantify the bias, RMSE, and coverage of the GRADEX-DUAL framework, the Monte Carlo validation experiment described in Section 2 was executed with the following parameters: \(K = 9\) stations per replicate (equal to the study network), \(n_{\text{yrs}} = 30\) years per station, regional GEV distribution with \(\kappa = -0.10\), target regional L-CV \(= 0.18\), mean index-flood factor \(\mu_\mu = 110\) mm with inter-station dispersion \(\sigma_\mu = 18\) mm, and 200 replicates. Table~\ref{table:mc} summarizes the results.

\begin{table}[H]
\centering
\caption{Results of the Monte Carlo validation of the GRADEX-DUAL framework (200 replicates, 9 stations, 30 years per station, true distribution GEV with \(\kappa = -0.10\)).}
\begin{tabular}{lc}
\toprule
\textbf{Metric} & \textbf{Value} \\
\midrule
Valid replicates & 200 / 200 (100\%) \\
Mean true GRADEX & 36.54 mm \\
Mean estimated GRADEX & 28.97 mm \\
Mean bias & \(-7.569\) mm \(\;(-20.73\%)\) \\
Median bias & \(-7.752\) mm \\
RMSE & 8.319 mm \\
P95 \(|\)relative bias\(|\) & 34.10\% \\
\textbf{Coverage of \(\pm 30\%\) interval} & \textbf{85.0\%} \\
\bottomrule
\end{tabular}
\label{table:mc}
\end{table}

\subsubsection{Interpretation of Monte Carlo results}
The validation reveals three fundamental findings for framework calibration:

\textbf{(i) Systematic underestimation bias of \(-20.7\%\).} The framework consistently underestimates the true GRADEX. This bias is attributable to the use of Gumbel L-moments in the pointwise layer when the data actually come from a heavy-tailed GEV (\(\kappa < 0\)): the Gumbel chord slope underestimates the true tail curvature. For design applications in regions where the true regional distribution is GEV with \(\kappa < 0\), it is recommended to apply a correction factor of \(\sim +25\%\) to the final GRADEX, or to adopt GEV-L-moments directly instead of Gumbel.

\textbf{(ii) Empirical coverage of 85\% within the \(\pm 30\%\) interval.} This result provides the first \emph{empirical justification} of the \(\pm 30\%\) sensitivity range of the framework's \textit{fallback} regime. Although the range does not achieve the nominal 95\% coverage, it does cover 85\% of cases —an acceptable coverage level for preliminary hydrological design given the minimum network regime.

\textbf{(iii) RMSE of 8.32 mm.} In units relative to the mean true GRADEX (36.5 mm), the RMSE represents 22.8\% of the central value. This is consistent with the typical uncertainty reported in the literature for frequency analysis of extreme quantiles in networks with \(< 500\) station-years \cite{stedinger1993}, and quantitatively supports the decision to adopt a wide sensitivity range in the \textit{fallback} regime.

\subsection{Output products}
The module automatically generates the following products:

\begin{itemize}
\item \textbf{Excel workbook} (\texttt{resultados\_gradex\_completos.xlsx}) with 17 sheets.
\item \textbf{Maps in PNG format at 300 DPI}: main map of the selected method and comparative panel IDW vs. Kriging.
\item \textbf{Cross-validation plots} for IDW and Kriging.
\item \textbf{Mann-Kendall trend plot}.
\item \textbf{Shapefiles} in MAGNA-SIRGAS/CTM12 (EPSG:9377) of stations and gauging point.
\item \textbf{Session report} (\texttt{session\_info.txt}) for reproducibility.
\end{itemize}

\section{Discussion}
\subsection{Analysis of prior diagnostics}
The prior diagnostics confirm the statistical validity of the regional analysis: the nine stations support the i.i.d. assumption of Ljung--Box (\(p > 0.05\) in all), the region is statistically homogeneous (\(H = -0.918 < 1\)), no station is discordant (\(D_{\max} = 2.06 < 3.0\)), and no series exhibits a significant trend (all \(p\)-values \(> 0.10\)). The detrended sensitivity analysis reveals that, although the trends are not significant, in some stations (notably Entrerríos) subtracting the Sen slope modifies the GRADEX by up to 34\%; this result motivates the joint reporting of the \(|\Delta|_{\max}\) metric as a robustness indicator.

\subsection{Spatial cross-validation and the anti-degenerate-Kriging safeguard}
The IDW cross-validation \(R^2\) of 0.006 indicates that the spatial structure of GRADEX in the basin is not captured by distance-weighted interpolation in this network. This result triggers Gate B of the weighting system, nullifying the spatial weight.

The behavior of Ordinary Kriging deserves special analysis. The apparent LOO metrics (\(R^2 = 0.953\), |BIAS| = 0.07 mm) superficially suggest that Kriging outperforms IDW (whose validation plot is presented in Figure~\ref{fig:cv_idw}). However, inspection of the residuals (Figure~\ref{fig:cv_kriging}) reveals that the predictions concentrate in a very narrow range (21.5–24.3 mm) compared to the observed range (10.8–33.0 mm). The variance ratio \(r = \mathrm{Var}(\hat{z})/\mathrm{Var}(z) = 0.0146\) quantifies this collapse: Kriging predictions span less than 1.5\% of the observed variance. The Gate C safeguard detects this pattern and disqualifies Kriging from selection, choosing IDW as the spatial method. Without this safeguard, the system would have selected Kriging and delivered a final recommendation based on a near-constant predictor —a methodologically incorrect result that is not detectable through the three individual metrics.

\subsection{H\&W regional estimation}
The framework selects GEV as the regional distribution with \(|Z| = 0.28\), well below the \(\pm 1.64\) threshold, which constitutes an excellent fit. The power of the \(Z\) test is classified as MODERATE given the total of 254 station-years (recommended threshold \(\geq 500\)). However, the H\&W bootstrap confidence interval is wide: [10.72; 37.80] mm, with an amplitude of 121\%. This result triggers Gate A of the weighting system, nullifying the H\&W weight. The lower bound of the CI (10.72 mm) is physically compatible with regions of low rainfall regime, but the relative amplitude indicates that the regional estimation lacks sufficient precision to constitute a reliable point estimator in design applications.

\subsection{Quality gate system — \textit{fallback} case}
The simultaneous triggering of the three gates (A: CI amplitude \(\geq 100\%\); B: \(R^2 < 0.3\); C: Kriging variance ratio \(< 0.10\)) corresponds to the \textit{fallback} regime: no individual method offers reliability guarantees, and the system falls back to the pointwise IDW estimator with expanded \(\pm 30\%\) sensitivity. This scenario is \emph{expected} with sparse rain gauge networks and demonstrates that the framework recognizes its own limitations —a desirable feature for operational transfer to non-expert users.

\subsubsection{Empirical justification of the operational thresholds}
The three thresholds governing the gate system (CI amplitude \(\geq 100\%\); \(R^2 < 0.3\); variance ratio \(< 0.10\)) are pragmatic operational criteria calibrated for the sparse rain gauge network regime. Their justification is as follows:
\begin{itemize}
\item \textbf{Gate A (CI amplitude \(\geq 100\%\)):} a bootstrap CI whose amplitude equals or exceeds the estimator's central value implies a coefficient of variation \(\geq 0.25\), practically non-informative for hydrological design.
\item \textbf{Gate B (\(R^2 < 0.3\)):} the interpolator does not appreciably improve upon the null model (prediction \(=\) regional mean).
\item \textbf{Gate C (\(r < 0.10\)):} the collapsed variogram produces a predictor spanning less than 10\% of the observed variance, a condition incompatible with the BLUP assumption of Kriging.
\end{itemize}
Additionally, the Monte Carlo validation (Section~\ref{sec:mc}) provides \emph{empirical justification} of the \(\pm 30\%\) sensitivity range: the observed coverage of 85.0\% confirms that this range captures the majority of cases in networks with 9 stations × 30 years.

\subsection{Systematic bias and calibration recommendations}
The Monte Carlo validation identifies a systematic underestimation bias of \(-20.7\%\). This finding, far from invalidating the framework, enriches it by making it quantitatively honest: the user knows that, for minimum networks in climates dominated by heavy-tailed GEV (\(\kappa < 0\)), the framework's final recommendation systematically underestimates the true GRADEX by approximately one-fifth of its value. The operational recommendations are:
\begin{enumerate}
\item For preliminary design with networks \(< 200\) station-years in Andean climates, apply a correction factor of \(+20\% \le f_c \le +25\%\) to the final GRADEX.
\item For definitive design, expand the network to at least \(\geq 500\) station-years before applying the framework; the bias decays with the accumulated record length.
\item Consider replacing the pointwise Gumbel layer with GEV-L-moments in future versions of the framework to reduce the bias at the source.
\end{enumerate}

\subsection{Implications for hydrological design}
The GRADEX parameter is not entered directly into HEC-HMS as a model parameter, but is used to construct the design hyetograph. The recommended workflow is:

\begin{enumerate}
\item Use the base recommendation value (20.40 mm) as the primary GRADEX estimator, optionally corrected upward by \(+20\%\) if the network has \(< 200\) station-years.
\item Construct the design storm for \(T = 100\) years using \(P_{100} = P_{10} + \text{GRADEX} \times (y_{100} - y_{10})\).
\item Distribute the precipitation into a hyetograph using the alternating block method or the SCS method.
\item Enter the hyetograph into HEC-HMS as \textit{User-Specified Hyetograph}.
\item Perform sensitivity analysis using the minimum (14.28 mm) and maximum (26.52 mm) values corresponding to the \(\pm 30\%\) range (empirical coverage of 85\%).
\end{enumerate}

\subsection{Criteria for rain gauge network expansion}
The case study underscores that expanding the rain gauge network is the most effective measure to improve the reliability of the GRADEX estimate. As a practical guiding criterion, the following thresholds are proposed:

\begin{itemize}
\item \textbf{Moderate confidence:} \(\geq 8\) stations with records \(\geq 25\) years (\(\geq 200\) station-years, regimen of the current case study).
\item \textbf{High confidence:} \(\geq 12\) stations with records \(\geq 30\) years (\(\geq 360\) station-years).
\item \textbf{Reliable discrimination of the \(Z\) test:} \(\geq 500\) station-years \cite{hosking1997}.
\end{itemize}

\section{Limitations and Future Work}
\label{sec:limitations}
\subsection{Known limitations}
\begin{enumerate}
\item The GRADEX approximation for distributions other than Gumbel (chord slope between \(T = 10\) and \(T = 100\) years) may underestimate the true tail slope for heavy-tailed distributions at return periods exceeding 100 years.
\item \textbf{Systematic bias identified by Monte Carlo validation:} when the true distribution is GEV with \(\kappa < 0\) (heavy-tailed) and the pointwise layer uses Gumbel-L-moments, the framework underestimates GRADEX by approximately 21\%. This bias should be corrected by a multiplicative factor of \(\sim 1.25\) in preliminary design applications, or by replacing the pointwise layer with GEV-L-moments in future versions.
\item The regional analysis assumes regional homogeneity. If \(H \geq 2\), the regional distribution fitting lacks statistical justification and the results should be discarded.
\item With fewer than 8 stations, the Ljung-Box test has very low power to detect real autocorrelation.
\item LOO cross-validation with fewer than 6 stations may be optimistic in estimating prediction error.
\item Kriging requires at least 5 stations for robust semivariogram fitting. Additionally, in regions with nugget-dominated variograms (N/S \(\to\) 1), Kriging may collapse to a near-constant predictor; the Gate C safeguard of v1.0 detects this pathological case.
\item The framework does not incorporate elevation-precipitation relationships (altitudinal gradient), particularly relevant in Andean basins with strong orographic effects such as the Cauca River basin.
\item The framework assumes stationarity in the annual maximum precipitation series. The presence of significant trends would violate this assumption and the results should be interpreted with caution \cite{milly2008, cooley2009}; in the current case study, no station exhibits a significant trend, but the detrended sensitivity analysis (Table~\ref{table:sensitivity}) shows that even non-significant trends can modify the GRADEX by up to 34\%. GRADEX-DUAL does not currently implement non-stationary models.
\item The Monte Carlo validation assumes a regional GEV distribution with fixed parameters. The generalization of the experiment to other distributional forms (GLO, GNO, PE3, GPA), other record lengths, and other network sizes remains pending as future work.
\end{enumerate}

\subsection{Lines of future development}
\begin{enumerate}
\item Replacement of the pointwise Gumbel layer with GEV-L-moments to reduce the systematic bias identified by the Monte Carlo validation.
\item Generalization of the Monte Carlo experiment to construct bias and coverage maps as a function of record length and network size.
\item Integration of Universal Kriging with elevation as a trend covariate for Andean basins.
\item Implementation of the formal regional GRADEX estimation framework \cite{galea1997}.
\item Extension to sub-daily precipitation for the generation of IDF curves.
\item Automatic delineation of sub-regions based on hierarchical clustering of L-moments.
\item Incorporation of non-stationary models (GEV with time-dependent parameters) for series with significant trends.
\item Automatic changepoint analysis (Pettitt, Buishand) for temporal segmentation of non-homogeneous series.
\item Interactive Shiny dashboard for real-time parameter adjustment.
\end{enumerate}

\section{Conclusions}
GRADEX-DUAL v1.0, an R computational framework that combines classic pointwise frequency analysis \cite{guillot1967} with regional frequency analysis by L-moments \cite{hosking1997} in an automated, auditable, and reproducible workflow for GRADEX parameter estimation in hydrological design applications with HEC-HMS, was presented.

The main contributions of this work are:

\begin{enumerate}
\item A weighting system with \emph{three} quality gates that objectively integrates both methodological layers: Gate A (H\&W bootstrap CI amplitude \(\geq 100\%\)), Gate B (cross-validation \(R^2 < 0.3\)), and Gate C (Kriging variance ratio \(< 0.10\)). The third gate detects the pathological regime of nugget-dominated Kriging, which produces near-constant predictions and spuriously favorable \(R^2\) and |BIAS| metrics.
\item A complete set of prior diagnostics —serial independence (Ljung--Box), regional homogeneity (Hosking--Wallis \(H\) statistic), discordancy \(D\), Mann--Kendall trend with Theil--Sen slope, and detrended sensitivity analysis— that verifies the assumptions of frequency analysis before proceeding with estimation.
\item The implementation of the Hosking and Wallis bootstrap simulation algorithm (Section~6.3 of \cite{hosking1997}), which simultaneously propagates the uncertainty of parameter estimation, inter-station variability, record-length differences, and spatial interpolation error in a single CI with correct nominal coverage.
\item \textbf{A Monte Carlo validation experiment} against synthetic regions with known true GRADEX, which quantifies the bias (\(-20.7\%\) in the case study regimen), the RMSE (8.32 mm), and the empirical coverage of the \(\pm 30\%\) sensitivity interval (85.0\% over 200 replicates). This validation provides the first empirical justification of the \(\pm 30\%\) range and reveals that the pointwise Gumbel layer systematically underestimates GRADEX when the true regional distribution is heavy-tailed GEV.
\item The evaluation in the upper Porce River basin (northern Antioquia, Colombia, Porce–Nechí–Cauca–Magdalena hydrographic system) with a network of \emph{nine IDEAM stations} and 254 station-years demonstrates that, even with a diagnostically homogeneous region (\(H = -0.92\)), the three gates are triggered, the system recognizes its own inadequacy, and correctly delivers a GRADEX of 20.40 mm with \(\pm 30\%\) sensitivity (range [14.28; 26.52] mm) in the \textit{fallback} regime.
\end{enumerate}

The case study underscores that expanding the rain gauge network is the most effective measure to improve the reliability of the GRADEX estimate. At least 8 stations with records of 25 years or more (\(\geq 200\) station-years) are required for the system to operate outside the \textit{fallback} regime, and at least 500 station-years for the \(Z\) test to be discriminative \cite{hosking1997}.

\medskip

\noindent \textbf{Practical recommendation for framework users:} for preliminary design applications in Andean climates with networks \(< 200\) station-years, it is suggested to apply a correction factor of \(+20\% \le f_c \le +25\%\) to the framework's final GRADEX as compensation for the systematic bias identified by the Monte Carlo validation. For definitive design, expand the network to \(\geq 500\) station-years or replace the pointwise Gumbel layer with GEV-L-moments.

\section*{Code and Data Availability}
The GRADEX-DUAL v1.0 computational framework is open-source. The R script and the case study data are available at \url{https://github.com/MauricioVictoriaN/GRADEX-DUAL} under the MIT license.

\section*{Acknowledgments}
The author thanks the anonymous reviewers for their valuable comments and suggestions, which significantly contributed to improving the quality of this manuscript.

\section*{Declarations}
\textbf{Conflict of interest:} The author declares no conflicts of interest.

\textbf{Funding:} This research was self-funded by the author.

\textbf{Author contribution:} M.J.V.N. conceptualized the dual framework, developed the R code, performed the data analysis, and wrote the manuscript.

\begin{thebibliography}{99}

\bibitem{guillot1967}
Guillot, P. and Duband, D. (1967). La méthode du Gradex pour le calcul de la probabilité des crues à partir des pluies. \emph{IAHS Publication No. 84}, 560--569.

\bibitem{naghettini1995}
Naghettini, M. (1995). O gradiente de chuvas intensas como parâmetro de projeto de obras hidráulicas. \emph{XI Simpósio Brasileiro de Recursos Hídricos}, Recife, Brasil.

\bibitem{galea1997}
Galea, G. and Prudhomme, C. (1997). Notions de base et concepts utiles pour la compréhension de la modélisation synthétique des régimes de crue. \emph{Revue des Sciences de l'Eau}, 10(1), 83--101. \doi{10.7202/705276ar}.

\bibitem{hosking1997}
Hosking, J.R.M. and Wallis, J.R. (1997). \emph{Regional Frequency Analysis: An Approach Based on L-Moments}. Cambridge University Press. \doi{10.1017/CBO9780511529443}.

\bibitem{hosking1990}
Hosking, J.R.M. (1990). L-moments: analysis and estimation of distributions using linear combinations of order statistics. \emph{Journal of the Royal Statistical Society, Series B}, 52(1), 105--124. \doi{10.1111/j.2517-6161.1990.tb01775.x}.

\bibitem{hiemstra2009}
Hiemstra, P.H., Pebesma, E.J., Twenhofel, C.J.W. and Heuvelink, G.B.M. (2009). Real-time automatic interpolation of ambient gamma dose rates from the Dutch radioactivity monitoring network. \emph{Computers \& Geosciences}, 35(8), 1711--1721. \doi{10.1016/j.cageo.2008.10.011}.

\bibitem{ljung1978}
Ljung, G.M. and Box, G.E.P. (1978). On a measure of lack of fit in time series models. \emph{Biometrika}, 65(2), 297--303. \doi{10.1093/biomet/65.2.297}.

\bibitem{stedinger1993}
Stedinger, J.R., Vogel, R.M. and Foufoula-Georgiou, E. (1993). Frequency analysis of extreme events. In: D.R. Maidment (ed.), \emph{Handbook of Hydrology}, McGraw-Hill. Chapter 18.

\bibitem{omm2008}
World Meteorological Organization (WMO) (2008). \emph{Guide to Hydrological Practices. Volume II: Management of Water Resources and Application of Hydrological Practices}. WMO-No. 168 (6th ed.), Geneva.

\bibitem{khaliq2006}
Khaliq, M.N., Ouarda, T.B.M.J., Ondo, J.-C., Gachon, P. and Bobée, B. (2006). Frequency analysis of a sequence of dependent and/or non-stationary hydro-meteorological observations: A review. \emph{Journal of Hydrology}, 329(3--4), 534--552. \doi{10.1016/j.jhydrol.2006.03.004}.

\bibitem{milly2008}
Milly, P.C.D., Betancourt, J., Falkenmark, M., Hirsch, R.M., Kundzewicz, Z.W., Lettenmaier, D.P. and Stouffer, R.J. (2008). Stationarity is dead: Whither water management? \emph{Science}, 319(5863), 573--574. \doi{10.1126/science.1151915}.

\bibitem{cooley2009}
Cooley, D. (2009). Extreme value analysis and the study of climate change. \emph{Climatic Change}, 97, 77--83. \doi{10.1007/s10584-009-9627-x}.

\end{thebibliography}

\end{document}