Source code for parasolpy.nowak

"""Nonparametric stochastic disaggregation after Nowak et al. (2010).

Reference:
    Nowak, K., J. Prairie, B. Rajagopalan, and U. Lall (2010),
    A nonparametric stochastic approach for multisite disaggregation of
    annual to daily streamflow, Water Resour. Res., 46, W08529.
"""

import numpy as np



[docs]
def choose_analog_index(rng, Z, sim_Z):
    """Select an analog year index using K-nearest-neighbor weighting.

    Finds the K nearest neighbors in ``Z`` to the simulated annual flow
    ``sim_Z`` (where ``K = floor(sqrt(len(Z)))``), then draws one index
    with probability inversely proportional to rank distance (eq. 1 in the
    paper).

    Args:
        rng: numpy random Generator instance (e.g. ``numpy.random.default_rng()``).
        Z: 1-D numpy array of observed aggregated (annual) flows.
        sim_Z: Scalar simulated aggregated flow for the current year.

    Returns:
        int: Index into ``Z`` of the chosen analog year.
    """
    # Calculate the distance between the yearly flows and the simulated value
    dist = np.absolute(Z - sim_Z)

    # inds will be the indices of the original sequence in ascending order
    inds = dist.argsort()

    # these are the yearly flows, sorted by their distance from the simulated flow
    sorted_Z = Z[inds]

    # the number of neighbors is a function of the
    # number of datapoints in the yearly sequence
    K = int(np.floor(np.sqrt(len(Z))))

    # the weight function gives the most weight
    # to the first neighbor (eq 1 in the paper)
    W = np.zeros(K)
    for i in range(K):
        W[i] = (1. / (i + 1.)) / sum(1. / k for k in range(1, K + 1))
    W = W / W.sum()

    # here, we only keep the K nearest neighbors based on distance
    neighbors = sorted_Z[0:K]
    neighbors_inds = inds[0:K]

    # the index of the chosen year in the original Z sequence
    chosen_index = rng.choice(neighbors_inds, size=1, p=W)

    return int(chosen_index[0])




[docs]
def sim_single_year(rng, Z, p, years, sim_Z, print_results=False):
    """Disaggregate one simulated annual flow into sub-annual periods.

    Selects an analog year via :func:`choose_analog_index`, then scales its
    proportion vector by ``sim_Z`` to produce a disaggregated flow sequence.

    Args:
        rng: numpy random Generator instance.
        Z: 1-D array of observed annual flows (length = number of historical years).
        p: 2-D array of shape ``(num_years, num_periods)`` where each row is the
            fraction of annual flow in each sub-annual period.
        years: 1-D array of calendar years corresponding to rows of ``p``.
        sim_Z: Scalar simulated annual flow for the year being disaggregated.
        print_results: If True, print a one-line summary to stdout.

    Returns:
        numpy.ndarray: 1-D array of length ``num_periods`` with the disaggregated
        flow for each sub-annual period.
    """

    chosen_index = choose_analog_index(rng, Z, sim_Z)

    # ...is used to find a proportion vector
    sim_p = p[chosen_index, :]

    # the simulated flow sequence is the proportion multiplied by simulated yearly flow
    sim_flow = sim_p * sim_Z

    if print_results is True:
        print(
            f"Sim annual {sim_Z: 0.1f}, "
            f"using analog year {years[chosen_index]}: "
            f"{np.array2string(sim_flow, precision=1, floatmode='fixed')}"
        )

    return sim_flow




[docs]
def sim_multi_trace(rng, Z, p, years, sim_Z, repl=1, print_results=False):
    """Disaggregate multiple simulated annual sequences into sub-annual traces.

    Calls :func:`sim_single_year` for every combination of sequence, replicate,
    and year in ``sim_Z``, assembling results into a single output matrix.

    Args:
        rng: numpy random Generator instance.
        Z: 1-D array of observed annual flows.
        p: 2-D array of shape ``(num_years, num_periods)`` with proportion vectors.
        years: 1-D array of calendar years corresponding to rows of ``p``.
        sim_Z: 2-D array of shape ``(num_sequences, num_sim_years)`` with simulated
            annual flows; rows are independent sequences, columns are years.
        repl: Number of replicates generated for each row of ``sim_Z``. Defaults to 1.
        print_results: If True, print per-year summary lines to stdout.

    Returns:
        numpy.ndarray: 2-D array of shape
        ``(num_periods * num_sim_years, num_sequences * repl)`` containing the
        disaggregated flows. Each column is one complete trace.
    """

    # what is the shape of the proportion matrix, p?
    num_input_years, num_periods = p.shape

    # what is the shape of the simulated annual data?
    num_seq, num_sim_years = sim_Z.shape

    # the output matrix will have the following shape:
    # rows: number of disaggregated periods * number of simulated years
    # columns: number of annual sequences * number of replicates
    #
    # example: seasonal data (4x per year), for sequences of 3 years: 12 rows
    # 5 annual sequences, each repeated twice: 10 columns
    mat_sim = np.zeros((num_periods * num_sim_years, num_seq * repl))

    j = 0  # column for the final output
    for r in range(repl):  # repeat for the number of replicates given by repl
        for s in range(num_seq):  # multiple sequences
            i = 0  # row for the final output
            for y in range(num_sim_years):  # multiple years
                sim_seas = sim_single_year(rng, Z, p, years, sim_Z[s, y])
                mat_sim[i:i + num_periods, j] = sim_seas
                i = i + num_periods
            j = j + 1

    return mat_sim