Source code for robpy.univariate.qn

import numpy as np

from robpy.univariate.base import RobustScale, LocationOrScaleEstimator
from robpy.utils.median import weighted_median



[docs]
class Qn(RobustScale):
    def __init__(
        self,
        location_func: LocationOrScaleEstimator = np.median,
        consistency_correction: bool = True,
        finite_correction: bool = True,
    ):
        """
        The Qn estimator of Rousseeuw, P.J. & Croux, C. (1993) as implemented in the
        :math:`O(n \\log n)` algorithm of Croux, C. & Rousseeuw, P.J. (1992).

        Args:
            location_func (LocationOrScaleEstimator, optional):
                As the Qn estimator does not estimate location, a location function should be
                explicitly passed. Defaults to np.median.
            consistency_correction (boolean, optional):
                Boolean indicating if consistency for normality should be applied. Defaults to True.
            finite_correction (boolean, optional):
                Boolean indicating if finite sample correction should be applied. Defaults to True.

        References:
            - Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust
              estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th
              Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD.
            - Rousseeuw P.J., & Croux, C. (1993). Alternatives to the Median Absolute Deviation.
              Journal of the American Statistical Association, 88(424), 1273–1283
        """
        super().__init__()
        self.location_func = location_func
        self.consistency_correction = consistency_correction
        self.finite_correction = finite_correction

    def _calculate(self, X: np.ndarray):
        """
        Args:
            X (np.ndarray):
                Univariate data.
        """

        n = len(X)
        h = n // 2 + 1
        k = h * (h - 1) // 2
        y = np.sort(X)
        left = n + 1 - np.arange(n)
        right = np.full(n, n)
        jhelp = n * (n + 1) // 2
        knew = k + jhelp
        nL = jhelp
        nR = n * n
        found = False

        while (nR - nL) > n and (not found):
            weight = right - left + 1
            jhelp = (left + weight // 2).astype(int)
            work = y - y[n - jhelp]
            trial = weighted_median(work, weight)
            P = np.searchsorted(-np.flip(y), trial - y, "left", np.arange(n))
            Q = np.searchsorted(-np.flip(y), trial - y, "right", np.arange(n)) + 1
            if knew <= np.sum(P):
                right = P
                nR = np.sum(P)
            elif knew > (np.sum(Q) - n):
                left = Q
                nL = np.sum(Q) - n
            else:
                Qn = trial
                found = True
        if not found:
            work = []
            for i in range(1, n):
                if left[i] <= right[i]:
                    for jj in range(int(left[i]), int(right[i] + 1)):
                        work.append(y[i] - y[n - jj - 1 + 1])
            k = int(knew - nL)
            Qn = np.partition(np.array(work), k - 1)[:k].max()

        if self.finite_correction:
            dn = _get_small_sample_dn(n)
            Qn = dn * Qn
        if self.consistency_correction:
            c = 2.219144
            Qn = c * Qn

        self.scale_ = Qn
        self.location_ = self.location_func(X)



def _get_small_sample_dn(n: int):
    """
    Calculates the correction factor for the Qn estimator at small samples.

    References:
        - Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust
        estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th
        Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD.
    """
    DNDICT = {
        2: 0.399,
        3: 0.994,
        4: 0.512,
        5: 0.844,
        6: 0.611,
        7: 0.857,
        8: 0.669,
        9: 0.872,
    }
    if n <= 9:
        return DNDICT.get(n)
    elif n % 2 != 0:
        return n / (n + 1.4)
    return n / (n + 3.8)