Source code for robpy.univariate.qn

import numpy as np

from robpy.univariate.base import RobustScale, LocationOrScaleEstimator
from robpy.utils.median import weighted_median


[docs] class Qn(RobustScale): def __init__( self, location_func: LocationOrScaleEstimator = np.median, consistency_correction: bool = True, finite_correction: bool = True, ): """ The Qn estimator of Rousseeuw, P.J. & Croux, C. (1993) as implemented in the :math:`O(n \\log n)` algorithm of Croux, C. & Rousseeuw, P.J. (1992). Args: location_func (LocationOrScaleEstimator, optional): As the Qn estimator does not estimate location, a location function should be explicitly passed. Defaults to np.median. consistency_correction (boolean, optional): Boolean indicating if consistency for normality should be applied. Defaults to True. finite_correction (boolean, optional): Boolean indicating if finite sample correction should be applied. Defaults to True. References: - Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD. - Rousseeuw P.J., & Croux, C. (1993). Alternatives to the Median Absolute Deviation. Journal of the American Statistical Association, 88(424), 1273–1283 """ super().__init__() self.location_func = location_func self.consistency_correction = consistency_correction self.finite_correction = finite_correction def _calculate(self, X: np.ndarray): """ Args: X (np.ndarray): Univariate data. """ n = len(X) h = n // 2 + 1 k = h * (h - 1) // 2 y = np.sort(X) left = n + 1 - np.arange(n) right = np.full(n, n) jhelp = n * (n + 1) // 2 knew = k + jhelp nL = jhelp nR = n * n found = False while (nR - nL) > n and (not found): weight = right - left + 1 jhelp = (left + weight // 2).astype(int) work = y - y[n - jhelp] trial = weighted_median(work, weight) P = np.searchsorted(-np.flip(y), trial - y, "left", np.arange(n)) Q = np.searchsorted(-np.flip(y), trial - y, "right", np.arange(n)) + 1 if knew <= np.sum(P): right = P nR = np.sum(P) elif knew > (np.sum(Q) - n): left = Q nL = np.sum(Q) - n else: Qn = trial found = True if not found: work = [] for i in range(1, n): if left[i] <= right[i]: for jj in range(int(left[i]), int(right[i] + 1)): work.append(y[i] - y[n - jj - 1 + 1]) k = int(knew - nL) Qn = np.partition(np.array(work), k - 1)[:k].max() if self.finite_correction: dn = _get_small_sample_dn(n) Qn = dn * Qn if self.consistency_correction: c = 2.219144 Qn = c * Qn self.scale_ = Qn self.location_ = self.location_func(X)
def _get_small_sample_dn(n: int): """ Calculates the correction factor for the Qn estimator at small samples. References: - Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD. """ DNDICT = { 2: 0.399, 3: 0.994, 4: 0.512, 5: 0.844, 6: 0.611, 7: 0.857, 8: 0.669, 9: 0.872, } if n <= 9: return DNDICT.get(n) elif n % 2 != 0: return n / (n + 1.4) return n / (n + 3.8)