import numpy as np
from robpy.univariate.base import RobustScale, LocationOrScaleEstimator
from robpy.utils.median import weighted_median
[docs]
class Qn(RobustScale):
def __init__(
self,
location_func: LocationOrScaleEstimator = np.median,
consistency_correction: bool = True,
finite_correction: bool = True,
):
"""
The Qn estimator of Rousseeuw, P.J. & Croux, C. (1993) as implemented in the
:math:`O(n \\log n)` algorithm of Croux, C. & Rousseeuw, P.J. (1992).
Args:
location_func (LocationOrScaleEstimator, optional):
As the Qn estimator does not estimate location, a location function should be
explicitly passed. Defaults to np.median.
consistency_correction (boolean, optional):
Boolean indicating if consistency for normality should be applied. Defaults to True.
finite_correction (boolean, optional):
Boolean indicating if finite sample correction should be applied. Defaults to True.
References:
- Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust
estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th
Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD.
- Rousseeuw P.J., & Croux, C. (1993). Alternatives to the Median Absolute Deviation.
Journal of the American Statistical Association, 88(424), 1273–1283
"""
super().__init__()
self.location_func = location_func
self.consistency_correction = consistency_correction
self.finite_correction = finite_correction
def _calculate(self, X: np.ndarray):
"""
Args:
X (np.ndarray):
Univariate data.
"""
n = len(X)
h = n // 2 + 1
k = h * (h - 1) // 2
y = np.sort(X)
left = n + 1 - np.arange(n)
right = np.full(n, n)
jhelp = n * (n + 1) // 2
knew = k + jhelp
nL = jhelp
nR = n * n
found = False
while (nR - nL) > n and (not found):
weight = right - left + 1
jhelp = (left + weight // 2).astype(int)
work = y - y[n - jhelp]
trial = weighted_median(work, weight)
P = np.searchsorted(-np.flip(y), trial - y, "left", np.arange(n))
Q = np.searchsorted(-np.flip(y), trial - y, "right", np.arange(n)) + 1
if knew <= np.sum(P):
right = P
nR = np.sum(P)
elif knew > (np.sum(Q) - n):
left = Q
nL = np.sum(Q) - n
else:
Qn = trial
found = True
if not found:
work = []
for i in range(1, n):
if left[i] <= right[i]:
for jj in range(int(left[i]), int(right[i] + 1)):
work.append(y[i] - y[n - jj - 1 + 1])
k = int(knew - nL)
Qn = np.partition(np.array(work), k - 1)[:k].max()
if self.finite_correction:
dn = _get_small_sample_dn(n)
Qn = dn * Qn
if self.consistency_correction:
c = 2.219144
Qn = c * Qn
self.scale_ = Qn
self.location_ = self.location_func(X)
def _get_small_sample_dn(n: int):
"""
Calculates the correction factor for the Qn estimator at small samples.
References:
- Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust
estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th
Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD.
"""
DNDICT = {
2: 0.399,
3: 0.994,
4: 0.512,
5: 0.844,
6: 0.611,
7: 0.857,
8: 0.669,
9: 0.872,
}
if n <= 9:
return DNDICT.get(n)
elif n % 2 != 0:
return n / (n + 1.4)
return n / (n + 3.8)