Source code for robpy.utils.median

import numpy as np


[docs] def l1median(X: np.ndarray) -> float: """ Implementation of the L1-median. Args: X (np.ndarray): Data to compute the L1-median on. References: - Fritz, H., Filzmoser, P., & Croux, C. (2012). A comparison of algorithms for the multivariate L1-median. Computational Statistics, 27, 393-410. """ epsilon = 1e-7 mu_0 = np.mean(X, axis=0) while True: centered_X = X - mu_0 d = np.sqrt(np.sum(centered_X * centered_X, axis=1)) W = 1 / d mu_1 = np.sum((X * W[:, np.newaxis]).T / np.sum(W), axis=1) if np.sum(np.abs(mu_0 - mu_1)) < epsilon: return mu_1 else: mu_0 = mu_1
[docs] def weighted_median(X: np.ndarray, weights: np.ndarray) -> float: """ Computes a weighted median. Args: X (np.ndarray): Data to compute the weighted median on. weights (np.ndarray): The weigths used. References: - Croux, C., & Rousseeuw, P. J. (1992). Time-efficient algorithms for two highly robust estimators of scale. In Computational Statistics: Volume 1: Proceedings of the 10th Symposium on Computational Statistics (pp. 411-428). Heidelberg: Physica-Verlag HD. """ n = len(X) wrest = 0 wtotal = np.sum(weights) Xcand = X while True: k = np.ceil(n / 2).astype("int") if n > 1: trial = np.partition(X, k - 1)[ :k ].max() # k^th order statistic, I think this can be programmed better... else: return Xcand[0] wleft = np.sum(weights[X < trial]) wmid = np.sum(weights[X == trial]) if (2 * (wrest + wleft)) > wtotal: Xcand = X[X < trial] weightscand = weights[X < trial] elif (2 * (wrest + wleft + wmid)) > wtotal: return trial else: Xcand = X[X > trial] weightscand = weights[X > trial] wrest = wrest + wleft + wmid X = Xcand weights = weightscand n = len(X)