Source code for robpy.preprocessing.utils

import numpy as np

from typing import Callable
from scipy.stats import median_abs_deviation


def wrapping_transformation(
    X: np.ndarray,
    b: float = 1.5,
    c: float = 4.0,
    q1: float = 1.540793,
    q2: float = 0.8622731,
    rescale: bool = False,
    location_estimator: Callable[[np.ndarray, int], np.ndarray] = np.median,
    scale_estimator: Callable[[np.ndarray, int], np.ndarray] = median_abs_deviation,
) -> np.ndarray:
    """
    Implementation of wrapping using this transformation function:

    .. math::

        \\Psi_{b, c}(z) =
        \\begin{cases}
            z & if \\ 0 \\leq |z| < b \\\\
            q_1 \\tanh\\left(q_2 (c - |z|)\\right) \\mathrm{sign}(z)
                & if \\ b \\leq |z| \\leq c \\\\
            0 & if \\ c < |z|
        \\end{cases}

    Every column of ``X`` is standardised with its estimated location and
    scale, the resulting scores are passed through :math:`\\Psi_{b, c}`, and
    the wrapped scores are mapped back to the original units.

    Args:
        X: data to be transformed, must have shape (N, D)
        b: lower cutoff; scores with ``|z| < b`` are left unchanged
        c: upper cutoff; scores with ``|z| > c`` are mapped to 0
        q1, q2: transformation parameters (see formula)
        rescale: if True, the wrapped scores are additionally centred by
            their sample mean and divided by their sample standard deviation
            (``np.mean`` / ``np.std`` along axis 0) before being mapped back,
            so that the mean and standard deviation of the returned columns
            equal the location and scale estimates of the original columns
        location_estimator: callable invoked as
            ``location_estimator(X, axis=0)``; must return one location
            estimate per column of X
        scale_estimator: callable invoked as ``scale_estimator(X, axis=0)``;
            must return one scale estimate per column of X

    Returns:
        transformed data, expressed in the same units as ``X``
    """
    locations = location_estimator(X, axis=0)
    scales = scale_estimator(X, axis=0)

    # A zero scale estimate would cause a division by zero; such columns are
    # standardised with a unit scale instead.
    safe_scales = np.where(scales == 0, 1, scales)
    z = (X - locations) / safe_scales

    # |z| is needed by both cutoff tests and by the tanh branch.
    abs_z = np.abs(z)
    z_wrapped = np.where(
        abs_z < b,
        z,
        np.where(abs_z <= c, q1 * np.tanh(q2 * (c - abs_z)) * np.sign(z), 0),
    )

    if not rescale:
        return z_wrapped * scales + locations

    z_wrapped_mean = np.mean(z_wrapped, axis=0)
    z_wrapped_std = np.std(z_wrapped, axis=0)
    # Same zero guard as above, this time for columns whose wrapped scores
    # are all identical.
    safe_std = np.where(z_wrapped_std == 0, 1, z_wrapped_std)
    ratio = scales / safe_std
    return z_wrapped * ratio + locations - (z_wrapped_mean * ratio)