# Source code for robpy.preprocessing.utils
import numpy as np
from typing import Callable
from scipy.stats import median_abs_deviation
# [docs]
def wrapping_transformation(
    X: np.ndarray,
    b: float = 1.5,
    c: float = 4.0,
    q1: float = 1.540793,
    q2: float = 0.8622731,
    rescale: bool = False,
    location_estimator: Callable[[np.ndarray, int], np.ndarray] = np.median,
    scale_estimator: Callable[[np.ndarray, int], np.ndarray] = median_abs_deviation,
) -> np.ndarray:
    r"""
    Wrap the columns of ``X``: standardize each column with a location and a
    scale estimate, pass the resulting scores through the wrapping function
    below, and map the result back to the original units.

    .. math::

        \Psi_{b, c}(z) =
        \begin{cases}
            z & if \ 0 \leq |z| < b \\
            q_1 \tanh\left(q_2 (c - |z|)\right) \mathrm{sign}(z) & if \ b \leq |z| \leq c \\
            0 & if \ c < |z|
        \end{cases}

    Args:
        X: data to be transformed, must have shape (N, D)
        b: lower cutoff; scores with ``|z| < b`` are left unchanged
        c: upper cutoff; scores with ``|z| > c`` are mapped to 0
        q1, q2: transformation parameters (see formula)
        rescale: if False, the wrapped scores are multiplied by the scale
            estimates and shifted by the location estimates. If True, the
            wrapped scores are additionally centered by their own column mean
            and divided by their own column standard deviation first, so that
            each output column has mean equal to its location estimate and
            standard deviation equal to its scale estimate (whenever the
            wrapped column is not constant).
        location_estimator: callable invoked as ``location_estimator(X, axis=0)``
            that returns one location estimate per column
        scale_estimator: callable invoked as ``scale_estimator(X, axis=0)``
            that returns one scale estimate per column

    Returns:
        transformed data, in the original units of ``X``
    """
    # NOTE(review): scipy's median_abs_deviation defaults to scale=1.0 (no
    # normal-consistency factor) -- confirm the default cutoffs b and c are
    # meant for scores standardized with that raw MAD.
    center = location_estimator(X, axis=0)
    spread = scale_estimator(X, axis=0)

    # A zero scale would divide by zero; standardize those columns with 1.
    # The original (possibly zero) ``spread`` is still used to map back.
    safe_spread = np.where(spread == 0, 1, spread)
    z = (X - center) / safe_spread

    abs_z = np.abs(z)
    # Smooth tail used on b <= |z| <= c; it reaches exactly 0 at |z| == c.
    tail = q1 * np.tanh(q2 * (c - abs_z)) * np.sign(z)
    # Branch order matters: anything failing both comparisons (|z| > c, NaN)
    # ends up as 0.
    wrapped = np.where(abs_z < b, z, np.where(abs_z <= c, tail, 0))

    if not rescale:
        return wrapped * spread + center

    wrapped_mean = np.mean(wrapped, axis=0)
    wrapped_std = np.std(wrapped, axis=0)
    # Constant wrapped columns have zero std; avoid dividing by it.
    safe_std = np.where(wrapped_std == 0, 1, wrapped_std)
    factor = spread / safe_std
    return wrapped * factor + center - (wrapped_mean * factor)