Source code for dfttoolkit.utils.math_utils

from copy import deepcopy
from typing import Any

import numpy as np
import numpy.typing as npt
import scipy



[docs]
def get_rotation_matrix(
    vec_start: npt.NDArray[np.float64], vec_end: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Calculate the rotation matrix to align two unit vectors.

    Given a two (unit) vectors, vec_start and vec_end, this function calculates the
    rotation matrix U, so that U * vec_start = vec_end.

    U the is rotation matrix that rotates vec_start to point in the direction
    of vec_end.

    https://math.stackexchange.com/questions/180418/calculate-rotation-matrix-to-align-vector-a-to-vector-b-in-3d/897677

    Parameters
    ----------
    vec_start, vec_end : NDArray[float64]
        Two vectors that should be aligned. Both vectors must have a l2-norm of 1.

    Returns
    -------
    NDArray[float64]
        The rotation matrix U as npt.NDArray with shape (3,3)
    """
    if not np.isclose(np.linalg.norm(vec_start), 1) and not np.isclose(
        np.linalg.norm(vec_end), 1
    ):
        raise ValueError("`vec_start` and `vec_end` args must be unit vectors")

    v = np.cross(vec_start, vec_end)
    c = np.dot(vec_start, vec_end)
    v_x = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    return np.eye(3) + v_x + v_x.dot(v_x) / (1 + c)




[docs]
def get_rotation_matrix_around_axis(axis: npt.NDArray, phi: float) -> npt.NDArray:
    """
    Generate a rotation matrix around a given vector.

    Parameters
    ----------
    axis : NDArray
        Axis around which the rotation is done.
    phi : float
        Angle of rotation around axis in radiants.

    Returns
    -------
    NDArray
        Rotation matrix
    """
    axis_vec = np.array(axis, dtype=np.float64)
    axis_vec /= np.linalg.norm(axis_vec)

    eye = np.eye(3, dtype=np.float64)
    ddt = np.outer(axis_vec, axis_vec)
    skew = np.array(
        [
            [0, axis_vec[2], -axis_vec[1]],
            [-axis_vec[2], 0, axis_vec[0]],
            [axis_vec[1], -axis_vec[0], 0],
        ],
        dtype=np.float64,
    )

    return ddt + np.cos(phi) * (eye - ddt) + np.sin(phi) * skew




[docs]
def get_rotation_matrix_around_z_axis(phi: float) -> npt.NDArray:
    """
    Generate a rotation matrix around the z axis.

    Parameters
    ----------
    phi : float
        Angle of rotation around axis in radiants.

    Returns
    -------
    NDArray
        Rotation matrix
    """
    return get_rotation_matrix_around_axis(np.array([0.0, 0.0, 1.0]), phi)




[docs]
def get_mirror_matrix(normal_vector: npt.NDArray) -> npt.NDArray:
    """
    Generate a transformation matrix for mirroring through plane given by the normal.

    Parameters
    ----------
    normal_vector : NDArray
        Normal vector of the mirror plane.

    Returns
    -------
    NDArray
        Mirror matrix
    """
    n_vec = normal_vector / np.linalg.norm(normal_vector)
    eps = np.finfo(np.float64).eps
    a = n_vec[0]
    b = n_vec[1]
    c = n_vec[2]
    M = np.array(
        [
            [1 - 2 * a**2, -2 * a * b, -2 * a * c],
            [-2 * a * b, 1 - 2 * b**2, -2 * b * c],
            [-2 * a * c, -2 * b * c, 1 - 2 * c**2],
        ]
    )
    M[np.abs(M) < eps * 10] = 0
    return M




[docs]
def get_angle_between_vectors(
    vector_1: npt.NDArray, vector_2: npt.NDArray
) -> npt.NDArray:
    """
    Determine angle between two vectors.

    Parameters
    ----------
    vector_1 : NDArray
    vector_2 : NDArray

    Returns
    -------
    float
        Angle in radiants.
    """
    return (
        np.dot(vector_1, vector_2) / np.linalg.norm(vector_1) / np.linalg.norm(vector_2)
    )




[docs]
def get_fractional_coords(
    cartesian_coords: npt.NDArray, lattice_vectors: npt.NDArray
) -> npt.NDArray:
    """
    Transform cartesian coordinates into fractional coordinates.

    Parameters
    ----------
    cartesian_coords: NDArray
        Cartesian coordinates of atoms (can be Nx2 or Nx3)
    lattice_vectors: [N_dim x N_dim] numpy array:
        Matrix of lattice vectors: Each ROW corresponds to one lattice vector!

    Returns
    -------
    fractional_coords: [N x N_dim] numpy array
        Fractional coordinates of atoms
    """
    fractional_coords = np.linalg.solve(lattice_vectors.T, cartesian_coords.T)
    return fractional_coords.T




[docs]
def get_cartesian_coords(
    frac_coords: npt.NDArray, lattice_vectors: npt.NDArray
) -> npt.NDArray:
    """
    Transform fractional coordinates into cartesian coordinates.

    Parameters
    ----------
    frac_coords: NDArray
        Fractional coordinates of atoms (can be Nx2 or Nx3)
    lattice_vectors: NDArray
        Matrix of lattice vectors: Each ROW corresponds to one lattice vector!

    Returns
    -------
    NDArray
        Cartesian coordinates of atoms

    """
    return np.dot(frac_coords, lattice_vectors)




[docs]
def get_triple_product(a: npt.NDArray, b: npt.NDArray, c: npt.NDArray) -> npt.NDArray:
    """
    Get the triple product (DE: Spatprodukt): a*(bxc).

    Parameters
    ----------
    a: NDArray
        TODO
    b: NDArray
        TODO
    c: NDArray
        TODO

    Returns
    -------
    NDarray
        Triple product of each input vector
    """
    if len(a) != 3 or len(b) != 3 or len(c) != 3:
        raise ValueError("Each vector must be of length 3")

    return np.dot(np.cross(a, b), c)




[docs]
def smooth_function(y: npt.NDArray, box_pts: int) -> npt.NDArray[np.floating[Any]]:
    """
    Smooths a function using convolution.

    Parameters
    ----------
    y : NDArray
        TODO
    box_pts : int
        TODO

    Returns
    -------
    y_smooth : NDArray[floating[Any]]
        TODO
    """
    box = np.ones(box_pts) / box_pts
    return np.convolve(y, box, mode="same")




[docs]
def get_cross_correlation_function(
    signal_0: npt.NDArray,
    signal_1: npt.NDArray,
    detrend: bool = False,
) -> npt.NDArray:
    """
    Calculate the autocorrelation function for a given signal.

    Parameters
    ----------
    signal_0 : NDArray
        First siganl for which the correlation function should be calculated.
    signal_1 : NDArray
        Second siganl for which the correlation function should be calculated.

    Returns
    -------
    NDArray
        Autocorrelation function from 0 to max_lag.
    """
    if detrend:
        signal_0 = scipy.signal.detrend(signal_0)
        signal_1 = scipy.signal.detrend(signal_1)

    cross_correlation = np.correlate(signal_0, signal_1, mode="full")
    cross_correlation = cross_correlation[cross_correlation.size // 2 :]

    # normalize by number of overlapping data points
    cross_correlation /= np.arange(cross_correlation.size, 0, -1)
    cutoff = int(cross_correlation.size * 0.75)
    return cross_correlation[:cutoff]




[docs]
def get_autocorrelation_function_manual_lag(
    signal: npt.NDArray, max_lag: int
) -> npt.NDArray:
    """
    Alternative method for autocorrelation of a signal using numpy.corrcoef.

    Allows the lag to be set manually.

    Parameters
    ----------
    signal : NDArray
        Siganl for which the autocorrelation function should be calculated.
    max_lag : int | None
        Autocorrelation will be calculated for a range of 0 to max_lag,
        where max_lag is the largest lag for the calculation of the
        autocorrelation function

    Returns
    -------
    autocorrelation : npt.NDArray
        Autocorrelation function from 0 to max_lag.
    """
    lag = npt.NDArray(range(max_lag))
    autocorrelation = np.array([np.nan] * max_lag)

    for i in lag:
        corr = 1.0 if i == 0 else np.corrcoef(signal[i], signal[:-i])[0][1]
        autocorrelation[i] = corr

    return autocorrelation




[docs]
def get_fourier_transform(
    signal: npt.NDArray, time_step: float
) -> tuple[npt.NDArray, npt.NDArray]:
    """
    Calculate the fourier transform of a given siganl.

    Parameters
    ----------
    signal : NDArray
        Siganl for which the autocorrelation function should be calculated.
    time_step : float
        Time step of the signal in seconds.

    Returns
    -------
    tuple[NDArray, NDArray]
        Frequencs and absolute values of the fourier transform.

    """
    f = scipy.fft.fftfreq(signal.size, d=time_step)
    y = scipy.fft.fft(signal)

    L = f >= 0

    return f[L], y[L]




[docs]
def lorentzian(
    x: float | npt.NDArray[np.float64], a: float, b: float, c: float
) -> float | npt.NDArray[np.float64]:
    """
    Return a Lorentzian function.

    Parameters
    ----------
    x : float | NDArray[float64]
        Argument x of f(x) --> y.
    a : float
        Maximum of Lorentzian.
    b : float
        Width of Lorentzian.
    c : float
        Magnitude of Lorentzian.

    Returns
    -------
    f : float | NDArray[float64]
        Outupt of a Lorentzian function.
    """
    return c / (1.0 + ((x - a) / (b / 2.0)) ** 2)




[docs]
def exponential(
    x: float | npt.NDArray[np.float64], a: float, b: float
) -> float | npt.NDArray[np.float64]:
    """
    Return an exponential function.

    Parameters
    ----------
    x : float | NDArray[float64]
        Argument x of f(x) --> y
    a : float
        TODO
    b : float
        TODO
    """
    return a * np.exp(x * b)




[docs]
def double_exponential(
    x: float | npt.NDArray, a: float, b: float, c: float
) -> float | npt.NDArray:
    """TODO."""
    return a * (np.exp(x * b) + np.exp(x * c))




[docs]
def gaussian_window(N: int, std: float = 0.4) -> npt.NDArray[np.float64]:
    """
    Generate a Gaussian window.

    Parameters
    ----------
    N : int
        Number of points in the window.
    std : float
        Standard deviation of the Gaussian window, normalized
        such that the maximum value occurs at the center of the window.

    Returns
    -------
    window : NDarray[float64]
        Gaussian window of length N.

    """
    n = np.linspace(-1, 1, N)
    return np.exp(-0.5 * (n / std) ** 2)




[docs]
def apply_gaussian_window(
    data: npt.NDArray[np.float64], std: float = 0.4
) -> npt.NDArray[np.float64]:
    """
    Apply a Gaussian window to an array.

    Parameters
    ----------
    data : NDarray[float64]
        Input data array to be windowed.
    std : float
        Standard deviation of the Gaussian window.

    Returns
    -------
    windowed_data : NDArray[float64]
        Windowed data array.
    """
    N = len(data)
    window = gaussian_window(N, std)
    return data * window




[docs]
def hann_window(N: int) -> npt.NDArray[np.float64]:
    """
    Generate a Hann window.

    Parameters
    ----------
    N : int
        Number of points in the window.

    Returns
    -------
    NDArray
        Hann window of length N.
    """
    return 0.5 * (1 - np.cos(2 * np.pi * np.arange(N) / (N - 1)))




[docs]
def apply_window(data: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
    """
    Apply a Hann window to an array.

    Parameters
    ----------
    data : NDarray[float64]
        Input data array to be windowed.

    Returns
    -------
    NDarray[float64]
        Windowed data array.
    """
    N = len(data)
    window = hann_window(N)
    return data * window




[docs]
def norm_matrix_by_dagonal(matrix: npt.NDArray) -> npt.NDArray:
    """
    Norms a matrix such that the diagonal becomes 1.

    | a_11 a_12 a_13 |       |   1   a'_12 a'_13 |
    | a_21 a_22 a_23 |  -->  | a'_21   1   a'_23 |
    | a_31 a_32 a_33 |       | a'_31 a'_32   1   |

    Parameters
    ----------
    matrix : NDArray
        Matrix that should be normed.

    Returns
    -------
    matrix : NDArray
        Normed matrix.
    """
    diagonal = np.array(np.diagonal(matrix))
    L = diagonal == 0.0
    diagonal[L] = 1.0

    new_matrix = deepcopy(matrix)
    new_matrix /= np.sqrt(
        np.tile(diagonal, (matrix.shape[1], 1)).T
        * np.tile(diagonal, (matrix.shape[0], 1))
    )

    return new_matrix




[docs]
def mae(delta: npt.NDArray) -> np.floating:
    """
    Calculate the mean absolute error from a list of value differnces.

    Parameters
    ----------
    delta : NDArray
        Array containing differences

    Returns
    -------
    float
        mean absolute error
    """
    return np.mean(np.abs(delta))




[docs]
def rel_mae(
    delta: npt.NDArray, target_val: npt.NDArray
) -> np.floating | np.float64 | float:
    """
    Compute relative MAE from value differences and corresponding target values.

    Parameters
    ----------
    delta : NDArray
        Array containing differences
    target_val : NDArray
        Array of target values against which the difference should be compared

    Returns
    -------
    float
        relative mean absolute error
    """
    target_norm = np.mean(np.abs(target_val))
    return np.mean(np.abs(delta)).item() / (target_norm + 1e-9)




[docs]
def rmse(delta: npt.NDArray) -> float:
    """
    Calculate the root mean sqare error from a list of value differnces.

    Parameters
    ----------
    delta : NDArray
        Array containing differences

    Returns
    -------
    float
        root mean square error
    """
    return np.sqrt(np.mean(np.square(delta)))




[docs]
def rel_rmse(delta: npt.NDArray, target_val: npt.NDArray) -> float:
    """
    Calculate the relative root mean sqare error from a list of value differences.

    Parameters
    ----------
    delta : NDArray
        Array containing differences
    target_val : NDArray
        Array of target values against which the difference should be compared

    Returns
    -------
    float
        relative root mean sqare error

    """
    target_norm = np.sqrt(np.mean(np.square(target_val)))
    return np.sqrt(np.mean(np.square(delta))) / (target_norm + 1e-9)




[docs]
def get_moving_average(
    signal: npt.NDArray[np.float64], window_size: int
) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]:
    """
    Cacluated the moving average and the variance around the moving average.

    Parameters
    ----------
    signal : NDArray[float64]
        Signal for which the moving average should be calculated.
    window_size : int
        Window size for the mocing average.

    Returns
    -------
    npt.NDArray[floating]
        Moving average.
    npt.NDArray[floating]
        Variance around the moving average.
    """
    moving_avg = np.convolve(signal, np.ones(window_size) / window_size, mode="valid")
    variance = np.array(
        [
            np.var(signal[i : i + window_size])
            for i in range(len(signal) - window_size + 1)
        ]
    )

    return moving_avg, variance




[docs]
def get_maxima_in_moving_interval(
    function_values: npt.NDArray, interval_size: int, step_size: int, filter_value: int
) -> npt.NDArray:
    """
    Slide an interval along the function, filtering out points below a threshold.

    Points smaller than filter_value times the maximum within the interval are removed.

    Parameters
    ----------
    function_values : NDArray
        1D array of function values.
    interval_size : int
        Size of the interval (number of points).
    step_size : int
        Step size for moving the interval.

    Returns
    -------
    NDArray
        Filtered array where points outside the threshold are set to NaN.
    """
    # Convert input to a NumPy array
    function_values = np.asarray(function_values)

    # Initialize an array to store the result
    filtered_indices = []

    # Slide the interval along the function
    for start in range(0, len(function_values) - interval_size + 1, step_size):
        # Define the end of the interval
        end = start + interval_size

        # Extract the interval
        interval = function_values[start:end]

        # Find the maximum value in the interval
        max_value = np.max(interval)

        # Apply the filter
        threshold = filter_value * max_value

        indices = np.where(interval >= threshold)
        filtered_indices += list(start + indices[0])

    return np.array(list(set(filtered_indices)))




[docs]
def get_pearson_correlation_coefficient(x: npt.NDArray, y: npt.NDArray) -> np.floating:
    """
    TODO.

    Parameters
    ----------
    x : npt.NDArray
        First array of data points.
    y : npt.NDArray
        Second array of data points.

    Returns
    -------
    floating
        Pearson correlation coefficient between x and y.
    """
    mean_x = np.mean(x)
    mean_y = np.mean(y)
    std_x = np.std(x)
    std_y = np.std(y)

    return np.mean((x - mean_x) * (y - mean_y)) / std_x / std_y




[docs]
def get_t_test(x: npt.NDArray, y: npt.NDArray) -> np.floating:
    """
    Calculate the t-test statistic for two sets of data.

    Parameters
    ----------
    x : NDArray
        First array of data points.
    y : NDArray
        Second array of data points.

    Returns
    -------
    floating
        t-test statistic for the two sets of data.
    """
    r = get_pearson_correlation_coefficient(x, y)
    n = len(x)

    return np.abs(r) * np.sqrt((n - 2) / (1 - r**2))




[docs]
def probability_density(t: float, n: int) -> float:
    """
    Probability density function for the t-distribution.

    Parameters
    ----------
    t : float
        The t-value for which the probability density is calculated.
    n : int
        The number of data points in the sample.

    Returns
    -------
    float
        The probability density at the given t-value.
    """
    degrees_of_freedom = n - 2

    return (
        scipy.special.gamma((degrees_of_freedom + 1.0) / 2.0)
        / (
            np.sqrt(np.pi * degrees_of_freedom)
            * scipy.special.gamma(degrees_of_freedom / 2.0)
        )
        * (1 + t**2 / degrees_of_freedom) ** (-(degrees_of_freedom + 1.0) / 2.0)
    )




[docs]
def get_significance(x: npt.NDArray, t: float) -> float:
    """
    Calculate the significance of a t-test statistic.

    Parameters
    ----------
    x : npt.NDArray
        Array of data points.
    t : float
        t-test statistic value.

    Returns
    -------
    float
        The significance level of the t-test statistic.
    """
    n = len(x)

    return scipy.integrate.quad(probability_density, -np.inf, t, args=(n))[0]




[docs]
def squared_exponential_kernel(
    x1_vec: npt.NDArray, x2_vec: npt.NDArray, tau: float
) -> npt.NDArray[np.float64]:
    """
    Return a simple squared exponential kernel to determine a similarity measure.

    Parameters
    ----------
    x1_vec : NDArray
        Descriptor for first set of data of shape
        [data points, descriptor dimensions].
    x2_vec : NDArray
        Descriptor for second set of data of shape
        [data points, descriptor dimensions].
    tau : float
        Correlation length for the descriptor.

    Returns
    -------
    NDArray[float64]
        Matrix contianing pairwise kernel values.
    """
    # Ensure inputs are at least 2D (n_samples, n_features)
    x1 = np.atleast_2d(x1_vec)
    x2 = np.atleast_2d(x2_vec)

    # If they are 1D row-vectors, convert to column vectors
    if x1.shape[0] == 1 or x1.shape[1] == 1:
        x1 = x1.reshape(-1, 1)
    if x2.shape[0] == 1 or x2.shape[1] == 1:
        x2 = x2.reshape(-1, 1)

    # Compute squared distances (broadcasting works for both 1D and 2D now)
    diff = x1[:, np.newaxis, :] - x2[np.newaxis, :, :]
    sq_dist = np.sum(diff**2, axis=2)

    # Apply the RBF formula
    return np.exp(-0.5 * sq_dist / tau**2)




[docs]
class GPR:
    """
    A simple Gaussian Process Regression (GPR) model for interpolation and smoothing.

    Parameters
    ----------
    x : NDArray
        Descriptor of shape [data points, descriptor dimensions].
    y : NDArray
        Data to be learned.
    tau : float
        Correlation length for the descriptor.
    sigma : float
        Uncertainty of the input data.
    """

    def __init__(self, x: npt.NDArray, y: npt.NDArray, tau: float, sigma: float):
        K1 = squared_exponential_kernel(x, x, tau)

        self.K1_inv = np.linalg.inv(K1 + np.eye(len(x)) * sigma)
        self.x = x
        self.y = y
        self.tau = tau
        self.sigma = sigma

    def __call__(self, x: npt.NDArray) -> npt.NDArray:
        return self.predict(x)


[docs]
    def predict(self, x: npt.NDArray) -> npt.NDArray:
        """
        TODO.

        Parameters
        ----------
        x : NDArray
            TODO

        Returns
        -------
        NDArray
            TODO
        """
        K2 = squared_exponential_kernel(x, self.x, self.tau)
        y_test0 = K2.dot(self.K1_inv)

        return y_test0.dot(self.y)