Source code for abm_shape_collection.calculate_shape_statistics

import pandas as pd
from scipy.stats import ks_2samp
from sklearn.decomposition import PCA


def calculate_shape_statistics(
    pca: PCA,
    data: pd.DataFrame,
    ref_data: pd.DataFrame,
    components: int,
    label: str = "shcoeffs",
) -> pd.DataFrame:
    """
    Perform two-sample Kolmogorov-Smirnov test for goodness of fit on shapes.

    Both data sets are passed through ``pca.transform`` and, for each of the
    first ``components`` columns of the transformed arrays, the sample values
    are compared against the reference values with ``scipy.stats.ks_2samp``.

    Parameters
    ----------
    pca
        Fit PCA object.
    data
        Sample data, with shape coefficients as columns.
    ref_data
        Reference data, with shape coefficients as columns.
    components
        Number of shape coefficients components.
    label
        Label for shape coefficients columns.

    Returns
    -------
    :
        Kolmogorov-Smirnov statistics and p-values for each component, one row
        per component with columns ``FEATURE``, ``SIZE``, ``KS_STATISTIC`` and
        ``KS_PVALUE``.
    """

    # Fixed schema so that an empty result (no components requested) still
    # exposes the columns downstream code selects on.
    output_columns = ["FEATURE", "SIZE", "KS_STATISTIC", "KS_PVALUE"]

    # Transform data into shape mode space. The coefficient columns are chosen
    # from the reference data and the same columns are read from the sample
    # data, so both arrays reach the PCA with matching column order.
    columns = ref_data.filter(like=label).columns
    ref_transform = pca.transform(ref_data[columns].values)
    transform = pca.transform(data[columns].values)

    statistics = []

    for component in range(components):
        # Extract values for specific component.
        ref_values = ref_transform[:, component]
        values = transform[:, component]

        # Calculate Kolmogorov-Smirnov statistic.
        ks_result = ks_2samp(values, ref_values, mode="asymp")

        statistics.append(
            {
                "FEATURE": f"PC{component + 1}",
                "SIZE": len(values),
                "KS_STATISTIC": ks_result.statistic,
                "KS_PVALUE": ks_result.pvalue,
            }
        )

    return pd.DataFrame(statistics, columns=output_columns)