Coverage for src/abm_shape_collection/calculate_shape_statistics.py: 100%
14 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 19:34 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-09-25 19:34 +0000
1import pandas as pd
2from scipy.stats import ks_2samp
3from sklearn.decomposition import PCA
def calculate_shape_statistics(
    pca: PCA,
    data: pd.DataFrame,
    ref_data: pd.DataFrame,
    components: int,
    label: str = "shcoeffs",
) -> pd.DataFrame:
    """
    Perform two-sample Kolmogorov-Smirnov test for goodness of fit on shapes.

    Both datasets are projected with the given PCA object. Each of the first
    ``components`` projected columns of the sample data is then compared
    against the matching column of the reference data.

    Parameters
    ----------
    pca
        Fit PCA object.
    data
        Sample data, with shape coefficients as columns.
    ref_data
        Reference data, with shape coefficients as columns.
    components
        Number of shape coefficients components.
    label
        Label for shape coefficients columns.

    Returns
    -------
    :
        Kolmogorov-Smirnov statistics and p-values for each component, one
        row per component, with columns ``FEATURE`` (``PC1``, ``PC2``, ...),
        ``SIZE`` (number of sample rows), ``KS_STATISTIC`` and ``KS_PVALUE``.
        The columns are present even when no component is tested.
    """

    # Fixed output schema, so that a result without rows still exposes the
    # same columns as a populated one.
    output_columns = ["FEATURE", "SIZE", "KS_STATISTIC", "KS_PVALUE"]

    # Transform data into shape mode space. The coefficient columns are
    # selected from the reference data and the same selection is applied to
    # the sample data, so both are projected from identical inputs.
    columns = ref_data.filter(like=label).columns
    ref_transform = pca.transform(ref_data[columns].values)
    transform = pca.transform(data[columns].values)

    statistics = []

    for component in range(components):
        # Extract values for specific component.
        ref_values = ref_transform[:, component]
        values = transform[:, component]

        # Calculate Kolmogorov-Smirnov statistic.
        ks_result = ks_2samp(values, ref_values, mode="asymp")

        statistics.append(
            {
                "FEATURE": f"PC{component + 1}",
                "SIZE": len(values),
                "KS_STATISTIC": ks_result.statistic,
                "KS_PVALUE": ks_result.pvalue,
            }
        )

    return pd.DataFrame(statistics, columns=output_columns)