Coverage for src/abm_shape_collection/calculate_shape_statistics.py: 100%

14 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-09-25 19:34 +0000

1import pandas as pd 

2from scipy.stats import ks_2samp 

3from sklearn.decomposition import PCA 

4 

5 

def calculate_shape_statistics(
    pca: PCA,
    data: pd.DataFrame,
    ref_data: pd.DataFrame,
    components: int,
    label: str = "shcoeffs",
) -> pd.DataFrame:
    """
    Perform two-sample Kolmogorov-Smirnov test for goodness of fit on shapes.

    Both the sample and the reference data are projected with the given PCA
    object, and each of the first ``components`` projected columns of the
    sample is compared against the matching column of the reference.

    Parameters
    ----------
    pca
        Fit PCA object.
    data
        Sample data, with shape coefficients as columns.
    ref_data
        Reference data, with shape coefficients as columns. The shape
        coefficient columns are selected from this frame (column names
        containing ``label``) and the same columns are then read from
        ``data``.
    components
        Number of shape coefficients components.
    label
        Label for shape coefficients columns.

    Returns
    -------
    :
        Kolmogorov-Smirnov statistics and p-values for each component, one row
        per component with columns ``FEATURE``, ``SIZE`` (number of sample
        rows), ``KS_STATISTIC`` and ``KS_PVALUE``. The columns are present
        even when ``components`` is zero.
    """

    # Fixed output schema, so that an empty result (components == 0) still
    # exposes the same columns as a populated one.
    output_columns = ["FEATURE", "SIZE", "KS_STATISTIC", "KS_PVALUE"]

    # Transform data into shape mode space.
    columns = ref_data.filter(like=label).columns
    ref_transform = pca.transform(ref_data[columns].values)
    transform = pca.transform(data[columns].values)

    statistics = []

    for component in range(components):
        # Extract values for specific component.
        ref_values = ref_transform[:, component]
        values = transform[:, component]

        # Calculate Kolmogorov-Smirnov statistic.
        ks_result = ks_2samp(values, ref_values, mode="asymp")

        statistics.append(
            {
                "FEATURE": f"PC{component + 1}",
                "SIZE": len(values),
                "KS_STATISTIC": ks_result.statistic,
                "KS_PVALUE": ks_result.pvalue,
            }
        )

    return pd.DataFrame(statistics, columns=output_columns)