Coverage for src/cell_abm_pipeline/flows/process_sample.py: 0%

61 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

1""" 

2Workflow for processing image samples. 

3 

4Working location structure: 

5 

6.. code-block:: bash 

7 

8    (name)

9    ├── plots

10    │   └── plots.SAMPLE

11    │       └── (name)_(key).SAMPLE.png

12    └── samples

13        ├── samples.PROCESSED

14        │   └── (name)_(key).PROCESSED.csv

15        └── samples.RAW

16            └── (name)_(key).RAW.csv

17 

18Samples to be processed are loaded from **samples.RAW**. Resulting processed 

19sample(s) are placed into **samples.PROCESSED** and corresponding contact 

20sheet(s) are placed into **plots.SAMPLE**. Note that these contact sheet(s) will 

21overwrite existing contact sheets generated by the sample images task. 

22""" 

23 

24from dataclasses import dataclass 

25from typing import Optional 

26 

27from abm_initialization_collection.image import plot_contact_sheet 

28from abm_initialization_collection.sample import ( 

29 exclude_selected_ids, 

30 include_selected_ids, 

31 remove_edge_regions, 

32 remove_unconnected_regions, 

33) 

34from io_collection.keys import make_key 

35from io_collection.load import load_dataframe 

36from io_collection.save import save_dataframe, save_figure 

37from prefect import flow 

38 

# Module-level defaults used by ParametersConfig. They are kept as named
# constants so the default values are defined in one place.

# Default distance for removing unconnected regions.
UNCONNECTED_THRESHOLD: float = 2.0

# Default number of edge positions per axis needed to assign edge region.
EDGE_THRESHOLD: int = 1

# Default distance from axis limits to assign edge positions.
EDGE_PADDING: float = 1.0

47 

48 

@dataclass
class ParametersConfig:
    """
    Parameter configuration for process sample flow.

    Only ``key`` is required; every other field has a default. Each field is
    documented by the string literal that immediately follows it.
    """

    key: str
    """Sample key to process."""

    remove_unconnected: bool = True
    """True to remove unconnected regions, False otherwise."""

    unconnected_threshold: float = UNCONNECTED_THRESHOLD
    """Distance for removing unconnected regions."""

    unconnected_filter: str = "connectivity"
    """Filter type for assigning unconnected coordinates."""

    remove_edges: bool = True
    """True to remove cells touching the edge of the bounds, False otherwise."""

    edge_threshold: int = EDGE_THRESHOLD
    """Number of edge positions per axis needed to assign edge region."""

    edge_padding: float = EDGE_PADDING
    """Distance from axis limits to assign edge positions."""

    # ``None`` (rather than an empty list) means "do not apply this filter";
    # the flow only calls the include/exclude helpers when a list is given.
    include_ids: Optional[list[int]] = None
    """List of ids to include."""

    exclude_ids: Optional[list[int]] = None
    """List of ids to exclude."""

    contact_sheet: bool = True
    """True to save contact sheet of processed samples, False otherwise."""

82 

83 

@dataclass
class ContextConfig:
    """
    Context configuration for process sample flow.

    Holds the single location that the flow reads its input from and writes
    its output to.
    """

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

90 

91 

@dataclass
class SeriesConfig:
    """
    Series configuration for process sample flow.

    The series name is used both as the top-level directory of every key and
    as the prefix of every file name built by the flow.
    """

    name: str
    """Name of the simulation series."""

98 

99 

@flow(name="process-sample")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main process sample flow.

    Loads the raw sample ``(name)_(key).RAW.csv`` from **samples.RAW**, applies
    the enabled processing steps in a fixed order (remove unconnected regions,
    remove edge regions, include selected ids, exclude selected ids), and saves
    the result as ``(name)_(key).PROCESSED.csv`` in **samples.PROCESSED**. If
    requested, a contact sheet built from the processed and raw samples is
    saved as ``(name)_(key).SAMPLE.png`` in **plots.SAMPLE**.

    Parameters
    ----------
    context
        Provides the working location used for all loads and saves.
    series
        Provides the series name used to build keys and file names.
    parameters
        Selects the sample key and which processing steps to apply.
    """

    item_key = f"{series.name}_{parameters.key}"
    sample_key = make_key(series.name, "samples", "samples.RAW", f"{item_key}.RAW.csv")

    # Keep the raw samples untouched; they are passed to the contact sheet
    # alongside the processed samples below.
    raw_samples = load_dataframe(context.working_location, sample_key)
    processed_samples = raw_samples.copy()

    if parameters.remove_unconnected:
        processed_samples = remove_unconnected_regions(
            processed_samples, parameters.unconnected_threshold, parameters.unconnected_filter
        )

    if parameters.remove_edges:
        processed_samples = remove_edge_regions(
            processed_samples, parameters.edge_threshold, parameters.edge_padding
        )

    # The id filters are skipped only when the list is None; an explicitly
    # empty list is still passed through to the helper.
    if parameters.include_ids is not None:
        processed_samples = include_selected_ids(processed_samples, parameters.include_ids)

    if parameters.exclude_ids is not None:
        processed_samples = exclude_selected_ids(processed_samples, parameters.exclude_ids)

    processed_key = make_key(
        series.name, "samples", "samples.PROCESSED", f"{item_key}.PROCESSED.csv"
    )
    save_dataframe(context.working_location, processed_key, processed_samples, index=False)

    if parameters.contact_sheet:
        contact_sheet = plot_contact_sheet(processed_samples, raw_samples)
        plot_key = make_key(series.name, "plots", "plots.SAMPLE", f"{item_key}.SAMPLE.png")
        save_figure(context.working_location, plot_key, contact_sheet)