Coverage for src/cell_abm_pipeline/flows/calculate_image_properties.py: 0%

52 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

1""" 

2Workflow for calculating shape properties from existing images. 

3 

4Working location structure: 

5 

6.. code-block:: bash 

7 

8 (name) 

9 ├── results 

10 │ └── (name)_(key)_(seed).csv 

11 └── calculations 

12 └── calculations.PROPERTIES 

13 ├── (name)_(key)_(seed)_(tick).PROPERTIES.csv 

14 └── (name)_(key)_(seed)_(tick)_(region).PROPERTIES.csv 

15 

16Data from **results** are used to specify existing images, which are then used 

17to calculate properties. Calculations are saved to **calculations.PROPERTIES**. 

18 

19If region is specified, the region is included in the output key. 

20""" 

21 

22from dataclasses import dataclass, field 

23from typing import Optional 

24 

25import pandas as pd 

26from abm_shape_collection import get_shape_properties 

27from io_collection.keys import make_key 

28from io_collection.load import load_dataframe, load_image 

29from io_collection.save import save_dataframe 

30from prefect import flow 

31 

32from cell_abm_pipeline.flows.calculate_properties import SHAPE_PROPERTIES 

33 

34 

@dataclass
class ParametersConfig:
    """Parameter configuration for calculate image properties flow."""

    key: str
    """Simulation key to calculate."""

    seed: int
    """Simulation random seed to calculate."""

    tick: int
    """Simulation tick to calculate."""

    channel: int
    """Index of channel to calculate."""

    region: Optional[str] = None
    """Subcellular region to calculate (included in the output key when set)."""

    # Hand every instance its own copy of the default list. Returning the
    # module-level SHAPE_PROPERTIES object directly would let an in-place edit
    # of one config's ``properties`` leak into the shared default.
    properties: list[str] = field(default_factory=lambda: list(SHAPE_PROPERTIES))
    """List of shape properties to calculate."""

56 

57 

@dataclass
class ContextConfig:
    """Execution context for the calculate image properties flow."""

    working_location: str
    """Root location (local path or S3 bucket) that inputs are read from and outputs written to."""

64 

65 

@dataclass
class SeriesConfig:
    """Series settings for the calculate image properties flow."""

    name: str
    """Simulation series name; used as the prefix of input and output keys."""

72 

73 

@flow(name="calculate-image-properties")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main calculate image properties flow.

    Loads the results table for the given series, key, and seed from the
    working location and keeps the rows whose ``TICK`` matches the requested
    tick. For each remaining row, the image named in the ``IMAGE`` column is
    loaded from the ``s3://allencell`` bucket and the requested channel is
    extracted as a ZYX array at ``T=0``. Shape properties are calculated on
    that array and tagged with ``KEY``, ``ID``, ``SEED``, and ``TICK``. All
    rows are then saved as a single CSV under **calculations.PROPERTIES**.

    The region parameter only affects the name of the output file; it is not
    passed to the property calculation.
    """

    output_folder = make_key(series.name, "calculations", "calculations.PROPERTIES")
    file_prefix = f"{series.name}_{parameters.key}_{parameters.seed:04d}"

    table_key = make_key(series.name, "results", f"{file_prefix}.csv")
    table = load_dataframe(context.working_location, table_key)

    # Restrict to the requested tick, then keep only the (cell id, image) pairs.
    at_tick = table["TICK"] == parameters.tick
    id_image_pairs = table[at_tick][["ID", "IMAGE"]].values

    records = []

    for cell_id, image_file in id_image_pairs:
        image_key = f"aics/hipsc_single_cell_image_dataset/{image_file}"
        source_image = load_image("s3://allencell", image_key)
        volume = source_image.get_image_data("ZYX", T=0, C=parameters.channel)
        record = get_shape_properties(volume, parameters.properties)

        # Tag the calculated properties with the identifiers of their source cell.
        tags = (
            ("KEY", parameters.key),
            ("ID", cell_id),
            ("SEED", parameters.seed),
            ("TICK", parameters.tick),
        )
        for column, value in tags:
            record[column] = value

        records.append(record)

    records_dataframe = pd.DataFrame(records)

    # The region, when given, is appended to the output file name.
    if parameters.region is None:
        region_suffix = ""
    else:
        region_suffix = f"_{parameters.region}"

    output_name = f"{file_prefix}_{parameters.tick:06d}{region_suffix}.PROPERTIES.csv"
    output_key = make_key(output_folder, output_name)
    save_dataframe(context.working_location, output_key, records_dataframe, index=False)

104 save_dataframe(context.working_location, props_key, props_dataframe, index=False)