Coverage for src/cell_abm_pipeline/flows/calculate_coefficients.py: 0%

78 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

"""
Workflow for calculating spherical harmonic coefficients.

Working location structure:

.. code-block:: bash

    (name)
    ├── data
    │   └── data.LOCATIONS
    │       └── (name)_(key)_(seed).LOCATIONS.tar.xz
    └── calculations
        └── calculations.COEFFICIENTS
            ├── (name)_(key)_(seed)_(tick).COEFFICIENTS.csv
            └── (name)_(key)_(seed)_(tick)_(region).COEFFICIENTS.csv

Data from **data.LOCATIONS** are used to calculate coefficients, which are saved
to **calculations.COEFFICIENTS**.

If region is specified, the region is included in the output key. For
calculations with offset but no chunking, the output key extension starts with
``.(offset).`` to specify the index offset. For calculations with chunking, the
output key extension starts with ``.(offset).(chunk).`` to specify the index
offset and chunk size.
"""

26 

27from dataclasses import dataclass 

28from typing import Optional 

29 

30import pandas as pd 

31from abm_shape_collection import get_shape_coefficients, make_voxels_array 

32from arcade_collection.output import extract_tick_json, get_location_voxels 

33from io_collection.keys import make_key 

34from io_collection.load import load_tar 

35from io_collection.save import save_dataframe 

36from prefect import flow 

37 

# Default order of the spherical harmonics coefficient parametrization.
COEFFICIENT_ORDER = 16

39 

40 

@dataclass
class ParametersConfig:
    """Parameter configuration for calculate coefficients flow."""

    key: str
    """Simulation key to calculate."""

    seed: int
    """Simulation random seed to calculate."""

    tick: int
    """Simulation tick to calculate."""

    offset: int = 0
    """Index offset for skipped calculations."""

    chunk: Optional[int] = None
    """Number of indices to calculate, starting from offset."""

    region: Optional[str] = None
    """Subcellular region to calculate."""

    scale: int = 1
    """Rescaling factor for image array."""

    order: int = COEFFICIENT_ORDER
    """Order of the spherical harmonics coefficient parametrization."""

68 

69 

@dataclass
class ContextConfig:
    """Context configuration for calculate coefficients flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

76 

77 

@dataclass
class SeriesConfig:
    """Series configuration for calculate coefficients flow."""

    name: str
    """Name of the simulation series."""

84 

85 

@flow(name="calculate-coefficients")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main calculate coefficients flow.

    Loads the locations archive for the series, key, and seed from
    **data.LOCATIONS**, extracts the locations at the requested tick, and
    calculates shape coefficients for each selected location. Locations with
    an index below ``parameters.offset`` are skipped and, if
    ``parameters.chunk`` is set, at most that many indices starting from the
    offset are considered. Locations without voxels (or without voxels in the
    requested region) produce no output row. The collected coefficients are
    saved as a single CSV under **calculations.COEFFICIENTS**.

    Parameters
    ----------
    context
        Provides the working location used for loading and saving.
    series
        Provides the series name used to build input and output keys.
    parameters
        Selects the key, seed, tick, offset, chunk, region, scale, and order
        used for the calculation.
    """

    data_key = make_key(series.name, "data", "data.LOCATIONS")
    calc_key = make_key(series.name, "calculations", "calculations.COEFFICIENTS")
    series_key = f"{series.name}_{parameters.key}_{parameters.seed:04d}"

    locations_key = make_key(data_key, f"{series_key}.LOCATIONS.tar.xz")
    locations_tar = load_tar(context.working_location, locations_key)
    locations_json = extract_tick_json(locations_tar, series_key, parameters.tick, "LOCATIONS")

    all_coeffs = []

    # Number of indices visited at or past the offset. It is incremented before
    # the empty-voxel checks, so locations that are skipped for having no
    # voxels still count toward the chunk size.
    count = 0

    for i, location in enumerate(locations_json):
        if i < parameters.offset:
            continue

        count += 1
        if parameters.chunk is not None and count > parameters.chunk:
            break

        voxels = get_location_voxels(location)

        if len(voxels) == 0:
            continue

        array = make_voxels_array(voxels, None, parameters.scale)

        if parameters.region is not None:
            region_voxels = get_location_voxels(location, parameters.region)

            if len(region_voxels) == 0:
                continue

            # The region array is passed together with the full location array.
            region_array = make_voxels_array(region_voxels, None, parameters.scale)
            coeffs = get_shape_coefficients(region_array, array, parameters.order)
        else:
            coeffs = get_shape_coefficients(array, array, parameters.order)

        # Tag the coefficients with identifying columns before collecting.
        coeffs["KEY"] = parameters.key
        coeffs["ID"] = location["id"]
        coeffs["SEED"] = parameters.seed
        coeffs["TICK"] = parameters.tick

        all_coeffs.append(coeffs)

    coeffs_dataframe = pd.DataFrame(all_coeffs)

    # With chunking, the offset is always part of the key (even when zero);
    # without chunking, the offset only appears when it is greater than zero.
    chunk_key = ""
    offset_key = f".{parameters.offset:04d}" if parameters.offset > 0 else ""

    if parameters.chunk is not None:
        chunk_key = f".{parameters.chunk:04d}"
        offset_key = f".{parameters.offset:04d}"

    region_key = f"_{parameters.region}" if parameters.region is not None else ""
    suffix = f"{region_key}{offset_key}{chunk_key}"

    coeffs_key = make_key(calc_key, f"{series_key}_{parameters.tick:06d}{suffix}.COEFFICIENTS.csv")
    save_dataframe(context.working_location, coeffs_key, coeffs_dataframe, index=False)