Coverage for src/cell_abm_pipeline/flows/plot_resource_usage.py: 0%

56 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

1""" 

2Workflow for plotting resource usage. 

3 

4Working location structure: 

5 

6.. code-block:: bash 

7 

8 (name) 

9 ├── groups 

10 │ └── groups.RESOURCE_USAGE 

11 │ ├── (name).object_storage.csv 

12 │ └── (name).wall_clock.csv 

13 └── plots 

14 └── plots.RESOURCE_USAGE 

15 ├── (name).object_storage.(category).png 

16 └── (name).wall_clock.png 

17 

18Plots use grouped data from **groups.RESOURCE_USAGE**. Plots are saved to 

19**plots.RESOURCE_USAGE**. 

20""" 

21 

22from dataclasses import dataclass, field 

23 

24from io_collection.keys import make_key 

25from io_collection.load import load_dataframe 

26from io_collection.save import save_figure 

27from prefect import flow 

28 

29from cell_abm_pipeline.flows.group_resource_usage import OBJECT_CATEGORIES 

30from cell_abm_pipeline.tasks import make_box_figure 

31 

# Names of the resource usage plots recognized by the main flow; also the
# default selection for ``ParametersConfig.plots``.
PLOTS: list[str] = ["object_storage", "wall_clock"]

36 

37 

@dataclass
class ParametersConfigObjectStorage:
    """Parameter configuration for plot resource usage subflow - object storage."""

    # The factory hands out a copy so that mutating one configuration's
    # categories cannot alter OBJECT_CATEGORIES or any other instance's default.
    categories: list[str] = field(default_factory=lambda: list(OBJECT_CATEGORIES))
    """List of object storage categories."""

44 

45 

@dataclass
class ParametersConfigWallClock:
    """Parameter configuration for plot resource usage subflow - wall clock."""

49 

50 

@dataclass
class ParametersConfig:
    """Parameter configuration for plot resource usage flow."""

    # Copy the module-level list so each configuration owns its own selection.
    plots: list[str] = field(default_factory=lambda: list(PLOTS))
    """List of resource usage plots."""

    # Subflow parameters are built per instance through default_factory. A
    # dataclass instance used directly as a default is unhashable, which
    # Python 3.11+ rejects with ValueError when the class is defined, and on
    # older versions the single instance would be shared by every config.
    object_storage: ParametersConfigObjectStorage = field(
        default_factory=ParametersConfigObjectStorage
    )
    """Parameters for plot object storage subflow."""

    wall_clock: ParametersConfigWallClock = field(default_factory=ParametersConfigWallClock)
    """Parameters for plot wall clock subflow."""

63 

64 

@dataclass
class ContextConfig:
    """Execution context for the plot resource usage flow."""

    working_location: str
    """Local path or S3 bucket holding the input and output files."""

71 

72 

@dataclass
class SeriesConfig:
    """Simulation series description for the plot resource usage flow."""

    name: str
    """Simulation series name."""

    conditions: list[dict]
    """Condition dictionaries for the series; each must carry a unique "key" entry."""

82 

83 

@flow(name="plot-resource-usage")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main plot resource usage flow.

    A subflow is called only when its plot name is listed in
    ``parameters.plots``; object storage is handled before wall clock:

    - :py:func:`run_flow_plot_object_storage`
    - :py:func:`run_flow_plot_wall_clock`
    """

    requested_plots = parameters.plots

    if "object_storage" in requested_plots:
        run_flow_plot_object_storage(context, series, parameters.object_storage)

    if "wall_clock" in requested_plots:
        run_flow_plot_wall_clock(context, series, parameters.wall_clock)

100 

101 

@flow(name="plot-resource-usage_plot-object-storage")
def run_flow_plot_object_storage(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigObjectStorage
) -> None:
    """
    Plot resource usage subflow for object storage.

    Loads the grouped object storage table for the series, derives a
    ``value`` column from ``size``, and saves one box figure for each
    category in ``parameters.categories``.
    """

    name = series.name
    location = context.working_location

    groups_key = make_key(name, "groups", "groups.RESOURCE_USAGE")
    plots_key = make_key(name, "plots", "plots.RESOURCE_USAGE")
    condition_keys = [condition["key"] for condition in series.conditions]

    storage = load_dataframe(location, make_key(groups_key, f"{name}.object_storage.csv"))

    # Scale by 1024**2 to match the MiB axis label (assumes "size" is in
    # bytes -- TODO confirm against the grouping flow).
    storage["value"] = storage["size"] / 1024**2

    for category in parameters.categories:
        in_category = storage["category"] == category
        figure_key = make_key(plots_key, f"{name}.object_storage.{category}.png")
        figure = make_box_figure(
            condition_keys, storage[in_category], ylabel="Object storage size (MiB)"
        )
        save_figure(location, figure_key, figure)

127 

128 

@flow(name="plot-resource-usage_plot-wall-clock")
def run_flow_plot_wall_clock(
    context: ContextConfig, series: SeriesConfig, parameters: ParametersConfigWallClock
) -> None:
    """
    Plot resource usage subflow for wall clock.

    Loads the grouped wall clock table for the series, derives a ``value``
    column from ``time``, and saves a single box figure. ``parameters`` is
    accepted for a uniform subflow signature but is not read here.
    """

    name = series.name
    location = context.working_location

    groups_key = make_key(name, "groups", "groups.RESOURCE_USAGE")
    plots_key = make_key(name, "plots", "plots.RESOURCE_USAGE")
    condition_keys = [condition["key"] for condition in series.conditions]

    timings = load_dataframe(location, make_key(groups_key, f"{name}.wall_clock.csv"))

    # NOTE(review): dividing by 60 matches the "(hr)" axis label only if
    # "time" is recorded in minutes -- confirm against the grouping flow.
    timings["value"] = timings["time"] / 60

    figure_key = make_key(plots_key, f"{name}.wall_clock.png")
    figure = make_box_figure(condition_keys, timings, ylabel="Wall clock time (hr)")
    save_figure(location, figure_key, figure)