Coverage for src/cell_abm_pipeline/flows/download_images.py: 0%

45 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

1""" 

2Workflow for downloading images from Quilt. 

3 

4Image metadata is loaded from the Quilt package and used to filter for FOVs with 

5the specified number of cells. FOVs are selected to meet the specified number of 

6FOVs within each cell volume bin. For each selected FOV, the image is downloaded 

7to the working location under **images**. 

8""" 

9 

10from dataclasses import dataclass 

11 

12from abm_initialization_collection.image import select_fov_images 

13from io_collection.keys import check_key, make_key 

14from io_collection.load import load_dataframe 

15from io_collection.quilt import load_quilt_package, save_quilt_item 

16from prefect import flow 

17 

18 

@dataclass
class ParametersConfig:
    """Parameters controlling the download images flow."""

    cells_per_fov: int
    """Number of cells an FOV must contain."""

    bins: list[int]
    """Boundaries of the cell volume bins."""

    counts: list[int]
    """How many FOVs to select from each cell volume bin."""

    quilt_package: str = "aics/hipsc_single_cell_image_dataset"
    """Quilt package name."""

    quilt_registry: str = "s3://allencell"
    """Quilt registry name."""

37 

38 

@dataclass
class ContextConfig:
    """Context (storage locations) for the download images flow."""

    working_location: str
    """Local path or S3 bucket holding input and output files."""

    metadata_location: str
    """Local path or S3 bucket holding the metadata file."""

48 

49 

@dataclass
class SeriesConfig:
    """Series settings for the download images flow."""

    name: str
    """Simulation series name."""

    metadata_key: str
    """Key of the metadata file."""

59 

60 

@flow(name="download-images")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Run the download images flow.

    The Quilt package is loaded first. If the metadata key is not yet present at
    the metadata location, the package's "metadata.csv" item is saved there. The
    metadata is then loaded (restricted to the columns needed for selection) and
    passed to the FOV selection along with the cells-per-FOV, bin, and count
    parameters. For every selected FOV whose image key is missing from the
    working location, saving of the corresponding package item is submitted.
    """

    package = load_quilt_package(parameters.quilt_package, parameters.quilt_registry)

    # Fetch the metadata file only when it is not already at the metadata location.
    if not check_key(context.metadata_location, series.metadata_key):
        save_quilt_item(context.metadata_location, series.metadata_key, package, "metadata.csv")

    metadata_columns = [
        "CellId",
        "cell_stage",
        "outlier",
        "fov_seg_path",
        "this_cell_index",
        "MEM_shape_volume",
    ]
    metadata = load_dataframe(
        context.metadata_location, series.metadata_key, usecols=metadata_columns
    )

    selected_fovs = select_fov_images(
        metadata, parameters.cells_per_fov, parameters.bins, parameters.counts
    )

    for fov in selected_fovs:
        print(f"key: {fov['key']}")
        include_ids = ", ".join(str(cell_id) for cell_id in fov["cell_ids"])
        print(f"include_ids: {include_ids}")

        image_name = f"{series.name}_{fov['key']}.ome.tiff"
        fov_key = make_key(series.name, "images", image_name)

        # Skip images that already exist at the working location.
        if check_key(context.working_location, fov_key):
            continue

        save_quilt_item.submit(context.working_location, fov_key, package, fov["item"])