Coverage for src/cell_abm_pipeline/flows/parse_physicell_simulations.py: 0%

36 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-06-05 19:14 +0000

"""
Workflow for parsing PhysiCell simulations into tidy data.

Working location structure:

.. code-block:: bash

    (name)
    ├── data
    │   └── (name)_(key)_(seed).tar.xz
    └── results
        └── (name)_(key)_(seed).csv

Data from **data** are parsed into **results**.
"""

16 

17from dataclasses import dataclass, field 

18 

19from container_collection.manifest import filter_manifest_files 

20from io_collection.keys import make_key 

21from io_collection.load import load_dataframe, load_tar 

22from io_collection.save import save_dataframe 

23from prefect import flow 

24 

25from cell_abm_pipeline.tasks.physicell import parse_mcds_file 

26 

27 

@dataclass
class ParametersConfig:
    """Parameter configuration for parse physicell simulations flow."""

    # A factory is required so that each instance gets its own list rather
    # than sharing one mutable default across instances.
    include_filters: list[str] = field(default_factory=lambda: ["*"])
    """List of Unix filename patterns for files to include in parsing."""

    exclude_filters: list[str] = field(default_factory=list)
    """List of Unix filename patterns for files to exclude from parsing."""

37 

38 

@dataclass
class ContextConfig:
    """Context configuration for parse physicell simulations flow."""

    working_location: str
    """Location for input and output files (local path or S3 bucket)."""

    manifest_location: str
    """Location of manifest file (local path or S3 bucket)."""

48 

49 

@dataclass
class SeriesConfig:
    """Series configuration for parse physicell simulations flow."""

    name: str
    """Name of the simulation series."""

    manifest_key: str
    """Key for manifest file."""

    extensions: list[str]
    """List of file extensions in complete run."""

62 

63 

@flow(name="parse-physicell-simulations")
def run_flow(context: ContextConfig, series: SeriesConfig, parameters: ParametersConfig) -> None:
    """
    Main parse physicell simulations flow.

    Loads the manifest at ``series.manifest_key`` from
    ``context.manifest_location``, filters its files using
    ``series.extensions`` and the include / exclude filters in ``parameters``,
    and then, for each remaining key, loads the entry stored under
    ``"tar.xz"``, parses it with ``parse_mcds_file``, and saves the result as
    ``(key).csv`` to ``context.working_location``.

    Parameters
    ----------
    context
        Locations of the manifest and of the working directory / bucket.
    series
        Series name, manifest key, and extensions passed to the manifest
        filter.
    parameters
        Include and exclude filename patterns passed to the manifest filter.
    """

    manifest = load_dataframe(context.manifest_location, series.manifest_key)
    filtered_files = filter_manifest_files(
        manifest, series.extensions, parameters.include_filters, parameters.exclude_filters
    )

    for key, files in filtered_files.items():
        # The "tar.xz" entry is unpacked as keyword arguments for load_tar;
        # a missing entry raises KeyError here.
        tar_file = load_tar(**files["tar.xz"])
        results = parse_mcds_file(tar_file)

        # NOTE(review): this is a plain string, so the key contains the literal
        # text "{{timestamp}}" -- presumably a placeholder resolved by the
        # key / save helpers; confirm against io_collection.
        results_key = make_key(series.name, "{{timestamp}}", "results", f"{key}.csv")
        save_dataframe(context.working_location, results_key, results, index=False)

78 save_dataframe(context.working_location, results_key, results, index=False)