Coverage for src/cell_abm_pipeline/tasks/calculate_category_durations.py: 0%
13 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
« prev ^ index » next coverage.py v7.1.0, created at 2024-06-05 19:14 +0000
1from itertools import groupby
3import pandas as pd
4from prefect import task
@task
def calculate_category_durations(
    data: pd.DataFrame, category: str, key: str, threshold: float = 0
) -> list[float]:
    """
    Task to calculate durations of uninterrupted runs within a category.

    Rows where column ``category`` equals ``key`` are grouped by ``SEED`` and
    ``ID``. Within each group the ``time`` values are sorted and split into
    runs of consecutive time points whose spacing is strictly less than
    ``threshold``. The duration of a run is its last time minus its first.

    Runs that start at time ``0`` or that end at the maximum ``time`` found in
    ``data`` are discarded (presumably because their true start or end is not
    observed - TODO confirm intent with callers).

    NOTE: times are sorted in ascending order, so the spacing between
    consecutive points is never negative. With ``threshold <= 0`` (including
    the default of ``0``) no run is ever detected and the result is empty;
    callers need to pass a threshold larger than the sampling interval.

    Parameters
    ----------
    data
        Data containing ``SEED``, ``ID``, ``time``, and ``category`` columns.
    category
        Name of the column holding category values.
    key
        Category value to calculate durations for.
    threshold
        Exclusive upper bound on the gap between consecutive time points for
        them to be considered part of the same run.

    Returns
    -------
    :
        Durations of all retained runs, across all ``SEED`` / ``ID`` groups.
    """

    durations: list[float] = []

    # Runs touching the last time point of the full data set are dropped below.
    end = data["time"].max()
    key_data = data[data[category] == key]

    for _, group in key_data.groupby(["SEED", "ID"]):
        # Sort a copy of the time column rather than sorting the group frame
        # in place, and work on a plain list so slicing is purely positional.
        times = group["time"].sort_values().tolist()

        # Pair each time point with its successor, then collect consecutive
        # pairs whose gap is below the threshold into runs.
        runs = [
            list(pairs)
            for is_contiguous, pairs in groupby(
                zip(times, times[1:]), lambda pair: pair[1] - pair[0] < threshold
            )
            if is_contiguous
        ]

        # Duration spans from the first point of the first pair to the second
        # point of the last pair; extend in place to avoid rebuilding the list.
        durations.extend(
            run[-1][1] - run[0][0] for run in runs if run[0][0] != 0 and run[-1][1] != end
        )

    return durations