Source code for cell_abm_pipeline.tasks.calculate_category_durations

from itertools import groupby

import pandas as pd
from prefect import task


@task
def calculate_category_durations(
    data: pd.DataFrame, category: str, key: str, threshold: float = 0
) -> list[float]:
    """
    Calculate durations of uninterrupted runs of a category value.

    Rows where the ``category`` column equals ``key`` are grouped by
    ``SEED`` and ``ID``. Within each group the ``time`` values are sorted
    and split into runs of consecutive samples whose spacing is strictly
    less than ``threshold``. The duration of a run is its last time minus
    its first time.

    Runs that begin at time ``0`` or finish at the largest ``time`` value
    in ``data`` are skipped. A run needs at least two samples; an isolated
    sample produces no duration.

    Parameters
    ----------
    data
        Table with ``SEED``, ``ID``, ``time``, and ``category`` columns.
    category
        Name of the column holding the category values.
    key
        Category value whose durations are calculated.
    threshold
        Exclusive upper bound on the gap between consecutive times for
        them to belong to the same run. Gaps between ascending times are
        never negative, so with the default of ``0`` no gap qualifies and
        the result is empty.

    Returns
    -------
    :
        Durations of all qualifying runs across every ``(SEED, ID)`` group.
    """

    durations: list[float] = []
    end = data["time"].max()
    key_data = data[data[category] == key]

    def within_threshold(pair: tuple[float, float]) -> bool:
        return pair[1] - pair[0] < threshold

    for _, group in key_data.groupby(["SEED", "ID"]):
        # Sort into a new list instead of sorting the group in place: the
        # group is a sub-frame produced by groupby and should not be mutated.
        times = group["time"].sort_values().tolist()

        # Pair each time with its successor, then collapse adjacent pairs
        # that share the same "gap is below threshold" flag into one run.
        for is_run, pairs in groupby(zip(times, times[1:]), within_threshold):
            if not is_run:
                continue

            run = list(pairs)
            start, stop = run[0][0], run[-1][1]

            # Skip runs touching either end of the time axis.
            if start != 0 and stop != end:
                durations.append(stop - start)

    return durations