Skip to content

sopa.segmentation.transcripts

sopa.segmentation.transcripts.resolve(sdata, temp_dir, gene_column, patches_dirs=None, min_area=0, shapes_key=SopaKeys.BAYSOR_BOUNDARIES)

Concatenate all the per-patch segmentation runs and resolve the conflicts. Resulting cells boundaries are saved in the SpatialData object.

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
temp_dir str

Temporary directory used to store all the patches subdirectories (one subdirectory for one patch and for one segmentation run)

required
gene_column str

Column of the transcript dataframe containing the genes names

required
patches_dirs list[str] | None

Optional list of subdirectories inside temp_dir to be read. By default, read all.

None
min_area float

Minimum area (in microns^2) for a cell to be kept

0
Source code in sopa/segmentation/transcripts.py
def resolve(
    sdata: SpatialData,
    temp_dir: str,
    gene_column: str,
    patches_dirs: list[str] | None = None,
    min_area: float = 0,
    shapes_key: str = SopaKeys.BAYSOR_BOUNDARIES,
):
    """Concatenate all the per-patch segmentation runs and resolve the conflicts. Resulting cells boundaries are saved in the `SpatialData` object.

    Args:
        sdata: A `SpatialData` object
        temp_dir: Temporary directory used to store all the patches subdirectories (one subdirectory for one patch and for one segmentation run)
        gene_column: Column of the transcript dataframe containing the genes names
        patches_dirs: Optional list of subdirectories inside `temp_dir` to be read. By default, read all.
        min_area: Minimum area (in microns^2) for a cell to be kept
    """
    if min_area > 0:
        log.info(f"Cells whose area is less than {min_area} microns^2 will be removed")

    patches_cells, adatas = _read_all_segmented_patches(temp_dir, min_area, patches_dirs)
    geo_df, cells_indices, new_ids = _resolve_patches(patches_cells, adatas)

    image_key = get_key(sdata, "images")
    points = get_element(sdata, "points")
    transformations = get_transformation(points, get_all=True).copy()

    geo_df = ShapesModel.parse(geo_df, transformations=transformations)

    table_conflicts = []
    if len(new_ids):
        new_cells = geo_df.geometry[cells_indices == -1]
        geo_df_new = gpd.GeoDataFrame({"geometry": new_cells})
        geo_df_new = ShapesModel.parse(geo_df_new, transformations=transformations)

        log.info("Aggregating transcripts on merged cells")
        table_conflicts = aggregate.count_transcripts(sdata, gene_column, geo_df=geo_df_new)
        table_conflicts.obs_names = new_ids
        table_conflicts = [table_conflicts]

    valid_ids = set(list(geo_df.index))
    table = anndata.concat(
        [adata[list(valid_ids & set(list(adata.obs_names)))] for adata in adatas] + table_conflicts,
        join="outer",
    )
    table.obs.dropna(axis="columns", inplace=True)

    geo_df = geo_df.loc[table.obs_names]

    table.obsm["spatial"] = np.array([[centroid.x, centroid.y] for centroid in geo_df.centroid])
    table.obs[SopaKeys.REGION_KEY] = pd.Series(shapes_key, index=table.obs_names, dtype="category")
    table.obs[SopaKeys.SLIDE_KEY] = pd.Series(image_key, index=table.obs_names, dtype="category")
    table.obs[SopaKeys.INSTANCE_KEY] = geo_df.index

    table = TableModel.parse(
        table,
        region_key=SopaKeys.REGION_KEY,
        region=shapes_key,
        instance_key=SopaKeys.INSTANCE_KEY,
    )

    sdata.shapes[shapes_key] = geo_df
    sdata.tables[SopaKeys.TABLE] = table

    if sdata.is_backed():
        sdata.write_element(shapes_key, overwrite=True)
        sdata.write_element(SopaKeys.TABLE, overwrite=True)

    log.info(
        f"Added sdata.tables['{SopaKeys.TABLE}'], and {len(geo_df)} cell boundaries to sdata['{shapes_key}']"
    )