Segmentation

Cell segmentation

`sopa.segmentation.cellpose(sdata, channels, diameter, model_type='cyto3', pretrained_model=None, gpu=False, image_key=None, min_area=None, delete_cache=True, recover=False, flow_threshold=2, cellprob_threshold=-6, clip_limit=0.2, clahe_kernel_size=None, gaussian_sigma=1, key_added=SopaKeys.CELLPOSE_BOUNDARIES, cellpose_model_kwargs=None, **cellpose_eval_kwargs)`

Run Cellpose segmentation on a SpatialData object, and add a GeoDataFrame containing the cell boundaries.

Cellpose installation

Make sure to install the cellpose extra (pip install 'sopa[cellpose]') for this method to work.

Diameter parameter

The diameter parameter is used to estimate the expected cell diameter (in pixels). This is a crucial parameter for the segmentation.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object	required
`channels`	`list[str] \| str`	Name of the channel(s) to be used for segmentation. If one channel, must be a nucleus channel. If a `list` of channels, it must be a cytoplasmic channel and then a nucleus channel.	required
`diameter`	`int`	The Cellpose parameter for the expected cell diameter (in pixels).	required
`model_type`	`str`	Cellpose model type.	`'cyto3'`
`pretrained_model`	`str \| None`	Path to the pretrained model to be loaded, or `None`	`None`
`gpu`	`bool`	Whether to use GPU for segmentation.	`False`
`image_key`	`str \| None`	Name of the image in `sdata` to be used for segmentation.	`None`
`min_area`	`int \| None`	Minimum area of a cell to be considered. By default, it is calculated based on the `diameter` parameter.	`None`
`delete_cache`	`bool`	Whether to delete the cache after segmentation.	`True`
`recover`	`bool`	If `True`, recover the cache from a failed segmentation, and continue.	`False`
`flow_threshold`	`float`	Cellpose `flow_threshold` parameter.	`2`
`cellprob_threshold`	`float`	Cellpose `cellprob_threshold` parameter.	`-6`
`clip_limit`	`float`	Parameter for skimage.exposure.equalize_adapthist (applied before running cellpose)	`0.2`
`clahe_kernel_size`	`int \| list[int] \| None`	Parameter for skimage.exposure.equalize_adapthist (applied before running cellpose)	`None`
`gaussian_sigma`	`float`	Parameter for scipy gaussian_filter (applied before running cellpose)	`1`
`key_added`	`str`	Name of the shapes element to be added to `sdata`.	`CELLPOSE_BOUNDARIES`
`cellpose_model_kwargs`	`dict \| None`	Dictionary of kwargs to be provided to the `cellpose.models.CellposeModel` object.	`None`
`**cellpose_eval_kwargs`	`int`	Kwargs to be provided to `model.eval` (where `model` is a `cellpose.models.CellposeModel` object)	`{}`

Source code in sopa/segmentation/methods/_cellpose.py

def cellpose(
    sdata: SpatialData,
    channels: list[str] | str,
    diameter: int,
    model_type: str = "cyto3",
    pretrained_model: str | None = None,
    gpu: bool = False,
    image_key: str | None = None,
    min_area: int | None = None,
    delete_cache: bool = True,
    recover: bool = False,
    flow_threshold: float = 2,
    cellprob_threshold: float = -6,
    clip_limit: float = 0.2,
    clahe_kernel_size: int | list[int] | None = None,
    gaussian_sigma: float = 1,
    key_added: str = SopaKeys.CELLPOSE_BOUNDARIES,
    cellpose_model_kwargs: dict | None = None,
    **cellpose_eval_kwargs: int,
):
    """Segment cells with [Cellpose](https://cellpose.readthedocs.io/en/latest/) on a SpatialData object, and store the cell boundaries as a GeoDataFrame.

    !!! warning "Cellpose installation"
        This method requires the cellpose extra, i.e. `pip install 'sopa[cellpose]'`.

    !!! info "Diameter parameter"
        The `diameter` parameter is the expected cell diameter, expressed in pixels. The segmentation depends crucially on this value.

    Args:
        sdata: A `SpatialData` object
        channels: Channel name(s) used for the segmentation. A single channel must be a nucleus channel. A `list` of channels must contain a cytoplasmic channel first, and then a nucleus channel.
        diameter: Expected cell diameter (in pixels), as expected by Cellpose.
        model_type: Cellpose model type.
        pretrained_model: Path to a pretrained model to load, or `None`
        gpu: Whether the segmentation should use the GPU.
        image_key: Name of the image of `sdata` on which the segmentation is run.
        min_area: Minimum area for a cell to be kept. If `None`, it is derived from `diameter` as `(diameter / 2) ** 2`.
        delete_cache: Whether the cache is deleted once the segmentation is done.
        recover: If `True`, reuse the cache of a previously failed segmentation, and continue from there.
        flow_threshold: Cellpose `flow_threshold` parameter.
        cellprob_threshold: Cellpose `cellprob_threshold` parameter.
        clip_limit: Parameter for skimage.exposure.equalize_adapthist (applied before running cellpose)
        clahe_kernel_size: Parameter for skimage.exposure.equalize_adapthist (applied before running cellpose)
        gaussian_sigma: Parameter for scipy gaussian_filter (applied before running cellpose)
        key_added: Name of the shapes element to be added to `sdata`. It is also used as the cache directory name.
        cellpose_model_kwargs: Dictionary of kwargs forwarded to the `cellpose.models.CellposeModel` object.
        **cellpose_eval_kwargs: Kwargs forwarded to `model.eval` (where `model` is a `cellpose.models.CellposeModel` object)
    """
    # A single channel name is normalized to a one-element list
    if not isinstance(channels, list):
        channels = [channels]

    segmentation_fn = cellpose_patch(
        diameter=diameter,
        channels=channels,
        model_type=model_type,
        pretrained_model=pretrained_model,
        gpu=gpu,
        flow_threshold=flow_threshold,
        cellprob_threshold=cellprob_threshold,
        cellpose_model_kwargs=cellpose_model_kwargs,
        **cellpose_eval_kwargs,
    )

    # Default threshold: (diameter / 2) ** 2, i.e. the area of a disk of this diameter
    # divided by pi (roughly a third of the "normal cell" area)
    effective_min_area = (diameter / 2) ** 2 if min_area is None else min_area

    # Preprocessing options, forwarded unchanged to the generic staining-based pipeline
    preprocessing_kwargs = {
        "clip_limit": clip_limit,
        "clahe_kernel_size": clahe_kernel_size,
        "gaussian_sigma": gaussian_sigma,
    }

    custom_staining_based(
        sdata,
        segmentation_fn,
        channels,
        image_key=image_key,
        min_area=effective_min_area,
        delete_cache=delete_cache,
        recover=recover,
        cache_dir_name=key_added,
        key_added=key_added,
        **preprocessing_kwargs,
    )

`sopa.segmentation.stardist(sdata, model_type='2D_versatile_he', image_key=None, channels=None, min_area=0, delete_cache=True, recover=False, prob_thresh=0.2, nms_thresh=0.6, clip_limit=0, clahe_kernel_size=None, gaussian_sigma=0, key_added=SopaKeys.STARDIST_BOUNDARIES, **stardist_eval_kwargs)`

Run Stardist segmentation on a SpatialData object, and add a GeoDataFrame containing the cell boundaries.

Stardist installation

Make sure to install the stardist extra (pip install 'sopa[stardist]') for this method to work.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object	required
`model_type`	`str`	Stardist model name.	`'2D_versatile_he'`
`image_key`	`str \| None`	Name of the image in `sdata` to be used for segmentation.	`None`
`channels`	`list[str] \| str \| None`	One or a list of channel names used for segmentation. None assumes RGB image.	`None`
`min_area`	`int`	Minimum area of a cell to be considered.	`0`
`delete_cache`	`bool`	Whether to delete the cache after segmentation.	`True`
`recover`	`bool`	If `True`, recover the cache from a failed segmentation, and continue.	`False`
`prob_thresh`	`float`	Stardist `prob_thresh` parameter.	`0.2`
`nms_thresh`	`float`	Stardist `nms_thresh` parameter.	`0.6`
`clip_limit`	`float`	Parameter for skimage.exposure.equalize_adapthist (applied before running stardist)	`0`
`clahe_kernel_size`	`int \| list[int] \| None`	Parameter for skimage.exposure.equalize_adapthist (applied before running stardist)	`None`
`gaussian_sigma`	`float`	Parameter for scipy gaussian_filter (applied before running stardist)	`0`
`key_added`	`str`	Name of the shapes element to be added to `sdata`.	`STARDIST_BOUNDARIES`
`**stardist_eval_kwargs`	`int`	Kwargs to be provided to `model.predict_instances` (where `model` is a `stardist.models.StarDist2D` object)	`{}`

Source code in sopa/segmentation/methods/_stardist.py

def stardist(
    sdata: SpatialData,
    model_type: str = "2D_versatile_he",
    image_key: str | None = None,
    channels: list[str] | str | None = None,
    min_area: int = 0,
    delete_cache: bool = True,
    recover: bool = False,
    prob_thresh: float = 0.2,
    nms_thresh: float = 0.6,
    clip_limit: float = 0,
    clahe_kernel_size: int | list[int] | None = None,
    gaussian_sigma: float = 0,
    key_added: str = SopaKeys.STARDIST_BOUNDARIES,
    **stardist_eval_kwargs: int,
):
    """Segment cells with [Stardist](https://github.com/stardist/stardist) on a SpatialData object, and store the cell boundaries as a GeoDataFrame.

    !!! warning "Stardist installation"
        This method requires the stardist extra, i.e. `pip install 'sopa[stardist]'`.

    Args:
        sdata: A `SpatialData` object
        model_type: Stardist model name.
        image_key: Name of the image of `sdata` on which the segmentation is run.
        channels: A channel name, or a list of channel names, used for the segmentation. `None` assumes an RGB image.
        min_area: Minimum area for a cell to be kept.
        delete_cache: Whether the cache is deleted once the segmentation is done.
        recover: If `True`, reuse the cache of a previously failed segmentation, and continue from there.
        prob_thresh: Stardist `prob_thresh` parameter.
        nms_thresh: Stardist `nms_thresh` parameter.
        clip_limit: Parameter for skimage.exposure.equalize_adapthist (applied before running stardist)
        clahe_kernel_size: Parameter for skimage.exposure.equalize_adapthist (applied before running stardist)
        gaussian_sigma: Parameter for scipy gaussian_filter (applied before running stardist)
        key_added: Name of the shapes element to be added to `sdata`. It is also used as the cache directory name.
        **stardist_eval_kwargs: Kwargs forwarded to `model.predict_instances` (where `model` is a `stardist.models.StarDist2D` object)
    """
    segmentation_fn = stardist_patch(
        model_type=model_type, prob_thresh=prob_thresh, nms_thresh=nms_thresh, **stardist_eval_kwargs
    )

    # Preprocessing options, forwarded unchanged to the generic staining-based pipeline
    preprocessing_kwargs = {
        "clip_limit": clip_limit,
        "clahe_kernel_size": clahe_kernel_size,
        "gaussian_sigma": gaussian_sigma,
    }

    custom_staining_based(
        sdata,
        segmentation_fn,
        channels,
        image_key=image_key,
        min_area=min_area,
        delete_cache=delete_cache,
        recover=recover,
        cache_dir_name=key_added,
        key_added=key_added,
        **preprocessing_kwargs,
    )

`sopa.segmentation.baysor(sdata, config=None, min_area=0, delete_cache=True, recover=False, force=False, scale=None, key_added=SopaKeys.BAYSOR_BOUNDARIES, patch_index=None)`

Run Baysor segmentation on a SpatialData object, and add a GeoDataFrame containing the cell boundaries.

Baysor installation

Make sure to install Baysor, and either have the executable at ~/.julia/bin/baysor, or create an alias called baysor that points to the binary executable. Also, you'll need to install sopa with the baysor extra: pip install 'sopa[baysor]' (basically, this installs toml and loompy).

Inferred config

If the config argument is not provided, the configuration is inferred. If sopa.make_transcript_patches was run with a prior_shapes_key, the configuration is inferred based on the prior segmentation. Otherwise, the configuration is inferred based on the scale parameter (you'll need to provide it).

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`config`	`dict \| str \| None`	Optional configuration dictionary or path to a TOML file containing a valid Baysor config. By default, a configuration is inferred based on the cell area of the prior segmentation, or based on the `scale` parameter.	`None`
`min_area`	`int`	Minimal area (in microns^2) of a cell to be considered.	`0`
`delete_cache`	`bool`	Whether to delete the cache after segmentation.	`True`
`recover`	`bool`	If `True`, recover the cache from a failed segmentation, and continue.	`False`
`force`	`bool`	If `True`, ignore failed patches and continue with the successful ones.	`False`
`scale`	`float \| None`	The typical cell radius in microns. If `config` is not provided, the configuration is inferred based on this parameter.	`None`
`key_added`	`str`	Name of the shapes element to be added to `sdata.shapes`.	`BAYSOR_BOUNDARIES`
`patch_index`	`int \| None`	Index of the patch to segment (we do not recommend to set this argument). By default, segment all patches.	`None`

Source code in sopa/segmentation/methods/_baysor.py

def baysor(
    sdata: SpatialData,
    config: dict | str | None = None,
    min_area: int = 0,
    delete_cache: bool = True,
    recover: bool = False,
    force: bool = False,
    scale: float | None = None,
    key_added: str = SopaKeys.BAYSOR_BOUNDARIES,
    patch_index: int | None = None,
):
    """Segment cells with [Baysor](https://kharchenkolab.github.io/Baysor/dev/) on a SpatialData object, and store the cell boundaries as a GeoDataFrame.

    !!! warning "Baysor installation"
        [Baysor](https://kharchenkolab.github.io/Baysor/dev/installation/) must be installed, with the executable located at `~/.julia/bin/baysor`,
        or with an alias called `baysor` pointing to the binary executable. Sopa must also be installed
        with the baysor extra: `pip install 'sopa[baysor]'` (basically, this installs `toml` and `loompy`).

    !!! info "Inferred config"
        When the `config` argument is missing, a configuration is inferred.
        If [sopa.make_transcript_patches][] was run with a `prior_shapes_key`, the inference relies on the prior segmentation.
        Otherwise, it relies on the `scale` parameter (which you then need to provide).

    Args:
        sdata: A `SpatialData` object.
        config: Optional configuration dictionary, or path to a TOML file containing a valid Baysor config. By default, a configuration is inferred based on the cell area of the prior segmentation, or based on the `scale` parameter.
        min_area: Minimal area (in microns^2) for a cell to be kept.
        delete_cache: Whether the cache is deleted once the segmentation is done.
        recover: If `True`, only the patches without a `segmentation_counts.loom` output are run again.
        force: If `True`, failed patches are ignored and the resolution continues with the successful ones.
        scale: The typical cell radius in microns. If `config` is not provided, the configuration is inferred based on this parameter.
        key_added: Name of the shapes element to be added to `sdata.shapes`.
        patch_index: Index of the only patch to segment (we do not recommend setting this argument). By default, all patches are segmented.
    """
    _check_transcript_patches(sdata)

    # The prior segmentation key (if any) is stored as a column of the transcript patches
    patches_gdf = sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES]
    prior_shapes_key = (
        patches_gdf[SopaKeys.PRIOR_SHAPES_KEY].iloc[0] if SopaKeys.PRIOR_SHAPES_KEY in patches_gdf else None
    )

    # A missing or empty config triggers the config inference
    if config is None or len(config) == 0:
        config = _get_default_config(sdata, prior_shapes_key, scale)

    baysor_patch = BaysorPatch(
        _get_baysor_command(prior_shapes_key),
        config,
        force=force,
        capture_output=patch_index is None,
    )

    # Single-patch mode: run only the requested patch, without resolving nor cleaning the cache
    if patch_index is not None:
        cache_path = sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES].loc[patch_index, SopaKeys.CACHE_PATH_KEY]
        baysor_patch(Path(cache_path))
        return

    def _has_output(patch_dir) -> bool:
        """Tell whether the loom output file exists in the patch directory."""
        return (patch_dir / "segmentation_counts.loom").exists()

    patches_dirs = get_transcripts_patches_dirs(sdata)

    if recover:
        pending_dirs = [patch_dir for patch_dir in patches_dirs if not _has_output(patch_dir)]
    else:
        pending_dirs = patches_dirs

    jobs = [partial(baysor_patch, patch_dir) for patch_dir in pending_dirs]
    settings._run_with_backend(jobs)

    if force:
        # Only the patches that produced an output are resolved
        patches_dirs = [patch_dir for patch_dir in patches_dirs if _has_output(patch_dir)]
        assert patches_dirs, "Baysor failed on all patches"

    resolve(sdata, patches_dirs, _get_gene_column_argument(config), min_area=min_area, key_added=key_added)

    sdata.attrs[SopaAttrs.BOUNDARIES] = key_added

    if delete_cache:
        delete_transcripts_patches_dirs(sdata)

`sopa.segmentation.comseg(sdata, config=None, min_area=0, delete_cache=True, recover=False, key_added=SopaKeys.COMSEG_BOUNDARIES, patch_index=None)`

Run ComSeg segmentation on a SpatialData object, and add a GeoDataFrame containing the cell boundaries.

ComSeg installation

Make sure to install ComSeg (pip install comseg) for this method to work.

Transcript patches

To use ComSeg, make sure to run sopa.make_transcript_patches with a prior_shapes_key and write_cells_centroids=True.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`config`	`dict \| str \| None`	Optional configuration dictionary or path to a JSON file containing a valid ComSeg config. By default, a configuration is inferred based on the cell area of the prior segmentation.	`None`
`min_area`	`float`	Minimal area (in microns^2) of a cell to be considered.	`0`
`delete_cache`	`bool`	Whether to delete the cache after segmentation.	`True`
`recover`	`bool`	If `True`, recover the cache from a failed segmentation, and continue.	`False`
`key_added`	`str`	Name of the shapes element to be added to `sdata`.	`COMSEG_BOUNDARIES`
`patch_index`	`int \| None`	Index of the patch to segment (we do not recommend to set this argument). By default, segment all patches.	`None`

Source code in sopa/segmentation/methods/_comseg.py

def comseg(
    sdata: SpatialData,
    config: dict | str | None = None,
    min_area: float = 0,
    delete_cache: bool = True,
    recover: bool = False,
    key_added: str = SopaKeys.COMSEG_BOUNDARIES,
    patch_index: int | None = None,
):
    """Run [ComSeg](https://comseg.readthedocs.io/en/latest/) segmentation on a SpatialData object, and add a GeoDataFrame containing the cell boundaries.

    !!! warning "ComSeg installation"
        Make sure to install ComSeg (`pip install comseg`) for this method to work.

    !!! info "Transcript patches"
        To use ComSeg, make sure to run [sopa.make_transcript_patches][] with a `prior_shapes_key` and `write_cells_centroids=True`.

    Args:
        sdata: A `SpatialData` object.
        config: Optional configuration dictionary or path to a JSON file containing a valid ComSeg config. By default, a configuration is inferred based on the cell area of the prior segmentation. A dictionary passed here is left unmodified (the function works on a copy).
        min_area: Minimal area (in microns^2) of a cell to be considered.
        delete_cache: Whether to delete the cache after segmentation.
        recover: If `True`, recover the cache from a failed segmentation, and continue.
        key_added: Name of the shapes element to be added to `sdata`.
        patch_index: Index of the patch to segment (we do not recommend setting this argument). By default, segment all patches.
    """
    _check_transcript_patches(sdata, with_prior=True)

    # A missing or empty config triggers the config inference; a string is read as a JSON file path
    if config is None or not len(config):
        config = _get_default_config(sdata, sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES])
    elif isinstance(config, str):
        with open(config) as f:
            config = json.load(f)

    assert "gene_column" in config, "'gene_column' not found in config"

    # Build a new dictionary rather than assigning into `config`, so that a dictionary
    # provided by the caller is not mutated as a side effect of this function
    config = {
        **config,
        "prior_name": sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.PRIOR_SHAPES_KEY].iloc[0],
    }

    # Single-patch mode: run only the requested patch, without resolving nor cleaning the cache
    if patch_index is not None:
        patch_dir = Path(sdata.shapes[SopaKeys.TRANSCRIPTS_PATCHES].loc[patch_index, SopaKeys.CACHE_PATH_KEY])
        comseg_patch(patch_dir, config, recover)
        return

    patches_dirs = get_transcripts_patches_dirs(sdata)

    _functions = [partial(comseg_patch, patch_dir, config, recover) for patch_dir in patches_dirs]
    settings._run_with_backend(_functions)

    resolve(sdata, patches_dirs, config["gene_column"], min_area=min_area, key_added=key_added)

    sdata.attrs[SopaAttrs.BOUNDARIES] = key_added

    if delete_cache:
        delete_transcripts_patches_dirs(sdata)

`sopa.segmentation.proseg(sdata, delete_cache=True, command_line_suffix='', key_added=SopaKeys.PROSEG_BOUNDARIES)`

Run proseg segmentation on a SpatialData object, and add the corresponding cell boundaries and AnnData table with counts.

Proseg installation

Make sure to install proseg separately before running this function.

Proseg usage specificities

Contrary to most other segmentation tools, proseg will only run on one patch. I.e., you need to run sopa.make_transcript_patches with patch_width=None and a prior_shapes_key before running proseg.

Also, note that aggregation is not necessary after running proseg.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`delete_cache`	`bool`	Whether to delete the cache after segmentation.	`True`
`command_line_suffix`	`str`	Optional suffix to add to the proseg command line.	`''`
`key_added`	`str`	Name of the shapes element to be added to `sdata.shapes`.	`PROSEG_BOUNDARIES`

Source code in sopa/segmentation/methods/_proseg.py

def proseg(
    sdata: SpatialData,
    delete_cache: bool = True,
    command_line_suffix: str = "",
    key_added: str = SopaKeys.PROSEG_BOUNDARIES,
):
    """Segment cells with [`proseg`](https://github.com/dcjones/proseg) on a SpatialData object, and store both the cell boundaries and the `AnnData` table with counts.

    !!! warning "Proseg installation"
        [`proseg`](https://github.com/dcjones/proseg) is not installed by sopa: install it separately before calling this function.

    !!! info "Proseg usage specificities"
        Unlike most other segmentation tools, `proseg` only runs on a single patch. I.e., before running `proseg`, you need
        to run [`sopa.make_transcript_patches`](../patches/#sopa.make_transcript_patches) with `patch_width=None` and a `prior_shapes_key`.

        Also, note that aggregation is not necessary after running `proseg`.

    Args:
        sdata: A `SpatialData` object.
        delete_cache: Whether the cache is deleted once the segmentation is done.
        command_line_suffix: Optional suffix appended to the proseg command line.
        key_added: Name of the shapes element to be added to `sdata.shapes`.
    """
    _check_transcript_patches(sdata)

    points_key = sdata[SopaKeys.TRANSCRIPTS_PATCHES][SopaKeys.POINTS_KEY].iloc[0]

    # Exactly one transcript patch directory is expected
    all_patches_dirs = get_transcripts_patches_dirs(sdata)
    assert len(all_patches_dirs) == 1, (
        "Proseg is fast enough to work on a single patch. Re-run `sopa.make_transcript_patches` with `patch_width=None` and a `prior_shapes_key`."
    )
    patch_dir = Path(all_patches_dirs[0])

    _run_proseg(_get_proseg_command(sdata, points_key, command_line_suffix), patch_dir)

    adata, geo_df = _read_proseg(sdata, patch_dir, points_key)
    add_standardized_table(sdata, adata, geo_df, key_added, SopaKeys.TABLE)

    sdata.attrs[SopaAttrs.BOUNDARIES] = key_added

    if delete_cache:
        delete_transcripts_patches_dirs(sdata)

    log.info("Proseg table and boundaries added (running `sopa.aggregate` is not mandatory).")

`sopa.segmentation.custom_staining_based(sdata, method, channels, image_key=None, min_area=0, delete_cache=True, recover=False, clip_limit=0.2, clahe_kernel_size=None, gaussian_sigma=1, cache_dir_name=SopaKeys.CUSTOM_BOUNDARIES, key_added=SopaKeys.CUSTOM_BOUNDARIES)`

Run a generic staining-based segmentation model, and add a GeoDataFrame containing the cell boundaries.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`method`	`Callable`	A segmentation `callable` whose input is an image of shape `(C, Y, X)` and output is a cell mask of shape `(Y, X)`. Each mask value `>0` represents a unique cell ID. The number of channels `C` is determined by the `channels` argument.	required
`channels`	`list[str] \| str \| None`	Name of the channels to be used for segmentation (or list of channel names).	required
`image_key`	`str \| None`	Name of the image in `sdata` to be used for segmentation.	`None`
`min_area`	`float`	Minimum area of a cell to be considered.	`0`
`delete_cache`	`bool`	Whether to delete the cache after segmentation.	`True`
`recover`	`bool`	If `True`, recover the cache from a failed segmentation, and continue.	`False`
`clip_limit`	`float`	Parameter for skimage.exposure.equalize_adapthist (applied before running segmentation)	`0.2`
`clahe_kernel_size`	`int \| list[int] \| None`	Parameter for skimage.exposure.equalize_adapthist (applied before running segmentation)	`None`
`gaussian_sigma`	`float`	Parameter for scipy gaussian_filter (applied before running segmentation)	`1`
`cache_dir_name`	`str`	Name of the cache directory.	`CUSTOM_BOUNDARIES`
`key_added`	`str`	Name of the key to be added to `sdata.shapes`.	`CUSTOM_BOUNDARIES`

Source code in sopa/segmentation/methods/_custom.py

def custom_staining_based(
    sdata: SpatialData,
    method: Callable,
    channels: list[str] | str | None,
    image_key: str | None = None,
    min_area: float = 0,
    delete_cache: bool = True,
    recover: bool = False,
    clip_limit: float = 0.2,
    clahe_kernel_size: int | list[int] | None = None,
    gaussian_sigma: float = 1,
    cache_dir_name: str = SopaKeys.CUSTOM_BOUNDARIES,
    key_added: str = SopaKeys.CUSTOM_BOUNDARIES,
):
    """Apply a generic staining-based segmentation model, and store the cell boundaries as a GeoDataFrame.

    Args:
        sdata: A `SpatialData` object.
        method: A segmentation `callable` taking an image of shape `(C, Y, X)` as input and returning a cell mask of shape `(Y, X)`. Each mask value `>0` represents a unique cell ID. The number of channels `C` is determined by the `channels` argument.
        channels: Name of the channel used for the segmentation (or list of channel names).
        image_key: Name of the image of `sdata` on which the segmentation is run.
        min_area: Minimum area for a cell to be kept.
        delete_cache: Whether the cache is deleted once the segmentation is done.
        recover: If `True`, reuse the cache of a previously failed segmentation, and continue from there.
        clip_limit: Parameter for skimage.exposure.equalize_adapthist (applied before running segmentation)
        clahe_kernel_size: Parameter for skimage.exposure.equalize_adapthist (applied before running segmentation)
        gaussian_sigma: Parameter for scipy gaussian_filter (applied before running segmentation)
        cache_dir_name: Name of the cache directory.
        key_added: Name of the key to be added to `sdata.shapes`.
    """
    cache_dir = get_cache_dir(sdata) / cache_dir_name

    # Preprocessing options of the staining segmentation
    preprocessing_kwargs = {
        "clip_limit": clip_limit,
        "clahe_kernel_size": clahe_kernel_size,
        "gaussian_sigma": gaussian_sigma,
    }
    segmenter = StainingSegmentation(
        sdata, method, channels, min_area=min_area, image_key=image_key, **preprocessing_kwargs
    )

    # The per-patch results are written to the cache directory, then read back and merged
    segmenter.write_patches_cells(cache_dir, recover=recover)
    cells = solve_conflicts(StainingSegmentation.read_patches_cells(cache_dir))

    StainingSegmentation.add_shapes(sdata, cells, image_key=segmenter.image_key, key_added=key_added)

    sdata.attrs[SopaAttrs.BOUNDARIES] = key_added

    if delete_cache:
        shutil.rmtree(cache_dir)

Tissue segmentation

`sopa.segmentation.tissue(sdata, image_key=None, level=-1, mode=None, expand_radius_ratio=0.05, channel=None, clip_parameters=(0.9, 5), blur_kernel_size=5, open_kernel_size=5, close_kernel_size=5, drop_threshold=0.01, allow_holes=True, key_added=SopaKeys.ROI)`

Perform a contouring of the tissue (i.e., "tissue-segmentation"). The resulting regions-of-interest(s) are saved as shapes in the SpatialData object. There are two modes available: saturation and staining. The saturation mode is used for H&E data, while the staining mode is used on staining images (more details below).

Saturation mode

This segmentation method first transforms the image from RGB color space to HSV. Then, on the basis of the saturation channel, a median blurring is applied with an element of size blur_kernel_size before running the Otsu method. Then a morphological opening and closing are applied as a postprocessing step with square elements of size open_kernel_size and close_kernel_size. Lastly, the connected components with size less than drop_threshold * number_of_pixel_of_the_image are removed, and the rest are converted into polygons.

Staining mode

Instead of extracting the saturation channel, the image is converted to a grayscale image by taking the maximum value of all channels (or the specified channel, if "channel" is given). The rest of the steps are the same as in the saturation mode.

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object representing an H&E image	required
`image_key`	`str \| None`	Optional key of the H&E image	`None`
`level`	`int`	Level of the multiscale image on which the segmentation will be performed (if the image is a `DataTree`)	`-1`
`mode`	`str \| None`	Two modes are available: `saturation` (for H&E data) and `staining`. By default, `saturation` is used only if there are exactly 3 channels.	`None`
`expand_radius_ratio`	`float`	The ratio of the radius of the polygons that will be expanded.	`0.05`
`channel`	`str \| None`	The channel to use for the `staining` mode. If `None`, the maximum value of all channels is used.	`None`
`clip_parameters`	`tuple[float, float]`	Parameters used to get the threshold used to clip the image before converting it to an 8-bit image (only used in "staining" mode). The first parameter is the quantile, and the second is the divisor. By default, the threshold is the 90th quantile divided by 5.	`(0.9, 5)`
`blur_kernel_size`	`int`	The kernel size of the median blurring operation	`5`
`open_kernel_size`	`int`	The kernel size of the morphological opening operation	`5`
`close_kernel_size`	`int`	The kernel size of the morphological closing operation	`5`
`drop_threshold`	`float`	Segments that cover less area than a ratio of `drop_threshold` of the number of pixels of the image will be removed	`0.01`
`allow_holes`	`bool`	If `True`, the holes in the polygons will be kept. If `False`, the holes will be removed.	`True`
`key_added`	`str`	Name of the spatial element that will be added, containing the segmented tissue polygons.	`ROI`

Source code in sopa/segmentation/_tissue.py

def tissue(
    sdata: SpatialData,
    image_key: str | None = None,
    level: int = -1,
    mode: str | None = None,
    expand_radius_ratio: float = 0.05,
    channel: str | None = None,
    clip_parameters: tuple[float, float] = (0.9, 5),
    blur_kernel_size: int = 5,
    open_kernel_size: int = 5,
    close_kernel_size: int = 5,
    drop_threshold: float = 0.01,
    allow_holes: bool = True,
    key_added: str = SopaKeys.ROI,
):
    """Contour the tissue of an image (i.e., run a "tissue-segmentation"). The resulting region(s)-of-interest are
    saved as shapes in the `SpatialData` object. Two modes are available: `saturation`, used for H&E data, and
    `staining`, used on staining images (more details below).

    !!! info "Saturation mode"
        The image is first converted from the RGB color space to HSV. Then, on the saturation channel,
        a median blurring is applied with an element of size `blur_kernel_size`, followed by the Otsu method.
        As a postprocessing step, a morphological opening and closing are applied with square elements
        of size `open_kernel_size` and `close_kernel_size`. Lastly, the connected components
        with size less than `drop_threshold * number_of_pixel_of_the_image` are removed, and the
        remaining ones are converted into polygons.

    !!! info "Staining mode"
        Instead of extracting the saturation channel, the image is converted to a grayscale image by taking the maximum
        value of all channels (or the specified channel, if `"channel"` is given). The other steps are the same as in the saturation mode.

    Args:
        sdata: A `SpatialData` object representing an H&E image
        image_key: Optional key of the H&E image
        level: Level of the multiscale image on which the segmentation will be performed (if the image is a `DataTree`)
        mode: Two modes are available: `saturation` (for H&E data) and `staining`. By default, `saturation` is used only if there are exactly 3 channels.
        expand_radius_ratio: The ratio of the radius of the polygons that will be expanded.
        channel: The channel to use for the `staining` mode. If `None`, the maximum value of all channels is used.
        clip_parameters: Parameters used to get the threshold used to clip the image before converting it to an 8-bit image (only used in "staining" mode). The first parameter is the quantile, and the second is the divisor. By default, the threshold is the 90th quantile divided by 5.
        blur_kernel_size: The kernel size of the median blurring operation
        open_kernel_size: The kernel size of the morphological opening operation
        close_kernel_size: The kernel size of the morphological closing operation
        drop_threshold: Segments that cover less area than a ratio of `drop_threshold` of the number of pixels of the image will be removed
        allow_holes: If `True`, the holes in the polygons will be kept. If `False`, the holes will be removed.
        key_added: Name of the spatial element that will be added, containing the segmented tissue polygons.
    """
    image, mode = _get_image_and_mode(sdata, image_key, mode, channel)

    if key_added in sdata.shapes:
        log.warning(f"sdata['{key_added}'] was already existing, but tissue segmentation is run on top")

    if isinstance(image, DataTree):
        # Multiscale image: select the requested scale, then take its first data variable
        scale_name = list(image.keys())[level]
        image = next(iter(image[scale_name].values()))

    segmenter = TissueSegmentation(
        image=image,
        channel=channel,
        clip_parameters=clip_parameters,
        blur_kernel_size=blur_kernel_size,
        open_kernel_size=open_kernel_size,
        close_kernel_size=close_kernel_size,
        drop_threshold=drop_threshold,
        allow_holes=allow_holes,
    )
    tissue_polygons = segmenter.get_polygons(mode)

    if len(tissue_polygons) == 0:
        log.warning(
            "No polygon has been found after tissue segmentation. "
            "Check that there is some tissue in the image, or consider updating the function parameters."
        )
        return

    tissue_polygons = expand_radius(tissue_polygons, expand_radius_ratio)
    # The exploded parts are re-indexed from scratch before being validated
    tissue_polygons = tissue_polygons.explode(index_parts=False, ignore_index=True)
    tissue_polygons = to_valid_polygons(tissue_polygons, simple_polygon=not allow_holes)

    # The shapes reuse (a copy of) all the transformations of the image they were computed on
    transformations = get_transformation(image, get_all=True).copy()
    tissue_shapes = ShapesModel.parse(tissue_polygons, transformations=transformations)

    add_spatial_element(sdata, key_added, tissue_shapes)

Segmentation utils

`sopa.segmentation.combine(sdata, elements, key_added, threshold=0.5)`

Combine multiple segmentation boundaries into a single one.

Example

In the example below, we run Cellpose twice, once for nuclei and once for tumor cells. We then combine the two segmentations into a single one.

import sopa

sdata = sopa.io.toy_dataset(length=1000)
sopa.make_image_patches(sdata)

sopa.segmentation.cellpose(sdata, "DAPI", diameter=35, key_added="nuclei")
sopa.segmentation.cellpose(sdata, ["DAPI", "CK"], diameter=35, key_added="tumor_cells")

sopa.segmentation.combine(sdata, ["nuclei", "tumor_cells"], key_added="combined_cells")

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object.	required
`elements`	`list[str \| GeoDataFrame]`	List of names of the keys in `sdata.shapes` to be combined (or directly a list of `GeoDataFrame`).	required
`key_added`	`str`	The name of the new key to be added to `sdata.shapes`.	required
`threshold`	`float`	When two cells are overlapping, we look at the area of intersection over the area of the smallest cell. If this value is higher than the `threshold`, the cells are merged	`0.5`

Source code in sopa/segmentation/resolve.py

def combine(
    sdata: SpatialData,
    elements: list[str | gpd.GeoDataFrame],
    key_added: str,
    threshold: float = 0.5,
):
    """Merge several segmentation boundaries into a single set of shapes.

    The first element is used as the reference: the other elements are passed through `to_intrinsic`
    with respect to it, and the combined shapes are saved with the transformations of the reference.

    Example:
        In the example below, we run Cellpose twice, once for nuclei and once for tumor cells. We then combine the two segmentations into a single one.
        ```python
        import sopa

        sdata = sopa.io.toy_dataset(length=1000)
        sopa.make_image_patches(sdata)

        sopa.segmentation.cellpose(sdata, "DAPI", diameter=35, key_added="nuclei")
        sopa.segmentation.cellpose(sdata, ["DAPI", "CK"], diameter=35, key_added="tumor_cells")

        sopa.segmentation.combine(sdata, ["nuclei", "tumor_cells"], key_added="combined_cells")
        ```

    Args:
        sdata: A `SpatialData` object.
        elements: List of names of the keys in `sdata.shapes` to be combined (or directly a list of `GeoDataFrame`).
        key_added: The name of the new key to be added to `sdata.shapes`.
        threshold: When two cells are overlapping, we look at the area of intersection over the area of the smallest cell. If this value is higher than the `threshold`, the cells are merged
    """
    assert len(elements) > 1, "At least two elements must be provided to combine"

    # Replace every shapes key by its GeoDataFrame; GeoDataFrames are used as provided
    geo_dfs = []
    for element in elements:
        if isinstance(element, gpd.GeoDataFrame):
            geo_dfs.append(element)
        else:
            geo_dfs.append(sdata.shapes[element])

    reference, *others = geo_dfs
    aligned_geo_dfs = [reference]
    aligned_geo_dfs.extend(to_intrinsic(sdata, other, reference) for other in others)

    # Pool the geometries of all the elements, then resolve the overlapping cells
    all_geometries = pd.concat([geo_df.geometry for geo_df in aligned_geo_dfs], axis=0)
    resolved_cells = solve_conflicts(list(all_geometries), threshold=threshold)

    transformations = get_transformation(reference, get_all=True)
    sdata.shapes[key_added] = ShapesModel.parse(resolved_cells, transformations=transformations)

`sopa.overlay_segmentation(sdata, shapes_key, gene_column=None, area_ratio_threshold=0.25, image_key=None, table_key=SopaKeys.TABLE)`

Overlay a segmentation on top of an existing segmentation

Parameters:

Name	Type	Description	Default
`sdata`	`SpatialData`	A `SpatialData` object	required
`shapes_key`	`str`	The key of the new shapes to be added	required
`gene_column`	`str \| None`	Key of the points dataframe containing the gene names	`None`
`area_ratio_threshold`	`float`	Threshold between 0 and 1. For each original cell overlapping with a new cell, we compute the overlap-area/cell-area, if above the threshold the cell is removed.	`0.25`
`image_key`	`str \| None`	Optional key of the original image	`None`
`table_key`	`str`	Key of the table to be overlaid	`TABLE`

Source code in sopa/aggregation/overlay.py

def overlay_segmentation(
    sdata: SpatialData,
    shapes_key: str,
    gene_column: str | None = None,
    area_ratio_threshold: float = 0.25,
    image_key: str | None = None,
    table_key: str = SopaKeys.TABLE,
):
    """Overlay a segmentation on top of an existing segmentation

    The original cells that overlap too much with a new cell are dropped, the table is re-computed on the
    new shapes, and both the tables and the shapes are concatenated. The previous table is kept under the
    key `f"{SopaKeys.OLD_TABLE_PREFFIX}{table_key}"`, and the boolean column `SopaKeys.CELL_OVERLAY_KEY` of
    the new table tells whether a cell comes from the overlaid shapes (`True`) or from the original
    segmentation (`False`).

    Args:
        sdata: A `SpatialData` object
        shapes_key: The key of the new shapes to be added
        gene_column: Key of the points dataframe containing the gene names
        area_ratio_threshold: Threshold between 0 and 1. For each original cell overlapping with a new cell, we compute the overlap-area/cell-area, if above the threshold the cell is removed.
        image_key: Optional key of the original image
        table_key: Key of the table to be overlaid

    Raises:
        AssertionError: If `table_key` is not a table of `sdata`, if this table has no `SopaKeys.UNS_KEY`
            entry in `.uns`, or if the table annotates more than one region.
    """
    assert table_key in sdata.tables, f"No table with name '{table_key}' found in the SpatialData object"

    old_table: AnnData = sdata.tables[table_key]

    assert SopaKeys.UNS_KEY in old_table.uns, "It seems the table was not aggregated using `sopa.aggregate`"

    # Re-run the same kind of aggregation (genes and/or channels) as for the existing table
    sopa_attrs = old_table.uns[SopaKeys.UNS_KEY]
    aggregate_genes = sopa_attrs[SopaKeys.UNS_HAS_TRANSCRIPTS]
    aggregate_channels = sopa_attrs[SopaKeys.UNS_HAS_INTENSITIES]

    if aggregate_genes and gene_column is None:
        points = get_spatial_element(sdata.points, key=sdata.attrs.get(SopaAttrs.TRANSCRIPTS))
        gene_column = get_feature_key(points, raise_error=True)

    aggregator = Aggregator(sdata, image_key=image_key, shapes_key=shapes_key)

    # Keep the previous table under a prefixed key, and free `table_key` for the new table
    aggregator.sdata.tables[f"{SopaKeys.OLD_TABLE_PREFFIX}{table_key}"] = old_table
    del aggregator.sdata.tables[table_key]

    old_shapes_key = old_table.uns["spatialdata_attrs"]["region"]
    instance_key = old_table.uns["spatialdata_attrs"]["instance_key"]

    if isinstance(old_shapes_key, list):
        assert len(old_shapes_key) == 1, "Can't overlap segmentation on multi-region SpatialData object"
        old_shapes_key = old_shapes_key[0]

    old_geo_df = aggregator.sdata[old_shapes_key]
    geo_df = to_intrinsic(aggregator.sdata, aggregator.geo_df, old_geo_df)

    # NOTE(review): presumably needed so that `sjoin` stores the index of the new shapes in the
    # default "index_right" column (and not in a column named after the index) - confirm
    geo_df.index.name = None
    gdf_join = gpd.sjoin(old_geo_df, geo_df)

    # "index_right" contains index *labels* of `geo_df`, so the lookup must be label-based (`.loc`):
    # a positional lookup is only correct when the new shapes have a 0..n-1 index
    new_geometries = geo_df.geometry
    gdf_join["geometry_right"] = gdf_join["index_right"].map(lambda label: new_geometries.loc[label])
    gdf_join["overlap_ratio"] = gdf_join.apply(_overlap_area_ratio, axis=1)
    gdf_join: gpd.GeoDataFrame = gdf_join[gdf_join.overlap_ratio >= area_ratio_threshold]

    # The index of `gdf_join` now lists the original cells to remove
    table_crop = old_table[~np.isin(old_table.obs[instance_key], gdf_join.index)].copy()
    table_crop.obs[SopaKeys.CELL_OVERLAY_KEY] = False

    aggregator.compute_table(
        aggregate_channels=aggregate_channels,
        aggregate_genes=aggregate_genes,
        gene_column=gene_column,
        key_added=table_key,
    )
    aggregator.table.obs[SopaKeys.CELL_OVERLAY_KEY] = True

    aggregator.table = anndata.concat(
        [table_crop, aggregator.table],
        uns_merge="first",
        join="outer",
    )

    aggregator.shapes_key = f"{old_shapes_key}_overlay_{aggregator.shapes_key}"

    # Same filtering for the shapes: remaining original cells first, then the new cells
    geo_df_cropped = old_geo_df.loc[~old_geo_df.index.isin(gdf_join.index)]
    aggregator.geo_df = pd.concat([geo_df_cropped, geo_df], join="outer", axis=0)
    aggregator.geo_df.attrs = old_geo_df.attrs

    aggregator.add_standardized_table(table_key)