Skip to content

Utils

Cache operations

sopa.utils.get_cache_dir(sdata)

Get the cache directory for a SpatialData object.

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object.

required

Returns:

Type Description
Path

A Path to the cache directory.

Source code in sopa/utils/utils.py
def get_cache_dir(sdata: SpatialData) -> Path:
    """Return the cache directory of a SpatialData object, creating it if needed.

    A backed object keeps its cache inside its own zarr directory. Otherwise, the
    cache is a sub-directory of the home cache directory, named after a unique
    identifier stored in `sdata.attrs` (generated on the first call).

    Args:
        sdata: A `SpatialData` object.

    Returns:
        A `Path` to the cache directory.
    """
    if sdata.is_backed():
        # the cache lives inside the zarr directory
        location = sdata.path / SopaFiles.SOPA_CACHE_DIR
    else:
        if SopaAttrs.UID in sdata.attrs:
            # a cache was already assigned to this object in the home directory
            identifier = sdata.attrs[SopaAttrs.UID]
        else:
            # first call for this object: generate and remember a new identifier
            import uuid

            identifier = str(uuid.uuid4())
            sdata.attrs[SopaAttrs.UID] = identifier

        location = HOME_CACHE_DIR / identifier

    location.mkdir(parents=True, exist_ok=True)
    return location

sopa.utils.delete_cache(sdata=None)

Delete the cache directory (the entire cache, or the cache of one specific SpatialData object).

Parameters:

Name Type Description Default
sdata SpatialData | None

The SpatialData object whose cache is to be deleted. If None, the entire cache is deleted.

None
Source code in sopa/utils/utils.py
def delete_cache(sdata: SpatialData | None = None) -> None:
    """Delete the cache directory (the entire cache, or the cache of one specific SpatialData object).

    When the entire cache is deleted, only the sub-directories of the home cache
    directory are removed; plain files located directly inside it are left untouched.

    Args:
        sdata: The SpatialData object whose cache is to be deleted. If None, the entire cache is deleted.
    """
    import shutil

    if sdata is not None:
        cache_dir = get_cache_dir(sdata)
        shutil.rmtree(cache_dir)
        return

    # Nothing to delete if the home cache directory does not exist (e.g., nothing
    # was cached yet): iterating over a missing directory would raise an error.
    if not HOME_CACHE_DIR.is_dir():
        return

    # Snapshot the listing before deleting, so that the directory is not modified while iterating over it
    for sub_dir in list(HOME_CACHE_DIR.iterdir()):
        if sub_dir.is_dir():
            shutil.rmtree(sub_dir)

sopa.utils.delete_transcripts_patches_dirs(sdata)

Delete the cache directories containing the transcript patches (for instance, for Baysor or ComSeg)

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object.

required
Source code in sopa/utils/utils.py
def delete_transcripts_patches_dirs(sdata: SpatialData):
    """Remove every transcript-patch cache directory of a SpatialData object (e.g., the ones used for Baysor or ComSeg).

    Args:
        sdata: A `SpatialData` object.
    """
    from shutil import rmtree

    for patches_dir in get_transcripts_patches_dirs(sdata):
        rmtree(patches_dir)

Accessing the elements

sopa.utils.get_spatial_element(element_dict, key=None, return_key=False, as_spatial_image=False)

Gets an element from a SpatialData object.

Parameters:

Name Type Description Default
element_dict dict[str, SpatialElement]

Dictionary whose values are spatial elements (e.g., sdata.images).

required
key str | None

Optional element key. If None, returns the only element (if only one).

None
return_key bool

Whether to also return the key of the element.

False
as_spatial_image bool

Whether to return the element as a SpatialImage (if it is a DataTree)

False

Returns:

Type Description
SpatialElement | tuple[str, SpatialElement]

If return_key is False, only the element is returned, else a tuple (element_key, element)

Source code in sopa/utils/utils.py
def get_spatial_element(
    element_dict: dict[str, SpatialElement],
    key: str | None = None,
    return_key: bool = False,
    as_spatial_image: bool = False,
) -> SpatialElement | tuple[str, SpatialElement]:
    """Gets an element from a SpatialData object.

    Args:
        element_dict: Dictionary whose values are spatial elements (e.g., `sdata.images`).
        key: Optional element key. If `None`, returns the only element (if only one).
        return_key: Whether to also return the key of the element.
        as_spatial_image: Whether to return the element as a `SpatialImage` (if it is a `DataTree`)

    Returns:
        If `return_key` is False, only the element is returned, else a tuple `(element_key, element)`

    Raises:
        AssertionError: If `element_dict` is empty, if `key` is provided but not found in `element_dict`,
            or if `key` is `None` while `element_dict` contains more than one element.
    """
    assert len(element_dict), "No spatial element was found in the dict."

    if key is not None:
        assert key in element_dict, f"Spatial element '{key}' not found."
        return _return_element(element_dict, key, return_key, as_spatial_image)

    # The dict is known to be non-empty at this point, so the key can only be
    # inferred unambiguously when there is exactly one element.
    assert (
        len(element_dict) == 1
    ), f"Multiple valid elements found: {', '.join(element_dict.keys())}. Provide an element key to denote which element you want to use."

    key = next(iter(element_dict.keys()))

    return _return_element(element_dict, key, return_key, as_spatial_image)

sopa.utils.get_spatial_image(sdata, key=None, return_key=False, valid_attr=SopaAttrs.CELL_SEGMENTATION)

Gets a DataArray from a SpatialData object (if the image has multiple scales, the scale0 is returned)

Parameters:

Name Type Description Default
sdata SpatialData

SpatialData object.

required
key str | None

Optional image key. If None, returns the only image (if only one), or tries to find an image with valid_attr.

None
return_key bool

Whether to also return the key of the image.

False
valid_attr str

Attribute that the image must have to be considered valid.

CELL_SEGMENTATION

Returns:

Type Description
DataArray | tuple[str, DataArray]

If return_key is False, only the image is returned, else a tuple (image_key, image)

Source code in sopa/utils/utils.py
def get_spatial_image(
    sdata: SpatialData,
    key: str | None = None,
    return_key: bool = False,
    valid_attr: str = SopaAttrs.CELL_SEGMENTATION,
) -> DataArray | tuple[str, DataArray]:
    """Fetch an image of a SpatialData object as a DataArray (for a multi-scale image, the `scale0` is returned).

    Args:
        sdata: SpatialData object.
        key: Optional image key. If not provided, the key stored under `valid_attr` in `sdata.attrs` is used (if any); otherwise the only image is returned (if only one).
        return_key: Whether to also return the key of the image.
        valid_attr: Name of the `sdata.attrs` entry holding the default image key.

    Returns:
        The image alone if `return_key` is False, else a tuple `(image_key, image)`
    """
    images = sdata.images

    # an explicit key takes precedence over the one registered in the attrs
    image_key = key or sdata.attrs.get(valid_attr)

    return get_spatial_element(images, key=image_key, return_key=return_key, as_spatial_image=True)

sopa.utils.get_boundaries(sdata, return_key=False, warn=False, key=None)

Gets cell segmentation boundaries of a SpatialData object after running Sopa.

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
return_key bool

Whether to return the key of the shapes or not.

False
warn bool

If True, prints a warning if no boundary is found. Else, raises an error.

False
key str | None

A valid shapes_key or None.

None

Returns:

Type Description
GeoDataFrame | tuple[str, GeoDataFrame] | None

A GeoDataFrame containing the boundaries, or a tuple (shapes_key, geo_df)

Source code in sopa/utils/utils.py
def get_boundaries(
    sdata: SpatialData, return_key: bool = False, warn: bool = False, key: str | None = None
) -> gpd.GeoDataFrame | tuple[str, gpd.GeoDataFrame] | None:
    """Retrieve the cell segmentation boundaries of a SpatialData object after running Sopa.

    The shapes key is resolved in this order: the `key` argument, the boundaries key
    stored in `sdata.attrs`, then the default boundary keys of the supported
    segmentation methods (Baysor, Stardist, ComSeg, Cellpose).

    Args:
        sdata: A SpatialData object
        return_key: Whether to return the key of the shapes or not.
        warn: If `True`, prints a warning if no boundary is found. Else, raises an error.
        key: A valid `shapes_key` or None.

    Returns:
        A `GeoDataFrame` containing the boundaries, or a tuple `(shapes_key, geo_df)`.
        If no boundary is found and `warn` is `True`, returns `None` (or `(None, None)` if `return_key` is `True`).

    Raises:
        ValueError: If no boundary is found and `warn` is `False`.
    """
    shapes_key = key or sdata.attrs.get(SopaAttrs.BOUNDARIES)

    if shapes_key is not None:
        return get_spatial_element(sdata.shapes, key=shapes_key, return_key=return_key)

    # no explicit key: try the default keys, in order of priority
    candidate_keys = (
        SopaKeys.BAYSOR_BOUNDARIES,
        SopaKeys.STARDIST_BOUNDARIES,
        SopaKeys.COMSEG_BOUNDARIES,
        SopaKeys.CELLPOSE_BOUNDARIES,
    )
    for candidate_key in candidate_keys:
        found = _try_get_boundaries(sdata, candidate_key, return_key)
        if found is not None:
            return found

    error_message = "sdata object has no valid segmentation boundary. Consider running Sopa segmentation first."

    if warn:
        log.warning(error_message)
        return (None, None) if return_key else None

    raise ValueError(error_message)

sopa.utils.get_intensities(sdata)

Gets the intensity dataframe of shape n_obs x n_channels

Source code in sopa/utils/utils.py
def get_intensities(sdata: SpatialData) -> pd.DataFrame | None:
    """Return the intensity dataframe of shape `n_obs x n_channels`.

    Args:
        sdata: A `SpatialData` object containing the Sopa table.

    Returns:
        `None` if the table holds no intensities. Otherwise, the intensities stored in
        `adata.obsm` if the table also holds transcripts, else the table content itself as a dataframe.
    """
    assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"

    adata = sdata.tables[SopaKeys.TABLE]
    sopa_uns = adata.uns[SopaKeys.UNS_KEY]

    if not sopa_uns[SopaKeys.UNS_HAS_INTENSITIES]:
        return None

    # when transcripts are also present, the intensities are kept aside in obsm
    if sopa_uns[SopaKeys.UNS_HAS_TRANSCRIPTS]:
        return adata.obsm[SopaKeys.INTENSITIES_OBSM]
    return adata.to_df()

sopa.utils.get_channel_names(image, image_key=None)

Get the channel names of an image or a SpatialData object.

Parameters:

Name Type Description Default
image DataArray | DataTree | SpatialData

Either a DataArray, a DataTree, or a SpatialData object. If a SpatialData object, the image_key argument can be used.

required
image_key str | None

If image is a SpatialData object, the key of the image to get the channel names from. If None, tries to get it automatically.

None

Returns:

Type Description
ndarray

An array of channel names.

Source code in sopa/utils/image.py
def get_channel_names(image: DataArray | DataTree | SpatialData, image_key: str | None = None) -> np.ndarray:
    """Return the channel names (values of the `"c"` coordinate) of an image or of a SpatialData object.

    Args:
        image: Either a `DataArray`, a `DataTree`, or a `SpatialData` object. If a `SpatialData` object, the `image_key` argument can be used.
        image_key: If `image` is a SpatialData object, the key of the image to get the channel names from. If `None`, tries to get it automatically.

    Returns:
        An array of channel names.

    Raises:
        ValueError: If the image is neither a `DataArray` nor a `DataTree`.
    """
    if isinstance(image, SpatialData):
        # resolve the actual image first
        image = get_spatial_image(image, key=image_key)

    if isinstance(image, DataArray):
        channels = image.coords["c"]
    elif isinstance(image, DataTree):
        # multi-scale image: the channels are read on the first scale
        channels = image["scale0"].coords["c"]
    else:
        raise ValueError(f"Image must be a DataTree or a DataArray. Found: {type(image)}")

    return channels.values

sopa.utils.set_sopa_attrs(sdata, cell_segmentation_key=None, tissue_segmentation_key=None, transcripts_key=None, boundaries_key=None, bins_table_key=None)

Stores in the SpatialData object the keys of the main elements used in Sopa. This allows Sopa to retrieve which elements should be used for each operation.

Info

The attrs are already stored in sdata.attrs when reading data with sopa.io. Use this function only if you already stored on disk a SpatialData object without the attrs (with sopa<2.0.0).

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object.

required
cell_segmentation_key str | None

Name of the image to be used for cell segmentation (highest resolution image).

None
tissue_segmentation_key str | None

Name of the image to be used for tissue segmentation (medium/low resolution image).

None
transcripts_key str | None

Name of the points containing the transcripts.

None
boundaries_key str | None

Name of the shapes containing the cell boundaries.

None
bins_table_key str | None

Name of the table containing the bins (e.g., for Visium HD data).

None
Source code in sopa/utils/utils.py
def set_sopa_attrs(
    sdata: SpatialData,
    cell_segmentation_key: str | None = None,
    tissue_segmentation_key: str | None = None,
    transcripts_key: str | None = None,
    boundaries_key: str | None = None,
    bins_table_key: str | None = None,
):
    """Stores in the `SpatialData` object the keys of the main elements used in Sopa.
    This allows Sopa to retrieve which elements should be used for each operation.

    !!! info
        The attrs are already stored in `sdata.attrs` when reading data with `sopa.io`.
        Use this function only if you already stored on disk a SpatialData object without the attrs (with `sopa<2.0.0`).

    Args:
        sdata: A `SpatialData` object.
        cell_segmentation_key: Name of the image to be used for cell segmentation (highest resolution image).
        tissue_segmentation_key: Name of the image to be used for tissue segmentation (medium/low resolution image).
        transcripts_key: Name of the points containing the transcripts.
        boundaries_key: Name of the shapes containing the cell boundaries.
        bins_table_key: Name of the table containing the bins (e.g., for Visium HD data).
    """
    # (provided key, attribute of `sdata` that must contain it, name of the attrs entry to set)
    requested = (
        (cell_segmentation_key, "images", SopaAttrs.CELL_SEGMENTATION),
        (tissue_segmentation_key, "images", SopaAttrs.TISSUE_SEGMENTATION),
        (transcripts_key, "points", SopaAttrs.TRANSCRIPTS),
        (boundaries_key, "shapes", SopaAttrs.BOUNDARIES),
        (bins_table_key, "tables", SopaAttrs.BINS_TABLE),
    )

    for element_key, container_name, attr_name in requested:
        if element_key is None:
            continue  # keys that are not provided are left untouched

        assert element_key in getattr(sdata, container_name)
        sdata.attrs[attr_name] = element_key

Transformations and scaling

sopa.utils.to_intrinsic(sdata, element, target_element)

Transforms a SpatialElement into the intrinsic coordinate system of another SpatialElement

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
element SpatialElement | str

SpatialElement to transform, or its key. We recommend choosing a vector element (for instance, points or shapes).

required
target_element SpatialElement | str

SpatialElement of the target coordinate system, or its key.

required

Returns:

Type Description
SpatialElement

The element with coordinates transformed to the intrinsic coordinate system of target_element.

Source code in sopa/utils/utils.py
def to_intrinsic(
    sdata: SpatialData, element: SpatialElement | str, target_element: SpatialElement | str
) -> SpatialElement:
    """Transforms a `SpatialElement` into the intrinsic coordinate system of another `SpatialElement`

    Args:
        sdata: A SpatialData object
        element: `SpatialElement` to transform, or its key. We recommend choosing a vector element (for instance, points or shapes).
        target_element: `SpatialElement` of the target coordinate system, or its key.

    Returns:
        The `element` with coordinates transformed to the intrinsic coordinate system of `target_element`.

    Raises:
        ValueError: If the transformation between the two elements cannot be retrieved directly
            and the two elements do not share any coordinate system.
    """
    element = sdata[element] if isinstance(element, str) else element
    target_element = sdata[target_element] if isinstance(target_element, str) else target_element

    # Shortcut: if both elements have an identity transformation to the same
    # coordinate system, the element can be returned as is.
    # Only the first identity coordinate system of `element` is checked.
    for cs, transformation in get_transformation(element, get_all=True).items():
        if isinstance(transformation, Identity):
            target_transformations = get_transformation(target_element, get_all=True)
            if isinstance(target_transformations.get(cs), Identity):
                return element  # no transformation needed
            break

    try:
        transformation = get_transformation_between_coordinate_systems(sdata, element, target_element)
    except Exception:  # not a bare `except`, so that KeyboardInterrupt / SystemExit still propagate
        # Fallback: go through a coordinate system shared by the two elements
        transformations1 = get_transformation(element, get_all=True)
        transformations2 = get_transformation(target_element, get_all=True)

        common_keys = list(set(transformations1.keys()) & set(transformations2.keys()))

        if not common_keys:
            raise ValueError("No common coordinate system found between the two elements")

        # "global" is preferred; otherwise an arbitrary common coordinate system is used
        cs = "global" if "global" in common_keys else common_keys.pop()

        # element -> common coordinate system -> target element
        transformation = Sequence([transformations1[cs], transformations2[cs].inverse()])

    return spatialdata.transform(element, transformation=transformation, maintain_positioning=True)

sopa.utils.scale_dtype(arr, dtype)

Change the dtype of an array but keep the scale compared to the type maximum value.

Example

For an array of dtype uint8 being transformed to np.uint16, the value 255 will become 65535

Parameters:

Name Type Description Default
arr ndarray

A numpy array

required
dtype dtype

Target numpy data type

required

Returns:

Type Description
ndarray

A scaled numpy array with the dtype provided.

Source code in sopa/utils/image.py
def scale_dtype(arr: np.ndarray, dtype: np.dtype) -> np.ndarray:
    """Convert an array to another integer dtype, rescaling its values relative to the maximum value of each dtype.

    !!! note "Example"
        For an array of dtype `uint8` being transformed to `np.uint16`, the value `255` will become `65535`

    Args:
        arr: A `numpy` array
        dtype: Target `numpy` data type

    Returns:
        A scaled `numpy` array with the dtype provided (the input array itself if it already has this dtype).
    """
    # both the source and the target dtypes are checked by the integer-dtype helper
    assert_is_integer_dtype(arr.dtype)
    assert_is_integer_dtype(dtype)

    if arr.dtype == dtype:
        return arr

    target_max = np.iinfo(dtype).max
    source_max = np.iinfo(arr.dtype).max

    rescaled = arr * (target_max / source_max)
    return rescaled.astype(dtype)

sopa.utils.resize_numpy(arr, scale_factor, dims, output_shape)

Resize a numpy image

Parameters:

Name Type Description Default
arr ndarray

a numpy array

required
scale_factor float

Scale factor of resizing, e.g. 2 will divide the width by 2

required
dims list[str]

List of dimension names. Only "x" and "y" are resized.

required
output_shape list[int]

Size of the output array

required

Returns:

Type Description
ndarray

Resized array

Source code in sopa/utils/image.py
def resize_numpy(arr: np.ndarray, scale_factor: float, dims: list[str], output_shape: list[int]) -> np.ndarray:
    """Resize a numpy image along its spatial dimensions with an affine transformation.

    Args:
        arr: a `numpy` array
        scale_factor: Scale factor of resizing, e.g. `2` will decrease the width by 2
        dims: List of dimension names. Only `"x"` and `"y"` are resized.
        output_shape: Size of the output array

    Returns:
        Resized array
    """
    # diagonal matrix: `scale_factor` on the "x" and "y" axes, 1 on every other axis
    diagonal = [scale_factor if dim in ("x", "y") else 1 for dim in dims]
    matrix = np.diag(diagonal)

    resized = dask_image.ndinterp.affine_transform(arr, matrix=matrix, output_shape=output_shape)
    return resized.compute()

Cell-type annotation

sopa.utils.tangram_annotate(sdata, adata_sc, cell_type_key, reference_preprocessing=None, bag_size=10000, max_obs_reference=10000, **kwargs)

Tangram multi-level annotation. Tangram is run on multiple bags of cells to decrease the RAM usage.

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
adata_sc AnnData

A scRNAseq annotated reference

required
cell_type_key str

Key of adata_sc.obs containing the cell types. For multi-level annotation, provide the other levels as follows: if cell_type_key = "ct", then "ct_level1" and "ct_level2" are the two next levels

required
reference_preprocessing str | None

Preprocessing method used on the reference. Can be "log1p" (normalize_total + log1p) or "normalized" (just normalize_total). By default, consider that no processing was applied (raw counts)

None
bag_size int

Size of each bag on which tangram will be run. Use smaller bags to lower the RAM usage

10000
max_obs_reference int

Maximum number of cells used in adata_sc at each level. Decrease it to lower the RAM usage.

10000
Source code in sopa/utils/annotation.py
def tangram_annotate(
    sdata: SpatialData,
    adata_sc: AnnData,
    cell_type_key: str,
    reference_preprocessing: str | None = None,
    bag_size: int = 10_000,
    max_obs_reference: int = 10_000,
    **kwargs,
):
    """Run the Tangram multi-level annotation on the Sopa table of a SpatialData object.
    Tangram is run on multiple bags of cells to decrease the RAM usage.

    Args:
        sdata: A `SpatialData` object
        adata_sc: A scRNAseq annotated reference
        cell_type_key: Key of `adata_sc.obs` containing the cell types. For multi-level annotation, provide the other levels as follows: if `cell_type_key = "ct"`, then `"ct_level1"` and `"ct_level2"` are the two next levels
        reference_preprocessing: Preprocessing method used on the reference. Can be `"log1p"` (normalize_total + log1p) or `"normalized"` (just normalize_total). By default, consider that no processing was applied (raw counts)
        bag_size: Size of each bag on which tangram will be run. Use smaller bags to lower the RAM usage
        max_obs_reference: Maximum number of cells used in `adata_sc` at each level. Decrease it to lower the RAM usage.
        **kwargs: Extra keyword arguments forwarded to `MultiLevelAnnotation`.
    """
    assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"

    annotation = MultiLevelAnnotation(
        sdata.tables[SopaKeys.TABLE],  # the spatial table to annotate
        adata_sc,
        cell_type_key,
        reference_preprocessing,
        bag_size,
        max_obs_reference,
        **kwargs,
    )
    annotation.run()

sopa.utils.higher_z_score(adata, marker_cell_dict, cell_type_key='cell_type')

Simple channel-based annotation using a marker-to-population dictionary

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required
marker_cell_dict dict

Dictionary whose keys are channels, and values are the corresponding populations.

required
cell_type_key str

Key of adata.obs where annotations will be stored

'cell_type'
Source code in sopa/utils/annotation.py
def higher_z_score(adata: AnnData, marker_cell_dict: dict, cell_type_key: str = "cell_type"):
    """Simple channel-based annotation: each cell gets the population whose marker has the highest z-score.

    The z-scores are computed with `preprocess_fluo` and stored in `adata.obsm`. The
    annotations are stored in `adata.obs`, and `cell_type_key` is registered in `adata.uns`.

    Args:
        adata: An `AnnData` object
        marker_cell_dict: Dictionary whose keys are channels, and values are the corresponding populations.
        cell_type_key: Key of `adata.obs` where annotations will be stored
    """
    adata.obsm[SopaKeys.Z_SCORES] = preprocess_fluo(adata)

    markers = list(marker_cell_dict)
    populations = np.array(list(marker_cell_dict.values()))

    # for each cell, position (among `markers`) of the marker with the highest z-score
    marker_z_scores = adata.obsm[SopaKeys.Z_SCORES][markers].values
    best_marker_indices = marker_z_scores.argmax(axis=1)

    adata.obs[cell_type_key] = populations[best_marker_indices]
    adata.uns[SopaKeys.UNS_KEY][SopaKeys.UNS_CELL_TYPES] = [cell_type_key]

    log.info(f"Annotation counts: {adata.obs[cell_type_key].value_counts()}")

sopa.utils.preprocess_fluo(adata)

Preprocess fluorescence data. For each column \(X\), we compute \(asinh(\frac{X}{5Q(0.2, X)})\) and apply standardization

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required

Returns:

Type Description
DataFrame

A dataframe of preprocessed channels intensities

Source code in sopa/utils/annotation.py
def preprocess_fluo(adata: AnnData) -> pd.DataFrame:
    """Preprocess fluorescence data. For each column $X$, we compute $asinh(\\frac{X}{5Q(0.2, X)})$ and apply standardization

    The intensities are read from `adata.obsm` when available, else from the table content itself.

    Args:
        adata: An `AnnData` object

    Returns:
        A dataframe of preprocessed channels intensities
    """
    has_obsm_intensities = SopaKeys.INTENSITIES_OBSM in adata.obsm
    intensities = adata.obsm[SopaKeys.INTENSITIES_OBSM] if has_obsm_intensities else adata.to_df()

    # per-channel divider: 5 times the 0.2-quantile of the channel
    divider = 5 * np.quantile(intensities, 0.2, axis=0)

    # where the quantile is zero, the channel maximum is used as divider instead
    zero_divider = divider == 0
    divider[zero_divider] = intensities.max(axis=0)[zero_divider]

    scaled = np.arcsinh(intensities / divider)

    # standardization: zero mean and unit standard deviation per channel
    centered = scaled - scaled.mean(0)
    return centered / scaled.std(0)