Skip to content

sopa.spatial

sopa.spatial.sjoin(sdata, left_element, right_element, how='left', target_coordinate_system=None, **kwargs)

Spatial join of two shapes GeoDataFrames, as in geopandas.sjoin.

Shapes are automatically aligned on the same coordinate system (which can be chosen using the target_coordinate_system argument).

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
left_element str | GeoDataFrame

The name of a GeoDataFrame in sdata, or the GeoDataFrame itself

required
right_element str | GeoDataFrame

The name of a GeoDataFrame in sdata, or the GeoDataFrame itself

required
how str

The GeoPandas type of join. By default, left geometries are retained.

'left'
target_coordinate_system str | None

The name of the coordinate system on which the shapes will be transformed. By default, uses the intrinsic coordinate system of the left_element.

None
**kwargs int

Kwargs provided to the geopandas.sjoin function

{}

Returns:

Type Description
GeoDataFrame

The joined GeoDataFrame

Source code in sopa/spatial/utils.py
def sjoin(
    sdata: SpatialData,
    left_element: str | gpd.GeoDataFrame,
    right_element: str | gpd.GeoDataFrame,
    how: str = "left",
    target_coordinate_system: str | None = None,
    **kwargs: int,
) -> gpd.GeoDataFrame:
    """Spatial join of two `shapes` GeoDataFrames, as in [geopandas.sjoin](https://geopandas.org/en/stable/docs/reference/api/geopandas.sjoin.html).

    Shapes are automatically aligned on the same coordinate system (which can be chosen using the `target_coordinate_system` argument).

    Args:
        sdata: A `SpatialData` object
        left_element: The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself
        right_element: The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself
        how: The GeoPandas type of join. By default, left geometries are retained.
        target_coordinate_system: The name of the coordinate system on which the shapes will be transformed. By default, uses the intrinsic coordinate system of the `left_element`.
        **kwargs: Kwargs provided to the [geopandas.sjoin](https://geopandas.org/en/stable/docs/reference/api/geopandas.sjoin.html) function

    Returns:
        The joined `GeoDataFrame`
    """
    if isinstance(left_element, str):
        left_element = sdata[left_element]
    if isinstance(right_element, str):
        right_element = sdata[right_element]

    if target_coordinate_system is None:
        target_coordinate_system = get_intrinsic_cs(sdata, left_element)

    left_element = sdata.transform_element_to_coordinate_system(left_element, target_coordinate_system)
    right_element = sdata.transform_element_to_coordinate_system(right_element, target_coordinate_system)

    return gpd.sjoin(left_element, right_element, how=how, **kwargs)

sopa.spatial.mean_distance(adata, group_key, target_group_key=None, ignore_zeros=False)

Mean distance between two groups (typically, between cell-types, or between cell-types and domains)

Note

The distance is a number of hops, i.e. a distance of 10 between a pDC and a T cell means that there are 10 cells on the closest path from one to the other cell.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object, or a SpatialData object

required
group_key str

Key of adata.obs containing the groups

required
target_group_key str | None

Key of adata.obs containing the target groups (by default, uses group_key)

None
ignore_zeros bool

If True, a cell distance to its own group is 0.

False

Returns:

Type Description
DataFrame

DataFrame of shape n_groups * n_groups_target of mean hop-distances

Source code in sopa/spatial/distance.py
def mean_distance(
    adata: AnnData, group_key: str, target_group_key: str | None = None, ignore_zeros: bool = False
) -> pd.DataFrame:
    """Mean distance between two groups (typically, between cell-types, or between cell-types and domains)

    Note:
        The distance is a number of hops, i.e. a distance of 10 between a pDC and a T cell means that there are 10 cells on the closest path from one to the other cell.

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        group_key: Key of `adata.obs` containing the groups
        target_group_key: Key of `adata.obs` containing the target groups (by default, uses `group_key`)
        ignore_zeros: If `True`, a cell distance to its own group is 0.

    Returns:
        `DataFrame` of shape `n_groups * n_groups_target` of mean hop-distances
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    target_group_key = group_key if target_group_key is None else target_group_key

    df_distances = cells_to_groups(adata, target_group_key, None, ignore_zeros=ignore_zeros)

    if ignore_zeros:
        df_distances.replace(0, np.nan, inplace=True)

    df_distances[group_key] = adata.obs[group_key]
    df_distances = df_distances.groupby(group_key, observed=False).mean()
    df_distances.columns.name = target_group_key

    return df_distances

sopa.spatial.geometrize_niches(adata, niche_key, buffer='auto', perc_area_th=0.05)

Converts the niches to shapely polygons, and put into a GeoDataFrame. Note that each niche can appear multiple times, as they can be separated by other niches ; in this case, we call them different "components" of the same niche ID.

Plot components

You can show niches components with GeoPandas

gdf = geometrize_niches(adata, niche_key)
gdf.plot(column=niche_key)

Parameters:

Name Type Description Default
adata AnnData | SpatialData

An AnnData object, or a SpatialData object

required
niche_key str

Key of adata.obs containing the niches

required
buffer int | str

Expansion radius applied on components. By default, 3 * mean_distance_neighbors

'auto'
perc_area_th float

For each niche, components whose area is less than perc_area_th * max_component_area will be removed

0.05

Returns:

Type Description
GeoDataFrame

A GeoDataFrame with geometries for each niche component. We also compute the area/perimeter/roundness of each component.

Source code in sopa/spatial/morpho.py
def geometrize_niches(
    adata: AnnData | SpatialData,
    niche_key: str,
    buffer: int | str = "auto",
    perc_area_th: float = 0.05,
) -> gpd.GeoDataFrame:
    """Converts the niches to shapely polygons, and put into a `GeoDataFrame`. Note that each niche can appear multiple times, as they can be separated by other niches ; in this case, we call them different "components" of the same niche ID.

    Plot components:
        You can show niches components with GeoPandas
        ```py
        gdf = geometrize_niches(adata, niche_key)
        gdf.plot(column=niche_key)
        ```

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        niche_key: Key of `adata.obs` containing the niches
        buffer: Expansion radius applied on components. By default, `3 * mean_distance_neighbors`
        perc_area_th: For each niche, components whose area is less than `perc_area_th * max_component_area` will be removed

    Returns:
        A `GeoDataFrame` with geometries for each niche component. We also compute the area/perimeter/roundness of each component.
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    _check_has_delaunay(adata)
    data = {"geometry": [], niche_key: []}

    delaunay = Delaunay(adata.obsm["spatial"])
    connectivities = adata.obsp["spatial_connectivities"]
    values = adata.obs[niche_key].values

    keep = (
        (connectivities[delaunay.simplices[:, 0], delaunay.simplices[:, 1]].A1 == 1)
        & (connectivities[delaunay.simplices[:, 0], delaunay.simplices[:, 2]].A1 == 1)
        & (connectivities[delaunay.simplices[:, 1], delaunay.simplices[:, 2]].A1 == 1)
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 1]])
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 2]])
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 2]])
    )  # Keep simplices that are in the original Delaunay graph, and which are not in between different value categories

    neighbors = np.where(np.isin(delaunay.neighbors, np.where(~keep)[0]), -1, delaunay.neighbors)

    simplices_to_visit = set(np.where(keep)[0])

    while simplices_to_visit:
        component = Component(adata, delaunay, neighbors)
        component.visit(simplices_to_visit)

        data["geometry"].append(component.polygon)
        data[niche_key].append(values[component.first_vertex_index()])

    gdf = gpd.GeoDataFrame(data)

    if buffer is not None and buffer != 0:
        gdf = _clean_components(adata, gdf, niche_key, buffer)

    gdf[SopaKeys.GEOMETRY_LENGTH] = gdf.length
    gdf[SopaKeys.GEOMETRY_AREA] = gdf.area
    gdf[SopaKeys.GEOMETRY_ROUNDNESS] = 4 * np.pi * gdf[SopaKeys.GEOMETRY_AREA] / gdf[SopaKeys.GEOMETRY_LENGTH] ** 2

    # Remove minor components (compared to the largest component of its corresponding niche)
    gdf = gdf[gdf.area >= gdf[niche_key].map(gdf.groupby(niche_key).area.max() * perc_area_th)]

    return gdf

sopa.spatial.niches_geometry_stats(adata, niche_key, aggregation='min', key_added_suffix='_distance_to_niche_', **geometrize_niches_kwargs)

Computes statistics over niches geometries

Details
  • n_components: Number of connected component of a niche (a component is a group of neighbor cells with the same niche attribute)
  • length: Mean distance of the exterior/boundary of the components of a niche
  • area: Mean area of the components of a niche
  • roundness: Float value between 0 and 1. The higher the value, the closer to a circle. Computed via 4 * pi * area / length**2
  • mean_distance_to_niche_X: mean distance to the niche (between the two closest points of the niches)

Parameters:

Name Type Description Default
adata AnnData | SpatialData

An AnnData object, or a SpatialData object

required
niche_key str

Key of adata.obs containing the niches

required
aggregation str | list[str]

Aggregation mode. Either one string such as "min", or a list such as ["mean", "min"].

'min'
key_added_suffix str

Suffix added in the DataFrame columns. Defaults to "distance_to_niche".

'_distance_to_niche_'
geometrize_niches_kwargs str

Kwargs to the sopa.spatial.geometrize_niches function

{}

Returns:

Type Description
GeoDataFrame

A DataFrame of shape n_niches * n_statistics

Source code in sopa/spatial/morpho.py
def niches_geometry_stats(
    adata: AnnData | SpatialData,
    niche_key: str,
    aggregation: str | list[str] = "min",
    key_added_suffix: str = "_distance_to_niche_",
    **geometrize_niches_kwargs: str,
) -> gpd.GeoDataFrame:
    """Computes statistics over niches geometries

    Details:
        - `n_components`: Number of connected component of a niche (a component is a group of neighbor cells with the same niche attribute)
        - `length`: Mean distance of the exterior/boundary of the components of a niche
        - `area`: Mean area of the components of a niche
        - `roundness`: Float value between 0 and 1. The higher the value, the closer to a circle. Computed via `4 * pi * area / length**2`
        - `mean_distance_to_niche_X`: mean distance to the niche (between the two closest points of the niches)

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        niche_key: Key of `adata.obs` containing the niches
        aggregation: Aggregation mode. Either one string such as `"min"`, or a list such as `["mean", "min"]`.
        key_added_suffix: Suffix added in the DataFrame columns. Defaults to "_distance_to_niche_".
        geometrize_niches_kwargs: Kwargs to the `sopa.spatial.geometrize_niches` function

    Returns:
        A `DataFrame` of shape `n_niches * n_statistics`
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    gdf = geometrize_niches(adata, niche_key, **geometrize_niches_kwargs)
    value_counts = gdf[niche_key].value_counts()

    assert len(gdf), "No niche geometry found, stats can't be computed"

    log.info(f"Computing pairwise distances between {len(gdf)} components")
    pairwise_distances: pd.DataFrame = gdf.geometry.apply(lambda g: gdf.distance(g))
    pairwise_distances[niche_key] = gdf[niche_key]

    if isinstance(aggregation, str):
        aggregation = [aggregation]

    for aggr in aggregation:
        df = pairwise_distances.groupby(niche_key).aggregate(aggr).T
        df.columns = [f"{aggr}{key_added_suffix}{c}" for c in df.columns]
        gdf[df.columns] = df

    df_stats: pd.DataFrame = gdf.groupby(niche_key)[gdf.columns[2:]].mean()
    df_stats.insert(0, SopaKeys.GEOMETRY_COUNT, value_counts)

    return df_stats

sopa.spatial.cells_to_groups(adata, group_key, key_added_prefix=None, ignore_zeros=False)

Compute the hop-distance between each cell and a cell category/group.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object, or a SpatialData object

required
group_key str

Key of adata.obs containing the groups

required
key_added_prefix str | None

Prefix to the key added in adata.obsm. If None, will return the DataFrame instead of saving it.

None
ignore_zeros bool

If True, a cell distance to its own group is 0.

False

Returns:

Type Description
DataFrame | None

A Dataframe of shape n_obs * n_groups, or None if key_added_prefix was provided (in this case, the dataframe is saved in "{key_added_prefix}{group_key}")

Source code in sopa/spatial/distance.py
def cells_to_groups(
    adata: AnnData,
    group_key: str,
    key_added_prefix: str | None = None,
    ignore_zeros: bool = False,
) -> pd.DataFrame | None:
    """Compute the hop-distance between each cell and a cell category/group.

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        group_key: Key of `adata.obs` containing the groups
        key_added_prefix: Prefix to the key added in `adata.obsm`. If `None`, will return the `DataFrame` instead of saving it.
        ignore_zeros: If `True`, a cell distance to its own group is 0.

    Returns:
        A `Dataframe` of shape `n_obs * n_groups`, or `None` if `key_added_prefix` was provided (in this case, the dataframe is saved in `"{key_added_prefix}{group_key}"`)
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    _check_has_delaunay(adata)

    distances_to_groups = {}

    if not adata.obs[group_key].dtype.name == "category":
        log.info(f"Converting adata.obs['{group_key}'] to category")
        adata.obs[group_key] = adata.obs[group_key].astype("category")

    for group_id in tqdm(adata.obs[group_key].cat.categories):
        group_nodes = np.where(adata.obs[group_key] == group_id)[0]

        distances = np.full(adata.n_obs, np.nan)

        if not ignore_zeros:
            distances[group_nodes] = 0
            visited = set(group_nodes)
        else:
            visited = set()

        queue = group_nodes
        current_distance = 0

        while len(queue):
            distances[queue] = current_distance

            neighbors = set(adata.obsp["spatial_connectivities"][queue].indices)
            queue = np.array(list(neighbors - visited))
            visited |= neighbors

            current_distance += 1

        distances_to_groups[group_id] = distances

    df_distances = pd.DataFrame(distances_to_groups, index=adata.obs_names)

    if key_added_prefix is None:
        return df_distances
    adata.obsm[f"{key_added_prefix}{group_key}"] = df_distances

sopa.spatial.spatial_neighbors(adata, radius, library_key=None, percentile=None, set_diag=False)

Create a Delaunay graph from spatial coordinates. This function comes from squidpy.

Parameters:

Name Type Description Default
adata AnnData | SpatialData

AnnData object

required
radius tuple[float, float] | None

tuple that prunes the final graph to only contain edges in interval [min(radius), max(radius)]. If None, all edges are kept.

required
library_key str | None

Optional batch key in adata.obs

None
percentile float | None

Percentile of the distances to use as threshold.

None
set_diag bool

Whether to set the diagonal of the spatial connectivities to 1.0.

False
Source code in sopa/spatial/_build.py
def spatial_neighbors(
    adata: AnnData | SpatialData,
    radius: tuple[float, float] | None,
    library_key: str | None = None,
    percentile: float | None = None,
    set_diag: bool = False,
):
    """Create a Delaunay graph from spatial coordinates. This function comes from [squidpy](https://squidpy.readthedocs.io/en/latest/api/squidpy.gr.spatial_neighbors.html#squidpy.gr.spatial_neighbors).

    Args:
        adata: AnnData object
        radius: tuple that prunes the final graph to only contain edges in interval `[min(radius), max(radius)]`. If `None`, all edges are kept.
        library_key: Optional batch key in adata.obs
        percentile: Percentile of the distances to use as threshold.
        set_diag: Whether to set the diagonal of the spatial connectivities to `1.0`.
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    assert radius is None or len(radius) == 2, "Radius is expected to be a tuple (min_radius, max_radius)"

    log.info("Computing delaunay graph")

    if library_key is not None:
        assert adata.obs[library_key].dtype == "category"
        libs = adata.obs[library_key].cat.categories
        make_index_unique(adata.obs_names)
    else:
        libs = [None]

    _build_fun = partial(
        _spatial_neighbor,
        set_diag=set_diag,
        radius=radius,
        percentile=percentile,
    )

    if library_key is not None:
        mats: list[tuple[spmatrix, spmatrix]] = []
        ixs = []  # type: ignore[var-annotated]
        for lib in libs:
            ixs.extend(np.where(adata.obs[library_key] == lib)[0])
            mats.append(_build_fun(adata[adata.obs[library_key] == lib]))
        ixs = np.argsort(ixs)  # type: ignore[assignment] # invert
        Adj = block_diag([m[0] for m in mats], format="csr")[ixs, :][:, ixs]
        Dst = block_diag([m[1] for m in mats], format="csr")[ixs, :][:, ixs]
    else:
        Adj, Dst = _build_fun(adata)

    adata.obsp["spatial_connectivities"] = Adj
    adata.obsp["spatial_distances"] = Dst

sopa.spatial.prepare_network(adata, cell_type_key, niche_key, clip_weight=3, node_colors=('#5c7dc4', '#f05541'), node_sizes=(1.3, 5))

Create a dataframe representing weights between cell-types and/or niches. This can be later use to plot a cell-type/niche represention of a whole slide using the netgraph library.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required
cell_type_key str

Key of adata.obs containing the cell types

required
niche_key str

Key of adata.obs containing the niches

required
clip_weight float

Maximum weight

3
node_colors tuple[str]

Tuple of (cell-type color, niche color)

('#5c7dc4', '#f05541')
node_sizes float

Tuple of (cell-type size, niche size)

(1.3, 5)

Returns:

Type Description
tuple[DataFrame, dict, dict, dict]

A DataFrame of weights between cell-types and/or niches, and three dict for netgraph display

Source code in sopa/spatial/distance.py
def prepare_network(
    adata: AnnData,
    cell_type_key: str,
    niche_key: str,
    clip_weight: float = 3,
    node_colors: tuple[str] = ("#5c7dc4", "#f05541"),
    node_sizes: float = (1.3, 5),
) -> tuple[pd.DataFrame, dict, dict, dict]:
    """Create a dataframe representing weights between cell-types and/or niches.
    This can be later use to plot a cell-type/niche represention of a whole slide
    using the netgraph library.

    Args:
        adata: An `AnnData` object
        cell_type_key: Key of `adata.obs` containing the cell types
        niche_key: Key of `adata.obs` containing the niches
        clip_weight: Maximum weight
        node_colors: Tuple of (cell-type color, niche color)
        node_sizes: Tuple of (cell-type size, niche size)

    Returns:
        A DataFrame of weights between cell-types and/or niches, and three dict for netgraph display
    """
    node_color, node_size, node_shape = {}, {}, {}

    log.info("Computing all distances for the 4 pairs of categories")
    weights = mean_distance(adata, cell_type_key)
    top_right = mean_distance(adata, cell_type_key, niche_key)
    bottom_left = mean_distance(adata, niche_key, cell_type_key)
    bottom_right = mean_distance(adata, niche_key, niche_key)

    for pop in weights.index:
        node_color[pop] = node_colors[0]
        node_size[pop] = node_sizes[0]
        node_shape[pop] = "o"

    for niche in bottom_right.index:
        node_color[niche] = node_colors[1]
        node_size[niche] = node_sizes[1]
        node_shape[niche] = "h"

    # assemble dataframe per-block
    bottom_left[bottom_right.columns] = bottom_right
    weights[top_right.columns] = top_right
    weights = pd.concat([weights, bottom_left], axis=0).copy()

    # convert distances to symmetric weights
    weights = 1 / weights
    np.fill_diagonal(weights.values, 0)
    weights = weights.clip(0, clip_weight)
    weights = (weights.T + weights) / 2

    return weights, node_color, node_size, node_shape