Skip to content

Spatial operations

sopa.spatial.sjoin(sdata, left_element, right_element, how='left', target_coordinate_system=None, **kwargs)

Spatial join of two shapes GeoDataFrames, as in geopandas.sjoin.

Shapes are automatically aligned on the same coordinate system (which can be chosen using the target_coordinate_system argument).

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
left_element str | GeoDataFrame

The name of a GeoDataFrame in sdata, or the GeoDataFrame itself

required
right_element str | GeoDataFrame

The name of a GeoDataFrame in sdata, or the GeoDataFrame itself

required
how str

The GeoPandas type of join. By default, left geometries are retained.

'left'
target_coordinate_system str | None

The name of the coordinate system on which the shapes will be transformed. By default, uses the intrinsic coordinate system of the left_element.

None
**kwargs int

Kwargs provided to the geopandas.sjoin function

{}

Returns:

Type Description
GeoDataFrame

The joined GeoDataFrame

Source code in sopa/spatial/join.py
def sjoin(
    sdata: SpatialData,
    left_element: str | gpd.GeoDataFrame,
    right_element: str | gpd.GeoDataFrame,
    how: str = "left",
    target_coordinate_system: str | None = None,
    **kwargs: int,
) -> gpd.GeoDataFrame:
    """Spatial join of two `shapes` GeoDataFrames, as in [geopandas.sjoin](https://geopandas.org/en/stable/docs/reference/api/geopandas.sjoin.html).

    Shapes are automatically aligned on the same coordinate system (which can be chosen using the `target_coordinate_system` argument).

    Args:
        sdata: A `SpatialData` object
        left_element: The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself
        right_element: The name of a GeoDataFrame in `sdata`, or the GeoDataFrame itself
        how: The GeoPandas type of join. By default, left geometries are retained.
        target_coordinate_system: The name of the coordinate system on which the shapes will be transformed. By default, uses the intrinsic coordinate system of the `left_element`.
        **kwargs: Kwargs provided to the [geopandas.sjoin](https://geopandas.org/en/stable/docs/reference/api/geopandas.sjoin.html) function

    Returns:
        The joined `GeoDataFrame`
    """
    if isinstance(left_element, str):
        left_element = sdata[left_element]
    if isinstance(right_element, str):
        right_element = sdata[right_element]

    if target_coordinate_system is None:
        right_element = to_intrinsic(sdata, right_element, left_element)
    else:
        left_element = sdata.transform_element_to_coordinate_system(left_element, target_coordinate_system)
        right_element = sdata.transform_element_to_coordinate_system(right_element, target_coordinate_system)

    return gpd.sjoin(left_element, right_element, how=how, **kwargs)

sopa.spatial.assign_transcript_to_cell(sdata, points_key=None, shapes_key=None, key_added='cell_index', unassigned_value=None)

Assign each transcript to a cell based on the cell boundaries. It updates the transcript dataframe by adding a new column.

Parameters:

Name Type Description Default
sdata SpatialData

A SpatialData object

required
points_key str | None

Key of the spatialdata object containing the transcript dataframe.

None
shapes_key str | None

Key of the spatialdata object containing the cell boundaries.

None
key_added str

Key that will be added to the transcript dataframe containing the cell ID

'cell_index'
unassigned_value int | None

If None, transcripts that are not inside any cell will be assigned to NaN. If an integer, this value will be used as the unassigned value.

None
Source code in sopa/spatial/join.py
def assign_transcript_to_cell(
    sdata: SpatialData,
    points_key: str | None = None,
    shapes_key: str | None = None,
    key_added: str = "cell_index",
    unassigned_value: int | None = None,
):
    """Assign each transcript to a cell based on the cell boundaries. It updates the transcript dataframe by adding a new column.

    Args:
        sdata: A `SpatialData` object
        points_key: Key of the spatialdata object containing the transcript dataframe.
        shapes_key: Key of the spatialdata object containing the cell boundaries.
        key_added: Key that will be added to the transcript dataframe containing the cell ID
        unassigned_value: If `None`, transcripts that are not inside any cell will be assigned to NaN. If an integer, this value will be used as the unassigned value.
    """
    df = get_spatial_element(sdata.points, points_key or sdata.attrs.get(SopaAttrs.TRANSCRIPTS))
    geo_df = get_boundaries(sdata, key=shapes_key)

    geo_df = to_intrinsic(sdata, geo_df, df)
    geo_df = geo_df.reset_index()

    get_cell_id = partial(_get_cell_id, geo_df, unassigned_value=unassigned_value)

    if isinstance(df, dd.DataFrame):
        df[key_added] = df.map_partitions(get_cell_id)
    else:
        raise ValueError(f"Invalid dataframe type: {type(df)}")

sopa.spatial.mean_distance(adata, group_key, target_group_key=None, ignore_zeros=False)

Mean distance between two groups (typically, between cell-types, or between cell-types and domains)

Note

The distance is a number of hops, i.e. a distance of 10 between a pDC and a T cell means that there are 10 cells on the closest path from one to the other cell.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object, or a SpatialData object

required
group_key str

Key of adata.obs containing the groups

required
target_group_key str | None

Key of adata.obs containing the target groups (by default, uses group_key)

None
ignore_zeros bool

If True, a cell distance to its own group is 0.

False

Returns:

Type Description
DataFrame

DataFrame of shape n_groups * n_groups_target of mean hop-distances

Source code in sopa/spatial/distance.py
def mean_distance(
    adata: AnnData, group_key: str, target_group_key: str | None = None, ignore_zeros: bool = False
) -> pd.DataFrame:
    """Mean distance between two groups (typically, between cell-types, or between cell-types and domains)

    Note:
        The distance is a number of hops, i.e. a distance of 10 between a pDC and a T cell means that there are 10 cells on the closest path from one to the other cell.

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        group_key: Key of `adata.obs` containing the groups
        target_group_key: Key of `adata.obs` containing the target groups (by default, uses `group_key`)
        ignore_zeros: If `True`, a cell distance to its own group is 0.

    Returns:
        `DataFrame` of shape `n_groups * n_groups_target` of mean hop-distances
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    target_group_key = group_key if target_group_key is None else target_group_key

    df_distances = cells_to_groups(adata, target_group_key, None, ignore_zeros=ignore_zeros)

    if ignore_zeros:
        df_distances.replace(0, np.nan, inplace=True)

    df_distances[group_key] = adata.obs[group_key]
    df_distances = df_distances.groupby(group_key, observed=False).mean()
    df_distances.columns.name = target_group_key

    return df_distances

sopa.spatial.geometrize_niches(adata, niche_key, buffer='auto', perc_area_th=0.05)

Converts the niches to shapely polygons, and put into a GeoDataFrame. Note that each niche can appear multiple times, as they can be separated by other niches ; in this case, we call them different "components" of the same niche ID.

Plot components

You can show niches components with GeoPandas

gdf = geometrize_niches(adata, niche_key)
gdf.plot(column=niche_key)

Parameters:

Name Type Description Default
adata AnnData | SpatialData

An AnnData object, or a SpatialData object

required
niche_key str

Key of adata.obs containing the niches

required
buffer int | str

Expansion radius applied on components. By default, 3 * mean_distance_neighbors

'auto'
perc_area_th float

For each niche, components whose area is less than perc_area_th * max_component_area will be removed

0.05

Returns:

Type Description
GeoDataFrame

A GeoDataFrame with geometries for each niche component. We also compute the area/perimeter/roundness of each component.

Source code in sopa/spatial/morpho.py
def geometrize_niches(
    adata: AnnData | SpatialData,
    niche_key: str,
    buffer: int | str = "auto",
    perc_area_th: float = 0.05,
) -> gpd.GeoDataFrame:
    """Converts the niches to shapely polygons, and put into a `GeoDataFrame`. Note that each niche can appear multiple times, as they can be separated by other niches ; in this case, we call them different "components" of the same niche ID.

    Plot components:
        You can show niches components with GeoPandas
        ```py
        gdf = geometrize_niches(adata, niche_key)
        gdf.plot(column=niche_key)
        ```

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        niche_key: Key of `adata.obs` containing the niches
        buffer: Expansion radius applied on components. By default, `3 * mean_distance_neighbors`
        perc_area_th: For each niche, components whose area is less than `perc_area_th * max_component_area` will be removed

    Returns:
        A `GeoDataFrame` with geometries for each niche component. We also compute the area/perimeter/roundness of each component.
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    _check_has_delaunay(adata)
    data = {"geometry": [], niche_key: []}

    delaunay = Delaunay(adata.obsm["spatial"])
    connectivities = adata.obsp["spatial_connectivities"]
    values = adata.obs[niche_key].values

    keep = (
        (connectivities[delaunay.simplices[:, 0], delaunay.simplices[:, 1]].A1 == 1)
        & (connectivities[delaunay.simplices[:, 0], delaunay.simplices[:, 2]].A1 == 1)
        & (connectivities[delaunay.simplices[:, 1], delaunay.simplices[:, 2]].A1 == 1)
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 1]])
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 2]])
        & (values[delaunay.simplices[:, 0]] == values[delaunay.simplices[:, 2]])
    )  # Keep simplices that are in the original Delaunay graph, and which are not in between different value categories

    neighbors = np.where(np.isin(delaunay.neighbors, np.where(~keep)[0]), -1, delaunay.neighbors)

    simplices_to_visit = set(np.where(keep)[0])

    while simplices_to_visit:
        component = Component(adata, delaunay, neighbors)
        component.visit(simplices_to_visit)

        data["geometry"].append(component.polygon)
        data[niche_key].append(values[component.first_vertex_index()])

    gdf = gpd.GeoDataFrame(data)

    if buffer is not None and buffer != 0:
        gdf = _clean_components(adata, gdf, niche_key, buffer)

    gdf[SopaKeys.GEOMETRY_LENGTH] = gdf.length
    gdf[SopaKeys.GEOMETRY_AREA] = gdf.area
    gdf[SopaKeys.GEOMETRY_ROUNDNESS] = 4 * np.pi * gdf[SopaKeys.GEOMETRY_AREA] / gdf[SopaKeys.GEOMETRY_LENGTH] ** 2

    # Remove minor components (compared to the largest component of its corresponding niche)
    gdf = gdf[gdf.area >= gdf[niche_key].map(gdf.groupby(niche_key).area.max() * perc_area_th)]

    return gdf

sopa.spatial.niches_geometry_stats(adata, niche_key, aggregation='min', key_added_suffix='_distance_to_niche_', **geometrize_niches_kwargs)

Computes statistics over niches geometries

Details
  • n_components: Number of connected component of a niche (a component is a group of neighbor cells with the same niche attribute)
  • length: Mean distance of the exterior/boundary of the components of a niche
  • area: Mean area of the components of a niche
  • roundness: Float value between 0 and 1. The higher the value, the closer to a circle. Computed via 4 * pi * area / length**2
  • mean_distance_to_niche_X: mean distance to the niche (between the two closest points of the niches)

Parameters:

Name Type Description Default
adata AnnData | SpatialData

An AnnData object, or a SpatialData object

required
niche_key str

Key of adata.obs containing the niches

required
aggregation str | list[str]

Aggregation mode. Either one string such as "min", or a list such as ["mean", "min"].

'min'
key_added_suffix str

Suffix added in the DataFrame columns. Defaults to "distance_to_niche".

'_distance_to_niche_'
geometrize_niches_kwargs str

Kwargs to the sopa.spatial.geometrize_niches function

{}

Returns:

Type Description
GeoDataFrame

A DataFrame of shape n_niches * n_statistics

Source code in sopa/spatial/morpho.py
def niches_geometry_stats(
    adata: AnnData | SpatialData,
    niche_key: str,
    aggregation: str | list[str] = "min",
    key_added_suffix: str = "_distance_to_niche_",
    **geometrize_niches_kwargs: str,
) -> gpd.GeoDataFrame:
    """Computes statistics over niches geometries

    Details:
        - `n_components`: Number of connected component of a niche (a component is a group of neighbor cells with the same niche attribute)
        - `length`: Mean distance of the exterior/boundary of the components of a niche
        - `area`: Mean area of the components of a niche
        - `roundness`: Float value between 0 and 1. The higher the value, the closer to a circle. Computed via `4 * pi * area / length**2`
        - `mean_distance_to_niche_X`: mean distance to the niche (between the two closest points of the niches)

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        niche_key: Key of `adata.obs` containing the niches
        aggregation: Aggregation mode. Either one string such as `"min"`, or a list such as `["mean", "min"]`.
        key_added_suffix: Suffix added in the DataFrame columns. Defaults to "_distance_to_niche_".
        geometrize_niches_kwargs: Kwargs to the `sopa.spatial.geometrize_niches` function

    Returns:
        A `DataFrame` of shape `n_niches * n_statistics`
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    gdf = geometrize_niches(adata, niche_key, **geometrize_niches_kwargs)
    value_counts = gdf[niche_key].value_counts()

    assert len(gdf), "No niche geometry found, stats can't be computed"

    log.info(f"Computing pairwise distances between {len(gdf)} components")
    pairwise_distances: pd.DataFrame = gdf.geometry.apply(lambda g: gdf.distance(g))
    pairwise_distances[niche_key] = gdf[niche_key]

    if isinstance(aggregation, str):
        aggregation = [aggregation]

    for aggr in aggregation:
        df = pairwise_distances.groupby(niche_key).aggregate(aggr).T
        df.columns = [f"{aggr}{key_added_suffix}{c}" for c in df.columns]
        gdf[df.columns] = df

    df_stats: pd.DataFrame = gdf.groupby(niche_key)[gdf.columns[2:]].mean()
    df_stats.insert(0, SopaKeys.GEOMETRY_COUNT, value_counts)

    return df_stats

sopa.spatial.cells_to_groups(adata, group_key, key_added_prefix=None, ignore_zeros=False)

Compute the hop-distance between each cell and a cell category/group.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object, or a SpatialData object

required
group_key str

Key of adata.obs containing the groups

required
key_added_prefix str | None

Prefix to the key added in adata.obsm. If None, will return the DataFrame instead of saving it.

None
ignore_zeros bool

If True, a cell distance to its own group is 0.

False

Returns:

Type Description
DataFrame | None

A Dataframe of shape n_obs * n_groups, or None if key_added_prefix was provided (in this case, the dataframe is saved in "{key_added_prefix}{group_key}")

Source code in sopa/spatial/distance.py
def cells_to_groups(
    adata: AnnData,
    group_key: str,
    key_added_prefix: str | None = None,
    ignore_zeros: bool = False,
) -> pd.DataFrame | None:
    """Compute the hop-distance between each cell and a cell category/group.

    Args:
        adata: An `AnnData` object, or a `SpatialData object`
        group_key: Key of `adata.obs` containing the groups
        key_added_prefix: Prefix to the key added in `adata.obsm`. If `None`, will return the `DataFrame` instead of saving it.
        ignore_zeros: If `True`, a cell distance to its own group is 0.

    Returns:
        A `Dataframe` of shape `n_obs * n_groups`, or `None` if `key_added_prefix` was provided (in this case, the dataframe is saved in `"{key_added_prefix}{group_key}"`)
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    _check_has_delaunay(adata)

    distances_to_groups = {}

    if not adata.obs[group_key].dtype.name == "category":
        log.info(f"Converting adata.obs['{group_key}'] to category")
        adata.obs[group_key] = adata.obs[group_key].astype("category")

    for group_id in tqdm(adata.obs[group_key].cat.categories):
        group_nodes = np.where(adata.obs[group_key] == group_id)[0]

        distances = np.full(adata.n_obs, np.nan)

        if not ignore_zeros:
            distances[group_nodes] = 0
            visited = set(group_nodes)
        else:
            visited = set()

        queue = group_nodes
        current_distance = 0

        while len(queue):
            distances[queue] = current_distance

            neighbors = set(adata.obsp["spatial_connectivities"][queue].indices)
            queue = np.array(list(neighbors - visited))
            visited |= neighbors

            current_distance += 1

        distances_to_groups[group_id] = distances

    df_distances = pd.DataFrame(distances_to_groups, index=adata.obs_names)

    if key_added_prefix is None:
        return df_distances
    adata.obsm[f"{key_added_prefix}{group_key}"] = df_distances

sopa.spatial.spatial_neighbors(adata, radius, library_key=None, percentile=None, set_diag=False)

Create a Delaunay graph from spatial coordinates. This function comes from squidpy.

Parameters:

Name Type Description Default
adata AnnData | SpatialData

AnnData object

required
radius tuple[float, float] | None

tuple that prunes the final graph to only contain edges in interval [min(radius), max(radius)]. If None, all edges are kept.

required
library_key str | None

Optional batch key in adata.obs

None
percentile float | None

Percentile of the distances to use as threshold.

None
set_diag bool

Whether to set the diagonal of the spatial connectivities to 1.0.

False
Source code in sopa/spatial/_build.py
def spatial_neighbors(
    adata: AnnData | SpatialData,
    radius: tuple[float, float] | None,
    library_key: str | None = None,
    percentile: float | None = None,
    set_diag: bool = False,
):
    """Create a Delaunay graph from spatial coordinates. This function comes from [squidpy](https://squidpy.readthedocs.io/en/latest/api/squidpy.gr.spatial_neighbors.html#squidpy.gr.spatial_neighbors).

    Args:
        adata: AnnData object
        radius: tuple that prunes the final graph to only contain edges in interval `[min(radius), max(radius)]`. If `None`, all edges are kept.
        library_key: Optional batch key in adata.obs
        percentile: Percentile of the distances to use as threshold.
        set_diag: Whether to set the diagonal of the spatial connectivities to `1.0`.
    """
    if isinstance(adata, SpatialData):
        adata = adata.tables[SopaKeys.TABLE]

    assert radius is None or len(radius) == 2, "Radius is expected to be a tuple (min_radius, max_radius)"

    log.info("Computing delaunay graph")

    if library_key is not None:
        assert adata.obs[library_key].dtype == "category"
        libs = adata.obs[library_key].cat.categories
        make_index_unique(adata.obs_names)
    else:
        libs = [None]

    _build_fun = partial(
        _spatial_neighbor,
        set_diag=set_diag,
        radius=radius,
        percentile=percentile,
    )

    if library_key is not None:
        mats: list[tuple[spmatrix, spmatrix]] = []
        ixs = []  # type: ignore[var-annotated]
        for lib in libs:
            ixs.extend(np.where(adata.obs[library_key] == lib)[0])
            mats.append(_build_fun(adata[adata.obs[library_key] == lib]))
        ixs = np.argsort(ixs)  # type: ignore[assignment] # invert
        Adj = block_diag([m[0] for m in mats], format="csr")[ixs, :][:, ixs]
        Dst = block_diag([m[1] for m in mats], format="csr")[ixs, :][:, ixs]
    else:
        Adj, Dst = _build_fun(adata)

    adata.obsp["spatial_connectivities"] = Adj
    adata.obsp["spatial_distances"] = Dst

sopa.spatial.prepare_network(adata, cell_type_key, niche_key, clip_weight=3, node_colors=('#5c7dc4', '#f05541'), node_sizes=(1.3, 5))

Create a dataframe representing weights between cell-types and/or niches. This can be later use to plot a cell-type/niche represention of a whole slide using the netgraph library.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required
cell_type_key str

Key of adata.obs containing the cell types

required
niche_key str

Key of adata.obs containing the niches

required
clip_weight float

Maximum weight

3
node_colors tuple[str]

Tuple of (cell-type color, niche color)

('#5c7dc4', '#f05541')
node_sizes float

Tuple of (cell-type size, niche size)

(1.3, 5)

Returns:

Type Description
tuple[DataFrame, dict, dict, dict]

A DataFrame of weights between cell-types and/or niches, and three dict for netgraph display

Source code in sopa/spatial/distance.py
def prepare_network(
    adata: AnnData,
    cell_type_key: str,
    niche_key: str,
    clip_weight: float = 3,
    node_colors: tuple[str] = ("#5c7dc4", "#f05541"),
    node_sizes: float = (1.3, 5),
) -> tuple[pd.DataFrame, dict, dict, dict]:
    """Create a dataframe representing weights between cell-types and/or niches.
    This can be later use to plot a cell-type/niche represention of a whole slide
    using the netgraph library.

    Args:
        adata: An `AnnData` object
        cell_type_key: Key of `adata.obs` containing the cell types
        niche_key: Key of `adata.obs` containing the niches
        clip_weight: Maximum weight
        node_colors: Tuple of (cell-type color, niche color)
        node_sizes: Tuple of (cell-type size, niche size)

    Returns:
        A DataFrame of weights between cell-types and/or niches, and three dict for netgraph display
    """
    node_color, node_size, node_shape = {}, {}, {}

    log.info("Computing all distances for the 4 pairs of categories")
    weights = mean_distance(adata, cell_type_key)
    top_right = mean_distance(adata, cell_type_key, niche_key)
    bottom_left = mean_distance(adata, niche_key, cell_type_key)
    bottom_right = mean_distance(adata, niche_key, niche_key)

    for pop in weights.index:
        node_color[pop] = node_colors[0]
        node_size[pop] = node_sizes[0]
        node_shape[pop] = "o"

    for niche in bottom_right.index:
        node_color[niche] = node_colors[1]
        node_size[niche] = node_sizes[1]
        node_shape[niche] = "h"

    # assemble dataframe per-block
    bottom_left[bottom_right.columns] = bottom_right
    weights[top_right.columns] = top_right
    weights = pd.concat([weights, bottom_left], axis=0).copy()

    # convert distances to symmetric weights
    weights = 1 / weights
    np.fill_diagonal(weights.values, 0)
    weights = weights.clip(0, clip_weight)
    weights = (weights.T + weights) / 2

    return weights, node_color, node_size, node_shape