"""Utils for computing graph label informativeness"""
from . import to_bidirected
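
# These utilities require the PyTorch backend; record whether torch is
# available so the public functions can raise a clear error otherwise.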
try:
    import torch
except ImportError:
    HAS_TORCH = False
else:
    HAS_TORCH = True

__all__ = ["edge_label_informativeness", "node_label_informativeness"]

def check_pytorch():
    """Check if PyTorch is the backend."""
    if HAS_TORCH is False:
        raise ModuleNotFoundError(
            "This function requires PyTorch to be the backend."
        )

def edge_label_informativeness(graph, y, eps=1e-8):
    r"""Label informativeness (:math:`\mathrm{LI}`) is a characteristic of
    labeled graphs proposed in the `Characterizing Graph Datasets for Node
    Classification: Homophily-Heterophily Dichotomy and Beyond
    <https://arxiv.org/abs/2209.06177>`__ paper.

    Label informativeness shows how much information about a node's label we
    get from knowing its neighbor's label. Formally, assume that we sample an
    edge :math:`(\xi,\eta) \in E`. The class labels of nodes :math:`\xi` and
    :math:`\eta` are then random variables :math:`y_\xi` and :math:`y_\eta`.
    We want to measure the amount of knowledge the label :math:`y_\eta` gives
    for predicting :math:`y_\xi`. The entropy :math:`H(y_\xi)` measures the
    'hardness' of predicting the label of :math:`\xi` without knowing
    :math:`y_\eta`. Given :math:`y_\eta`, this value is reduced to the
    conditional entropy :math:`H(y_\xi|y_\eta)`. In other words, :math:`y_\eta`
    reveals :math:`I(y_\xi,y_\eta) = H(y_\xi) - H(y_\xi|y_\eta)` information
    about the label. To make the obtained quantity comparable across different
    datasets, label informativeness is defined as the normalized mutual
    information of :math:`y_{\xi}` and :math:`y_{\eta}`:

    .. math::
        \mathrm{LI} = \frac{I(y_\xi,y_\eta)}{H(y_\xi)}

    Depending on the distribution used for sampling an edge
    :math:`(\xi, \eta)`, several variants of label informativeness can be
    obtained. Two of them are particularly intuitive: in edge label
    informativeness (:math:`\mathrm{LI}_{edge}`), edges are sampled uniformly
    at random, and in node label informativeness (:math:`\mathrm{LI}_{node}`),
    first a node is sampled uniformly at random and then an edge incident to
    it is sampled uniformly at random. These two versions of label
    informativeness differ in how they weight high/low-degree nodes. In edge
    label informativeness, averaging is over the edges, so high-degree nodes
    are given more weight. In node label informativeness, averaging is over
    the nodes, so all nodes are weighted equally.

    This function computes edge label informativeness.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).
    eps : float, optional
        A small constant for numerical stability. (default: 1e-8)

    Returns
    -------
    float
        The edge label informativeness value.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]))
    >>> y = torch.tensor([0, 0, 0, 0, 1, 1])
    >>> dgl.edge_label_informativeness(graph, y)
    0.25177597999572754
    """
    check_pytorch()
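    # Symmetrize the graph so that each edge is present in both directions;
    # the in-degrees of the bidirected graph are then the full node degrees.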
    graph = to_bidirected(graph.cpu()).to(y.device)
    degrees = graph.in_degrees().float()
    num_classes = y.max() + 1
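    # Degree-weighted class distribution: the probability that an endpoint of
    # a uniformly sampled edge carries label c, i.e. the marginal of y_xi
    # (and, by symmetry, of y_eta).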
    class_degree_weighted_probs = torch.zeros(num_classes).to(y.device)
    class_degree_weighted_probs.index_add_(dim=0, index=y, source=degrees)
    class_degree_weighted_probs /= class_degree_weighted_probs.sum()
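    # Joint distribution of the two endpoint labels when an edge is sampled
    # uniformly at random, accumulated into a num_classes x num_classes table.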
    edge_probs = torch.zeros(num_classes, num_classes).to(y.device)
    labels_u = y[graph.edges()[0].long()]
    labels_v = y[graph.edges()[1].long()]
    edge_probs.index_put_(
        indices=(labels_u, labels_v),
        values=torch.ones(graph.num_edges()).to(y.device),
        accumulate=True,
    )
    edge_probs /= edge_probs.sum()
    edge_probs += eps
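    # LI_edge = I(y_xi, y_eta) / H(y_xi). Because both endpoint marginals
    # equal the degree-weighted class distribution, this simplifies to
    # 2 - H(y_xi, y_eta) / H(y_xi); numerator and denominator below are the
    # negated joint and marginal entropies, so the ratio needs no sign flip.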
    numerator = (edge_probs * torch.log(edge_probs)).sum()
    denominator = (
        class_degree_weighted_probs * torch.log(class_degree_weighted_probs)
    ).sum()
    li_edge = 2 - numerator / denominator
    return li_edge.item()

def node_label_informativeness(graph, y, eps=1e-8):
    r"""Label informativeness (:math:`\mathrm{LI}`) is a characteristic of
    labeled graphs proposed in the `Characterizing Graph Datasets for Node
    Classification: Homophily-Heterophily Dichotomy and Beyond
    <https://arxiv.org/abs/2209.06177>`__ paper.

    Label informativeness shows how much information about a node's label we
    get from knowing its neighbor's label. Formally, assume that we sample an
    edge :math:`(\xi,\eta) \in E`. The class labels of nodes :math:`\xi` and
    :math:`\eta` are then random variables :math:`y_\xi` and :math:`y_\eta`.
    We want to measure the amount of knowledge the label :math:`y_\eta` gives
    for predicting :math:`y_\xi`. The entropy :math:`H(y_\xi)` measures the
    'hardness' of predicting the label of :math:`\xi` without knowing
    :math:`y_\eta`. Given :math:`y_\eta`, this value is reduced to the
    conditional entropy :math:`H(y_\xi|y_\eta)`. In other words, :math:`y_\eta`
    reveals :math:`I(y_\xi,y_\eta) = H(y_\xi) - H(y_\xi|y_\eta)` information
    about the label. To make the obtained quantity comparable across different
    datasets, label informativeness is defined as the normalized mutual
    information of :math:`y_{\xi}` and :math:`y_{\eta}`:

    .. math::
        \mathrm{LI} = \frac{I(y_\xi,y_\eta)}{H(y_\xi)}

    Depending on the distribution used for sampling an edge
    :math:`(\xi, \eta)`, several variants of label informativeness can be
    obtained. Two of them are particularly intuitive: in edge label
    informativeness (:math:`\mathrm{LI}_{edge}`), edges are sampled uniformly
    at random, and in node label informativeness (:math:`\mathrm{LI}_{node}`),
    first a node is sampled uniformly at random and then an edge incident to
    it is sampled uniformly at random. These two versions of label
    informativeness differ in how they weight high/low-degree nodes. In edge
    label informativeness, averaging is over the edges, so high-degree nodes
    are given more weight. In node label informativeness, averaging is over
    the nodes, so all nodes are weighted equally.

    This function computes node label informativeness.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).
    eps : float, optional
        A small constant for numerical stability. (default: 1e-8)

    Returns
    -------
    float
        The node label informativeness value.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]))
    >>> y = torch.tensor([0, 0, 0, 0, 1, 1])
    >>> dgl.node_label_informativeness(graph, y)
    0.3381872773170471
    """
    check_pytorch()
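    # Symmetrize the graph so that sampling a random neighbor is well defined
    # in both directions; in-degrees then give the full node degrees.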
    graph = to_bidirected(graph.cpu()).to(y.device)
    degrees = graph.in_degrees().float()
    num_classes = y.max() + 1
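    # Class distribution under uniform node sampling: the marginal of y_xi
    # when the first endpoint xi is drawn uniformly at random from the nodes.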
    class_probs = torch.zeros(num_classes).to(y.device)
    class_probs.index_add_(
        dim=0, index=y, source=torch.ones(graph.num_nodes()).to(y.device)
    )
    class_probs /= class_probs.sum()
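    # Degree-weighted class distribution, used below as the marginal of the
    # neighbor's label y_eta.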
    class_degree_weighted_probs = torch.zeros(num_classes).to(y.device)
    class_degree_weighted_probs.index_add_(dim=0, index=y, source=degrees)
    class_degree_weighted_probs /= class_degree_weighted_probs.sum()
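    # Joint distribution of (y_xi, y_eta) when xi is sampled uniformly among
    # nodes with nonzero degree and eta uniformly among its neighbors: each
    # directed edge (u, v) contributes 1 / (num_nonzero_degree_nodes * deg(u)).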
    num_nonzero_degree_nodes = (degrees > 0).sum()
    edge_probs = torch.zeros(num_classes, num_classes).to(y.device)
    labels_u = y[graph.edges()[0].long()]
    labels_v = y[graph.edges()[1].long()]
    degrees_u = degrees[graph.edges()[0].long()]
    edge_probs.index_put_(
        indices=(labels_u, labels_v),
        values=1 / (num_nonzero_degree_nodes * degrees_u),
        accumulate=True,
    )
    edge_probs += eps
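    # LI_node = I(y_xi, y_eta) / H(y_xi): the numerator is the mutual
    # information E[log p(y_xi, y_eta) / (p(y_xi) p(y_eta))], the denominator
    # is -H(y_xi), and the leading minus sign cancels the negated entropy.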
    log = torch.log(
        edge_probs
        / (class_probs[:, None] * class_degree_weighted_probs[None, :])
    )
    numerator = (edge_probs * log).sum()
    denominator = (class_probs * torch.log(class_probs)).sum()
    li_node = -numerator / denominator
    return li_node.item()