"""Utils for computing graph label informativeness"""
from . import to_bidirected
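
# These utilities require the PyTorch backend; record whether torch is
# available so the public functions can raise a clear error otherwise.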
try:
    import torch
except ImportError:
    HAS_TORCH = False
else:
    HAS_TORCH = True

__all__ = ["edge_label_informativeness", "node_label_informativeness"]

def check_pytorch():
    """Check if PyTorch is the backend."""
    if HAS_TORCH is False:
        raise ModuleNotFoundError(
            "This function requires PyTorch to be the backend."
        )

def edge_label_informativeness(graph, y, eps=1e-8):
    r"""Label informativeness (:math:`\mathrm{LI}`) is a characteristic of
    labeled graphs proposed in the `Characterizing Graph Datasets for Node
    Classification: Homophily-Heterophily Dichotomy and Beyond
    <https://arxiv.org/abs/2209.06177>`__ paper.

    Label informativeness shows how much information about a node's label we
    get from knowing its neighbor's label. Formally, assume that we sample an
    edge :math:`(\xi,\eta) \in E`. The class labels of nodes :math:`\xi` and
    :math:`\eta` are then random variables :math:`y_\xi` and :math:`y_\eta`.
    We want to measure the amount of knowledge the label :math:`y_\eta` gives
    for predicting :math:`y_\xi`. The entropy :math:`H(y_\xi)` measures the
    'hardness' of predicting the label of :math:`\xi` without knowing
    :math:`y_\eta`. Given :math:`y_\eta`, this value is reduced to the
    conditional entropy :math:`H(y_\xi|y_\eta)`. In other words, :math:`y_\eta`
    reveals :math:`I(y_\xi,y_\eta) = H(y_\xi) - H(y_\xi|y_\eta)` information
    about the label. To make the obtained quantity comparable across different
    datasets, label informativeness is defined as the normalized mutual
    information of :math:`y_{\xi}` and :math:`y_{\eta}`:

    .. math::
        \mathrm{LI} = \frac{I(y_\xi,y_\eta)}{H(y_\xi)}

    Depending on the distribution used for sampling an edge
    :math:`(\xi, \eta)`, several variants of label informativeness can be
    obtained. Two of them are particularly intuitive: in edge label
    informativeness (:math:`\mathrm{LI}_{edge}`), edges are sampled uniformly
    at random, and in node label informativeness (:math:`\mathrm{LI}_{node}`),
    first a node is sampled uniformly at random and then an edge incident to
    it is sampled uniformly at random. These two versions of label
    informativeness differ in how they weight high/low-degree nodes. In edge
    label informativeness, averaging is over the edges, so high-degree nodes
    are given more weight. In node label informativeness, averaging is over
    the nodes, so all nodes are weighted equally.

    This function computes edge label informativeness.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).
    eps : float, optional
        A small constant for numerical stability. (default: 1e-8)

    Returns
    -------
    float
        The edge label informativeness value.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]))
    >>> y = torch.tensor([0, 0, 0, 0, 1, 1])
    >>> dgl.edge_label_informativeness(graph, y)
    0.25177597999572754
    """
    check_pytorch()
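    # Symmetrize the graph so that each edge is present in both directions;
    # the in-degrees of the bidirected graph are then the full node degrees.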
    graph = to_bidirected(graph.cpu()).to(y.device)
    degrees = graph.in_degrees().float()
    num_classes = y.max() + 1
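    # Degree-weighted class distribution: the probability that an endpoint of
    # a uniformly sampled edge carries label c, i.e. the marginal of y_xi
    # (and, by symmetry, of y_eta).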
    class_degree_weighted_probs = torch.zeros(num_classes).to(y.device)
    class_degree_weighted_probs.index_add_(dim=0, index=y, source=degrees)
    class_degree_weighted_probs /= class_degree_weighted_probs.sum()
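    # Joint distribution of the two endpoint labels when an edge is sampled
    # uniformly at random, accumulated into a num_classes x num_classes table.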
    edge_probs = torch.zeros(num_classes, num_classes).to(y.device)
    labels_u = y[graph.edges()[0].long()]
    labels_v = y[graph.edges()[1].long()]
    edge_probs.index_put_(
        indices=(labels_u, labels_v),
        values=torch.ones(graph.num_edges()).to(y.device),
        accumulate=True,
    )
    edge_probs /= edge_probs.sum()
    edge_probs += eps
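    # LI_edge = I(y_xi, y_eta) / H(y_xi). Because both endpoint marginals
    # equal the degree-weighted class distribution, this simplifies to
    # 2 - H(y_xi, y_eta) / H(y_xi); numerator and denominator below are the
    # negated joint and marginal entropies, so the ratio needs no sign flip.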
    numerator = (edge_probs * torch.log(edge_probs)).sum()
    denominator = (
        class_degree_weighted_probs * torch.log(class_degree_weighted_probs)
    ).sum()
    li_edge = 2 - numerator / denominator
    return li_edge.item()

def node_label_informativeness(graph, y, eps=1e-8):
    r"""Label informativeness (:math:`\mathrm{LI}`) is a characteristic of
    labeled graphs proposed in the `Characterizing Graph Datasets for Node
    Classification: Homophily-Heterophily Dichotomy and Beyond
    <https://arxiv.org/abs/2209.06177>`__ paper.

    Label informativeness shows how much information about a node's label we
    get from knowing its neighbor's label. Formally, assume that we sample an
    edge :math:`(\xi,\eta) \in E`. The class labels of nodes :math:`\xi` and
    :math:`\eta` are then random variables :math:`y_\xi` and :math:`y_\eta`.
    We want to measure the amount of knowledge the label :math:`y_\eta` gives
    for predicting :math:`y_\xi`. The entropy :math:`H(y_\xi)` measures the
    'hardness' of predicting the label of :math:`\xi` without knowing
    :math:`y_\eta`. Given :math:`y_\eta`, this value is reduced to the
    conditional entropy :math:`H(y_\xi|y_\eta)`. In other words, :math:`y_\eta`
    reveals :math:`I(y_\xi,y_\eta) = H(y_\xi) - H(y_\xi|y_\eta)` information
    about the label. To make the obtained quantity comparable across different
    datasets, label informativeness is defined as the normalized mutual
    information of :math:`y_{\xi}` and :math:`y_{\eta}`:

    .. math::
        \mathrm{LI} = \frac{I(y_\xi,y_\eta)}{H(y_\xi)}

    Depending on the distribution used for sampling an edge
    :math:`(\xi, \eta)`, several variants of label informativeness can be
    obtained. Two of them are particularly intuitive: in edge label
    informativeness (:math:`\mathrm{LI}_{edge}`), edges are sampled uniformly
    at random, and in node label informativeness (:math:`\mathrm{LI}_{node}`),
    first a node is sampled uniformly at random and then an edge incident to
    it is sampled uniformly at random. These two versions of label
    informativeness differ in how they weight high/low-degree nodes. In edge
    label informativeness, averaging is over the edges, so high-degree nodes
    are given more weight. In node label informativeness, averaging is over
    the nodes, so all nodes are weighted equally.

    This function computes node label informativeness.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).
    eps : float, optional
        A small constant for numerical stability. (default: 1e-8)

    Returns
    -------
    float
        The node label informativeness value.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]))
    >>> y = torch.tensor([0, 0, 0, 0, 1, 1])
    >>> dgl.node_label_informativeness(graph, y)
    0.3381872773170471
    """
    check_pytorch()
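    # Symmetrize the graph so that sampling a random neighbor is well defined
    # in both directions; in-degrees then give the full node degrees.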
    graph = to_bidirected(graph.cpu()).to(y.device)
    degrees = graph.in_degrees().float()
    num_classes = y.max() + 1
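    # Class distribution under uniform node sampling: the marginal of y_xi
    # when the first endpoint xi is drawn uniformly at random from the nodes.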
    class_probs = torch.zeros(num_classes).to(y.device)
    class_probs.index_add_(
        dim=0, index=y, source=torch.ones(graph.num_nodes()).to(y.device)
    )
    class_probs /= class_probs.sum()
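    # Degree-weighted class distribution, used below as the marginal of the
    # neighbor's label y_eta.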
    class_degree_weighted_probs = torch.zeros(num_classes).to(y.device)
    class_degree_weighted_probs.index_add_(dim=0, index=y, source=degrees)
    class_degree_weighted_probs /= class_degree_weighted_probs.sum()
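    # Joint distribution of (y_xi, y_eta) when xi is sampled uniformly among
    # nodes with nonzero degree and eta uniformly among its neighbors: each
    # directed edge (u, v) contributes 1 / (num_nonzero_degree_nodes * deg(u)).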
    num_nonzero_degree_nodes = (degrees > 0).sum()
    edge_probs = torch.zeros(num_classes, num_classes).to(y.device)
    labels_u = y[graph.edges()[0].long()]
    labels_v = y[graph.edges()[1].long()]
    degrees_u = degrees[graph.edges()[0].long()]
    edge_probs.index_put_(
        indices=(labels_u, labels_v),
        values=1 / (num_nonzero_degree_nodes * degrees_u),
        accumulate=True,
    )
    edge_probs += eps
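    # LI_node = I(y_xi, y_eta) / H(y_xi): the numerator is the mutual
    # information E[log p(y_xi, y_eta) / (p(y_xi) p(y_eta))], the denominator
    # is -H(y_xi), and the leading minus sign cancels the negated entropy.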
    log = torch.log(
        edge_probs
        / (class_probs[:, None] * class_degree_weighted_probs[None, :])
    )
    numerator = (edge_probs * log).sum()
    denominator = (class_probs * torch.log(class_probs)).sum()
    li_node = -numerator / denominator
    return li_node.item()