# Source code for dgl.homophily

"""Utils for tacking graph homophily and heterophily"""
# pylint: disable=W0611
from . import function as fn

try:
import torch
except ImportError:
HAS_TORCH = False
else:
HAS_TORCH = True

def check_pytorch():
"""Check if PyTorch is the backend."""
if HAS_TORCH is False:
raise ModuleNotFoundError(
"This function requires PyTorch to be the backend."
)

def get_long_edges(graph):
"""Internal function for getting the edges of a graph as long tensors."""
src, dst = graph.edges()
return src.long(), dst.long()

[docs]def node_homophily(graph, y):
r"""Homophily measure from Geom-GCN: Geometric Graph Convolutional
Networks <https://arxiv.org/abs/2002.05287>__

We follow the practice of a later paper Large Scale Learning on
Non-Homophilous Graphs: New Benchmarks and Strong Simple Methods
<https://arxiv.org/abs/2110.14446>__ to call it node homophily.

Mathematically it is defined as follows:

.. math::
\frac{1}{|\mathcal{V}|} \sum_{v \in \mathcal{V}} \frac{ | \{u
\in \mathcal{N}(v): y_v = y_u \} |  } { |\mathcal{N}(v)| },

where :math:\mathcal{V} is the set of nodes, :math:\mathcal{N}(v) is
the predecessors of node :math:v, and :math:y_v is the class of node
:math:v.

Parameters
----------
graph : DGLGraph
The graph.
y : torch.Tensor
The node labels, which is a tensor of shape (|V|).

Returns
-------
float
The node homophily value.

Examples
--------
>>> import dgl
>>> import torch

>>> graph = dgl.graph(([1, 2, 0, 4], [0, 1, 2, 3]))
>>> y = torch.tensor([0, 0, 0, 0, 1])
>>> dgl.node_homophily(graph, y)
0.6000000238418579
"""
check_pytorch()
with graph.local_scope():
# Handle the case where graph is of dtype int32.
src, dst = get_long_edges(graph)
# Compute y_v = y_u for all edges.
graph.edata["same_class"] = (y[src] == y[dst]).float()
graph.update_all(
fn.copy_e("same_class", "m"), fn.mean("m", "same_class_deg")
)
return graph.ndata["same_class_deg"].mean(dim=0).item()

[docs]def edge_homophily(graph, y):
r"""Homophily measure from Beyond Homophily in Graph Neural Networks:
Current Limitations and Effective Designs
<https://arxiv.org/abs/2006.11468>__

Mathematically it is defined as follows:

.. math::
\frac{| \{ (u,v) : (u,v) \in \mathcal{E} \wedge y_u = y_v \} | }
{|\mathcal{E}|},

where :math:\mathcal{E} is the set of edges, and :math:y_u is the class
of node :math:u.

Parameters
----------
graph : DGLGraph
The graph.
y : torch.Tensor
The node labels, which is a tensor of shape (|V|).

Returns
-------
float
The edge homophily ratio value.

Examples
--------
>>> import dgl
>>> import torch

>>> graph = dgl.graph(([1, 2, 0, 4], [0, 1, 2, 3]))
>>> y = torch.tensor([0, 0, 0, 0, 1])
>>> dgl.edge_homophily(graph, y)
0.75
"""
check_pytorch()
with graph.local_scope():
# Handle the case where graph is of dtype int32.
src, dst = get_long_edges(graph)
# Compute y_v = y_u for all edges.
edge_indicator = (y[src] == y[dst]).float()
return edge_indicator.mean(dim=0).item()

r"""Homophily measure from Large Scale Learning on Non-Homophilous Graphs:
New Benchmarks and Strong Simple Methods
<https://arxiv.org/abs/2110.14446>__

Mathematically it is defined as follows:

.. math::
\frac{1}{C-1} \sum_{k=1}^{C} \max \left(0, \frac{\sum_{v\in C_k}|\{u\in
\mathcal{N}(v): y_v = y_u \}|}{\sum_{v\in C_k}|\mathcal{N}(v)|} -
\frac{|\mathcal{C}_k|}{|\mathcal{V}|} \right),

where :math:C is the number of node classes, :math:C_k is the set of
nodes that belong to class k, :math:\mathcal{N}(v) are the predecessors
of node :math:v, :math:y_v is the class of node :math:v, and
:math:\mathcal{V} is the set of nodes.

Parameters
----------
graph : DGLGraph
The graph.
y : torch.Tensor
The node labels, which is a tensor of shape (|V|).

Returns
-------
float
The homophily value.

Examples
--------
>>> import dgl
>>> import torch

>>> graph = dgl.graph(([0, 1, 2, 3], [1, 2, 0, 4]))
>>> y = torch.tensor([0, 0, 0, 0, 1])
0.19999998807907104
"""
check_pytorch()
with graph.local_scope():
# Compute |{u\in N(v): y_v = y_u}| for each node v.
# Handle the case where graph is of dtype int32.
src, dst = get_long_edges(graph)
# Compute y_v = y_u for all edges.
graph.edata["same_class"] = (y[src] == y[dst]).float()
graph.update_all(
fn.copy_e("same_class", "m"), fn.sum("m", "same_class_deg")
)

deg = graph.in_degrees().float()
num_nodes = graph.num_nodes()
num_classes = y.max(dim=0).values.item() + 1

value = torch.tensor(0.0).to(graph.device)
for k in range(num_classes):
# Get the nodes that belong to class k.