"""Path Encoder"""
import torch as th
import torch.nn as nn


class PathEncoder(nn.Module):
    r"""Path Encoder, as introduced in Edge Encoding of
    `Do Transformers Really Perform Bad for Graph Representation?
    <https://proceedings.neurips.cc/paper/2021/file/f1c1592588411002af340cbaedd6fc33-Paper.pdf>`__

    This module is a learnable path embedding that encodes the shortest
    path between each pair of nodes as an attention bias.

    Parameters
    ----------
    max_len : int
        Maximum number of edges in each path to be encoded.
        The part of each path beyond this length is truncated, i.e.
        edges with index :attr:`max_len` or greater are dropped.
    feat_dim : int
        Dimension of edge features in the input graph.
    num_heads : int, optional
        Number of attention heads if multi-head attention mechanism
        is applied. Default : 1.

    Examples
    --------
    >>> import torch as th
    >>> import dgl
    >>> from dgl.nn import PathEncoder
    >>> from dgl import shortest_dist

    >>> g = dgl.graph(([0,0,0,1,1,2,3,3], [1,2,3,0,3,0,0,1]))
    >>> edata = th.rand(8, 16)
    >>> # Since shortest_dist returns -1 for unreachable node pairs,
    >>> # edata[-1] should be filled with zero padding.
    >>> edata = th.cat((edata, th.zeros(1, 16)), dim=0)
    >>> dist, path = shortest_dist(g, root=None, return_paths=True)
    >>> path_data = edata[path[:, :, :2]]
    >>> path_encoder = PathEncoder(2, 16, num_heads=8)
    >>> out = path_encoder(dist.unsqueeze(0), path_data.unsqueeze(0))
    >>> print(out.shape)
    torch.Size([1, 4, 4, 8])
    """

    def __init__(self, max_len, feat_dim, num_heads=1):
        super().__init__()
        self.max_len = max_len
        self.feat_dim = feat_dim
        self.num_heads = num_heads
        # One learnable weight vector of size ``feat_dim`` per
        # (path position, attention head) pair.
        self.embedding_table = nn.Embedding(max_len * num_heads, feat_dim)
    def forward(self, dist, path_data):
        """
        Parameters
        ----------
        dist : Tensor
            Shortest path distance matrix of the batched graph with zero
            padding, of shape :math:`(B, N, N)`, where :math:`B` is the
            batch size of the batched graph, and :math:`N` is the maximum
            number of nodes.
        path_data : Tensor
            Edge feature along the shortest path with zero padding, of
            shape :math:`(B, N, N, L, d)`, where :math:`L` is the maximum
            length of the shortest paths, and :math:`d` is
            :attr:`feat_dim`.

        Returns
        -------
        torch.Tensor
            Return attention bias as path encoding, of shape
            :math:`(B, N, N, H)`, where :math:`B` is the batch size of
            the input graph, :math:`N` is the maximum number of nodes,
            and :math:`H` is :attr:`num_heads`.
        """
        # Clamp distances so that unreachable pairs (-1) and paths longer
        # than ``max_len`` still yield a valid divisor in [1, max_len].
        shortest_distance = th.clamp(dist, min=1, max=self.max_len)
        # View the flat table as (max_len, num_heads, feat_dim): one
        # weight vector per path position and attention head.
        edge_embedding = self.embedding_table.weight.reshape(
            self.max_len, self.num_heads, -1
        )
        # Dot each edge feature with the weight for its position along
        # the path, sum over positions, then average by path length.
        path_encoding = th.div(
            th.einsum("bxyld,lhd->bxyh", path_data, edge_embedding).permute(
                3, 0, 1, 2
            ),
            shortest_distance,
        ).permute(1, 2, 3, 0)
        return path_encoding
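

# --- Usage sketch (not part of the original module) ---
# A minimal end-to-end example, assuming DGL's ``shortest_dist`` utility,
# of how the returned bias would typically be added to the pre-softmax
# attention scores of a graph Transformer layer. The ``attn_scores``
# tensor and its (B, H, N, N) layout are illustrative assumptions, not
# part of this module's API.
if __name__ == "__main__":
    import dgl
    from dgl import shortest_dist

    g = dgl.graph(([0, 0, 0, 1, 1, 2, 3, 3], [1, 2, 3, 0, 3, 0, 0, 1]))
    edata = th.rand(8, 16)
    # Pad with a zero row so index -1 (unreachable pairs) maps to zeros.
    edata = th.cat((edata, th.zeros(1, 16)), dim=0)
    dist, path = shortest_dist(g, root=None, return_paths=True)
    # Keep at most max_len=2 edges per path before looking up features.
    path_data = edata[path[:, :, :2]]

    encoder = PathEncoder(max_len=2, feat_dim=16, num_heads=8)
    bias = encoder(dist.unsqueeze(0), path_data.unsqueeze(0))  # (1, 4, 4, 8)

    # Hypothetical attention scores of shape (B, H, N, N); the bias is
    # permuted from (B, N, N, H) to match before being added.
    attn_scores = th.rand(1, 8, 4, 4)
    attn_scores = attn_scores + bias.permute(0, 3, 1, 2)
    print(attn_scores.shape)  # torch.Size([1, 8, 4, 4])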