Source code for dgl.nn.pytorch.conv.dgnconv

"""Torch Module for Directional Graph Networks Convolution Layer"""
# pylint: disable= no-member, arguments-differ, invalid-name
from functools import partial
import torch
import torch.nn as nn
from .pnaconv import AGGREGATORS, SCALERS, PNAConv, PNAConvTower

def aggregate_dir_av(h, eig_s, eig_d, eig_idx):
    """directional average aggregation"""
    # |F_{i,j}|: absolute gradient of the eig_idx-th eigenvector along each edge.
    eig_abs = torch.abs(eig_s[:, :, eig_idx] - eig_d[:, :, eig_idx])
    # L1-normalize over the neighbourhood; 1e-30 guards against division by zero.
    eig_w = (eig_abs / (torch.sum(eig_abs, keepdim=True, dim=1) + 1e-30)).unsqueeze(-1)
    # Weighted sum of the neighbour messages.
    return torch.sum(torch.mul(h, eig_w), dim=1)

def aggregate_dir_dx(h, eig_s, eig_d, h_in, eig_idx):
    """directional derivative aggregation"""
    # F_{i,j}: signed gradient of the eig_idx-th eigenvector along each edge,
    # L1-normalized over the neighbourhood; 1e-30 guards against division by zero.
    eig_diff = eig_s[:, :, eig_idx] - eig_d[:, :, eig_idx]
    eig_w = (
        eig_diff /
        (torch.sum(torch.abs(eig_diff), keepdim=True, dim=1) + 1e-30)
    ).unsqueeze(-1)
    # Weighted sum of neighbour messages minus the centre feature scaled by the
    # total weight, i.e. a finite difference along the eigenvector direction.
    h_mod = torch.mul(h, eig_w)
    return torch.abs(torch.sum(h_mod, dim=1) - torch.sum(eig_w, dim=1) * h_in)
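
# Illustrative shape check for the two helpers above (a sketch, not part of
# the original module; all tensors are random stand-ins). Inside a reduce
# function, h is the padded mailbox of neighbour messages, and eig_s/eig_d
# hold the eigenvector values at each edge's two endpoints.
#
#   N, D, F, K = 4, 3, 8, 2        # nodes, mailbox degree, feat size, num eigenvectors
#   h = torch.randn(N, D, F)       # neighbour messages
#   eig_s = torch.randn(N, D, K)   # eigenvectors at source endpoints
#   eig_d = torch.randn(N, D, K)   # eigenvectors at destination endpoints
#   h_in = torch.randn(N, F)       # centre-node features
#   assert aggregate_dir_av(h, eig_s, eig_d, eig_idx=0).shape == (N, F)
#   assert aggregate_dir_dx(h, eig_s, eig_d, h_in, eig_idx=0).shape == (N, F)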

for k in range(1, 4):
    AGGREGATORS[f'dir{k}-av'] = partial(aggregate_dir_av, eig_idx=k-1)
    AGGREGATORS[f'dir{k}-dx'] = partial(aggregate_dir_dx, eig_idx=k-1)
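
# Sketch of what the loop above registers: each entry is the corresponding
# helper with eig_idx pre-bound, e.g. AGGREGATORS['dir2-av'] is
# partial(aggregate_dir_av, eig_idx=1), so
#   AGGREGATORS['dir2-av'](h, eig_s, eig_d)
# is equivalent to aggregate_dir_av(h, eig_s, eig_d, eig_idx=1).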

class DGNConvTower(PNAConvTower):
    """A single DGN tower with modified reduce function"""
    def message(self, edges):
        """message function for DGN layer"""
        # Build each message from [h_src || h_dst (|| edge feat)] passed through
        # the pretransformation network M, and forward the endpoint eigenvector
        # values needed by the directional aggregators.
        if self.edge_feat_size > 0:
            f = torch.cat([edges.src['h'], edges.dst['h'], edges.data['a']], dim=-1)
        else:
            f = torch.cat([edges.src['h'], edges.dst['h']], dim=-1)
        return {'msg': self.M(f), 'eig_s': edges.src['eig'], 'eig_d': edges.dst['eig']}

    def reduce_func(self, nodes):
        """reduce function for DGN layer"""
        h_in = nodes.data['h']
        eig_s = nodes.mailbox['eig_s']
        eig_d = nodes.mailbox['eig_d']
        msg = nodes.mailbox['msg']
        # Reduce functions run per in-degree bucket, so the mailbox size along
        # dim 1 is the in-degree of every node in this bucket.
        degree = msg.size(1)

        h = []
        for agg in self.aggregators:
            if agg.startswith('dir'):
                # Directional aggregators additionally need the eigenvector
                # values of both edge endpoints; the derivative variant
                # ('dir{k}-dx') also needs the centre node's own feature.
                if agg.endswith('av'):
                    h.append(AGGREGATORS[agg](msg, eig_s, eig_d))
                else:
                    h.append(AGGREGATORS[agg](msg, eig_s, eig_d, h_in))
            else:
                h.append(AGGREGATORS[agg](msg))
        h = torch.cat(h, dim=1)
        # Apply every scaler to the concatenated aggregations and concatenate
        # the results along the feature dimension.
        h = torch.cat([
            SCALERS[scaler](h, D=degree, delta=self.delta) if scaler != 'identity' else h
            for scaler in self.scalers
        ], dim=1)
        return {'h_neigh': h}
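        # Shape note (a sketch comment, not in the original source): each
        # aggregator yields an (N, F) tensor and every scaler preserves that
        # width, so after the two concatenations 'h_neigh' has width
        # len(self.aggregators) * len(self.scalers) * F.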

class DGNConv(PNAConv):
    r"""Directional Graph Network Layer from `Directional Graph Networks
    <https://arxiv.org/abs/2010.02863>`__

    DGN introduces two special directional aggregators according to the vector field
    :math:`F`, which is defined as the gradient of the low-frequency eigenvectors of
    the graph Laplacian.

    The directional average aggregator is defined as
    :math:`h_i' = \sum_{j\in\mathcal{N}(i)}\frac{|F_{i,j}|\cdot h_j}{||F_{i,:}||_1+\epsilon}`

    The directional derivative aggregator is defined as
    :math:`h_i' = \sum_{j\in\mathcal{N}(i)}\frac{F_{i,j}\cdot h_j}{||F_{i,:}||_1+\epsilon}
    -h_i\cdot\sum_{j\in\mathcal{N}(i)}\frac{F_{i,j}}{||F_{i,:}||_1+\epsilon}`

    :math:`\epsilon` is a small constant that keeps the computation numerically stable.

    Parameters
    ----------
    in_size : int
        Input feature size; i.e. the size of :math:`h_i^l`.
    out_size : int
        Output feature size; i.e. the size of :math:`h_i^{l+1}`.
    aggregators : list of str
        List of aggregation function names (each aggregator specifies a way to
        aggregate messages from neighbours), selected from:

        * ``mean``: the mean of neighbour messages
        * ``max``: the maximum of neighbour messages
        * ``min``: the minimum of neighbour messages
        * ``std``: the standard deviation of neighbour messages
        * ``var``: the variance of neighbour messages
        * ``sum``: the sum of neighbour messages
        * ``moment3``, ``moment4``, ``moment5``: the normalized moments aggregation
          :math:`(E[(X-E[X])^n])^{1/n}`
        * ``dir{k}-av``: directional average aggregation with directions defined by
          the k-th smallest eigenvectors. k can be selected from 1, 2, 3.
        * ``dir{k}-dx``: directional derivative aggregation with directions defined
          by the k-th smallest eigenvectors. k can be selected from 1, 2, 3.

        Note that using directional aggregation requires the LaplacianPE transform
        on the input graph for eigenvector computation (the PE size must be >= k
        above).
    scalers : list of str
        List of scaler function names, selected from:

        * ``identity``: no scaling
        * ``amplification``: multiply the aggregated message by :math:`\log(d+1)/\delta`,
          where :math:`d` is the in-degree of the node.
        * ``attenuation``: multiply the aggregated message by :math:`\delta/\log(d+1)`
    delta : float
        The in-degree-related normalization factor computed over the training set,
        used by scalers for normalization. :math:`E[\log(d+1)]`, where :math:`d` is
        the in-degree for each node in the training set.
    dropout : float, optional
        The dropout ratio. Default: 0.0.
    num_towers : int, optional
        The number of towers used. Default: 1. Note that in_size and out_size must
        be divisible by num_towers.
    edge_feat_size : int, optional
        The edge feature size. Default: 0.
    residual : bool, optional
        Whether to add a residual connection to the output. Default: True. If
        in_size and out_size of the DGN conv layer differ, this flag is forcibly
        set to False.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import DGNConv
    >>> from dgl import LaplacianPE
    >>>
    >>> # DGN requires precomputed eigenvectors, with 'eig' as the feature name.
    >>> transform = LaplacianPE(k=3, feat_name='eig')
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = transform(g)
    >>> eig = g.ndata['eig']
    >>> feat = th.ones(6, 10)
    >>> conv = DGNConv(10, 10, ['dir1-av', 'dir1-dx', 'sum'],
    ...                ['identity', 'amplification'], 2.5)
    >>> ret = conv(g, feat, eig_vec=eig)
    """
    def __init__(self,
                 in_size,
                 out_size,
                 aggregators,
                 scalers,
                 delta,
                 dropout=0.,
                 num_towers=1,
                 edge_feat_size=0,
                 residual=True):
        super(DGNConv, self).__init__(
            in_size, out_size, aggregators, scalers, delta,
            dropout, num_towers, edge_feat_size, residual
        )
        # Rebuild the towers with the DGN variant so that the directional
        # message and reduce functions are used.
        self.towers = nn.ModuleList([
            DGNConvTower(
                self.tower_in_size, self.tower_out_size,
                aggregators, scalers, delta,
                dropout=dropout, edge_feat_size=edge_feat_size
            ) for _ in range(num_towers)
        ])
        # Eigenvectors are only needed when a directional aggregator is used.
        self.use_eig_vec = False
        for aggr in aggregators:
            if aggr.startswith('dir'):
                self.use_eig_vec = True
                break

    def forward(self, graph, node_feat, edge_feat=None, eig_vec=None):
        r"""
        Description
        -----------
        Compute DGN layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        node_feat : torch.Tensor
            The input feature of shape :math:`(N, h_n)`. :math:`N` is the number
            of nodes, and :math:`h_n` must be the same as in_size.
        edge_feat : torch.Tensor, optional
            The edge feature of shape :math:`(M, h_e)`. :math:`M` is the number
            of edges, and :math:`h_e` must be the same as edge_feat_size.
        eig_vec : torch.Tensor, optional
            The K smallest non-trivial eigenvectors of the graph Laplacian, of
            shape :math:`(N, K)`. It is only required when :attr:`aggregators`
            contains directional aggregators.

        Returns
        -------
        torch.Tensor
            The output node feature of shape :math:`(N, h_n')` where :math:`h_n'`
            should be the same as out_size.
        """
        with graph.local_scope():
            if self.use_eig_vec:
                graph.ndata['eig'] = eig_vec
            return super().forward(graph, node_feat, edge_feat)
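
# Minimal smoke test, sketched here for illustration (not part of the upstream
# dgl module). It mirrors the docstring example but also exercises edge
# features and two towers; the graph, feature sizes, and delta are arbitrary
# stand-in choices.
if __name__ == '__main__':
    import dgl
    from dgl import LaplacianPE

    # Attach the 3 smallest non-trivial Laplacian eigenvectors as 'eig'.
    transform = LaplacianPE(k=3, feat_name='eig')
    g = transform(dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3])))

    node_feat = torch.ones(6, 10)
    edge_feat = torch.ones(6, 4)
    conv = DGNConv(10, 10,
                   aggregators=['dir1-av', 'dir1-dx', 'mean'],
                   scalers=['identity', 'amplification'],
                   delta=2.5, num_towers=2, edge_feat_size=4)
    out = conv(g, node_feat, edge_feat=edge_feat, eig_vec=g.ndata['eig'])
    assert out.shape == (6, 10)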