# Source code for dgl.sampling.node2vec_randomwalk

"""Node2vec random walk"""

from .._ffi.function import _init_api
from .. import backend as F
from .. import ndarray as nd
from .. import utils
# pylint: disable=invalid-name

# Public API of this module: only the sampler entry point is exported by
# ``from <module> import *``.
__all__ = ['node2vec_random_walk']

def node2vec_random_walk(g, nodes, p, q, walk_length, prob=None, return_eids=False):
    """
    Generate random walk traces from an array of starting nodes based on the node2vec model.
    Paper: `node2vec: Scalable Feature Learning for Networks
    <https://arxiv.org/abs/1607.00653>`__.

    The returned traces all have length ``walk_length + 1``, where the first node
    is the starting node itself.

    Note that if a random walk stops in advance, DGL pads the trace with -1 to have the same
    length.

    Parameters
    ----------
    g : DGLGraph
        The graph.  Must be on CPU.

        Note that node2vec only supports homogeneous graphs.
    nodes : Tensor
        Node ID tensor from which the random walk traces start.

        The tensor must be on CPU, and must have the same dtype as the ID type
        of the graph.
    p : float
        Likelihood of immediately revisiting a node in the walk.
    q : float
        Control parameter to interpolate between breadth-first strategy and
        depth-first strategy.
    walk_length : int
        Length of random walks.
    prob : str, optional
        The name of the edge feature tensor on the graph storing the (unnormalized)
        probabilities associated with each edge for choosing the next node.

        The feature tensor must be non-negative and the sum of the probabilities
        must be positive for the outbound edges of all nodes (although they don't have
        to sum up to one).  The result will be undefined otherwise.

        If omitted, DGL assumes that the neighbors are picked uniformly.
    return_eids : bool, optional
        If True, additionally return the edge IDs traversed.

        Default: False.

    Returns
    -------
    traces : Tensor
        A 2-dimensional node ID tensor with shape ``(num_seeds, walk_length + 1)``.
    eids : Tensor, optional
        A 2-dimensional edge ID tensor with shape ``(num_seeds, walk_length)``.
        Only returned if :attr:`return_eids` is True.

    Raises
    ------
    AssertionError
        If the graph is not on CPU.

    Examples
    --------
    >>> g1 = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]))
    >>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4)
    tensor([[0, 1, 3, 0, 1],
            [1, 2, 0, 1, 3],
            [2, 0, 1, 3, 0],
            [0, 1, 2, 0, 1]])

    >>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4, return_eids=True)
    (tensor([[0, 1, 3, 0, 1],
             [1, 2, 0, 1, 2],
             [2, 0, 1, 2, 0],
             [0, 1, 2, 0, 1]]),
     tensor([[0, 2, 4, 0],
             [1, 3, 0, 1],
             [3, 0, 1, 3],
             [0, 1, 3, 0]]))
    """
    assert g.device == F.cpu(), "Graph must be on CPU."

    gidx = g._graph
    # Normalize the seed nodes to the graph's ID tensor form, then hand them
    # to the backend as a DGL NDArray.
    nodes = F.to_dgl_nd(utils.prepare_tensor(g, nodes, 'nodes'))

    if prob is None:
        # No edge-probability feature requested: pass an empty array placed
        # on the same context as the seed nodes.
        prob_nd = nd.array([], ctx=nodes.ctx)
    else:
        prob_nd = F.to_dgl_nd(g.edata[prob])

    traces, eids = _CAPI_DGLSamplingNode2vec(gidx, nodes, p, q, walk_length, prob_nd)

    # Convert the DGL NDArrays back into tensors of the active framework.
    traces = F.from_dgl_nd(traces)
    eids = F.from_dgl_nd(eids)

    return (traces, eids) if return_eids else traces

# NOTE(review): presumably this binds the C API functions registered under
# 'dgl.sampling.randomwalks' (e.g. the `_CAPI_DGLSamplingNode2vec` called by
# `node2vec_random_walk`) into this module's namespace -- confirm against
# `_ffi.function._init_api`.
_init_api('dgl.sampling.randomwalks', __name__)