neo4j
diff --git a/‎graphdatascience/procedure_surface/api/catalog/graph_sampling_endpoints.py‎
Lines changed: 187 additions & 0 deletions b/‎graphdatascience/procedure_surface/api/catalog/graph_sampling_endpoints.py‎
Lines changed: 187 additions & 0 deletions
diff --git a/‎graphdatascience/procedure_surface/api/catalog_endpoints.py‎
Lines changed: 1 addition & 1 deletion b/‎graphdatascience/procedure_surface/api/catalog_endpoints.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎graphdatascience/procedure_surface/api/centrality/__init__.py‎ b/‎graphdatascience/procedure_surface/api/centrality/__init__.py‎
diff --git a/‎graphdatascience/procedure_surface/api/articlerank_endpoints.py‎ renamed to ‎graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py‎
Lines changed: 53 additions & 46 deletions b/‎graphdatascience/procedure_surface/api/articlerank_endpoints.py‎ renamed to ‎graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py‎
Lines changed: 53 additions & 46 deletions
@@ -0,0 +1,187 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from types import TracebackType
+from typing import List, NamedTuple, Optional, Type
+
+from graphdatascience.procedure_surface.api.base_result import BaseResult
+from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
+
+
+class GraphSamplingEndpoints(ABC):
+    """
+    Abstract base class defining the API for graph sampling operations.
+    """
+
+    @abstractmethod
+    def rwr(
+        self,
+        G: GraphV2,
+        graph_name: str,
+        start_nodes: Optional[List[int]] = None,
+        restart_probability: Optional[float] = None,
+        sampling_ratio: Optional[float] = None,
+        node_label_stratification: Optional[bool] = None,
+        relationship_weight_property: Optional[str] = None,
+        relationship_types: Optional[List[str]] = None,
+        node_labels: Optional[List[str]] = None,
+        sudo: Optional[bool] = None,
+        log_progress: Optional[bool] = None,
+        username: Optional[str] = None,
+        concurrency: Optional[int] = None,
+        job_id: Optional[str] = None,
+    ) -> GraphWithSamplingResult:
+        """
+        Random walk with restarts (RWR) samples the graph by taking random walks from a set of start nodes.
+
+        On each step of a random walk, there is a probability that the walk stops, and a new walk from one of the start
+        nodes starts instead (i.e. the walk restarts). Each node visited on these walks will be part of the sampled
+        subgraph. The resulting subgraph is stored as a new graph in the Graph Catalog.
+
+        Parameters
+        ----------
+        G : GraphV2
+            The input graph to be sampled.
+        graph_name : str
+            The name of the new graph that is stored in the graph catalog.
+        start_nodes : list of int, optional
+            IDs of the initial set of nodes in the original graph from which the sampling random walks will start.
+            By default, a single node is chosen uniformly at random.
+        restart_probability : float, optional
+            The probability that a sampling random walk restarts from one of the start nodes.
+            Default is 0.1.
+        sampling_ratio : float, optional
+            The fraction of nodes in the original graph to be sampled.
+            Default is 0.15.
+        node_label_stratification : bool, optional
+            If true, preserves the node label distribution of the original graph.
+            Default is False.
+        relationship_weight_property : str, optional
+            Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.
+        relationship_types : list of str, optional
+            Filter the named graph using the given relationship types. Relationships with any of the given types will be
+            included.
+        node_labels : list of str, optional
+            Filter the named graph using the given node labels. Nodes with any of the given labels will be included.
+        sudo : bool, optional
+            Bypass heap control. Use with caution.
+            Default is False.
+        log_progress : bool, optional
+            Turn `on/off` percentage logging while running procedure.
+            Default is True.
+        username : str, optional
+            Use Administrator access to run an algorithm on a graph owned by another user.
+            Default is None.
+        concurrency : int, optional
+            The number of concurrent threads used for running the algorithm.
+            Default is 4.
+        job_id : str, optional
+            An ID that can be provided to more easily track the algorithm's progress.
+            By default, a random job id is generated.
+
+        Returns
+        -------
+        GraphWithSamplingResult
+            Tuple of the graph object and the result of the Random walk with restarts (RWR), including the dimensions of the sampled graph.
+        """
+        pass
+
+    @abstractmethod
+    def cnarw(
+        self,
+        G: GraphV2,
+        graph_name: str,
+        start_nodes: Optional[List[int]] = None,
+        restart_probability: Optional[float] = None,
+        sampling_ratio: Optional[float] = None,
+        node_label_stratification: Optional[bool] = None,
+        relationship_weight_property: Optional[str] = None,
+        relationship_types: Optional[List[str]] = None,
+        node_labels: Optional[List[str]] = None,
+        sudo: Optional[bool] = None,
+        log_progress: Optional[bool] = None,
+        username: Optional[str] = None,
+        concurrency: Optional[int] = None,
+        job_id: Optional[str] = None,
+    ) -> GraphWithSamplingResult:
+        """
+        Common Neighbour Aware Random Walk (CNARW) samples the graph by taking random walks from a set of start nodes.
+
+        CNARW is a graph sampling technique that involves optimizing the selection of the next-hop node. It takes into
+        account the number of common neighbours between the current node and the next-hop candidates. On each step of a
+        random walk, there is a probability that the walk stops, and a new walk from one of the start nodes starts
+        instead (i.e. the walk restarts). Each node visited on these walks will be part of the sampled subgraph. The
+        resulting subgraph is stored as a new graph in the Graph Catalog.
+
+        Parameters
+        ----------
+        G : GraphV2
+            The input graph to be sampled.
+        graph_name : str
+            The name of the new graph that is stored in the graph catalog.
+        start_nodes : list of int, optional
+            IDs of the initial set of nodes in the original graph from which the sampling random walks will start.
+            By default, a single node is chosen uniformly at random.
+        restart_probability : float, optional
+            The probability that a sampling random walk restarts from one of the start nodes.
+            Default is 0.1.
+        sampling_ratio : float, optional
+            The fraction of nodes in the original graph to be sampled.
+            Default is 0.15.
+        node_label_stratification : bool, optional
+            If true, preserves the node label distribution of the original graph.
+            Default is False.
+        relationship_weight_property : str, optional
+            Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.
+        relationship_types : list of str, optional
+            Filter the named graph using the given relationship types. Relationships with any of the given types will be
+            included.
+        node_labels : list of str, optional
+            Filter the named graph using the given node labels. Nodes with any of the given labels will be included.
+        sudo : bool, optional
+            Bypass heap control. Use with caution.
+            Default is False.
+        log_progress : bool, optional
+            Turn `on/off` percentage logging while running procedure.
+            Default is True.
+        username : str, optional
+            Use Administrator access to run an algorithm on a graph owned by another user.
+            Default is None.
+        concurrency : int, optional
+            The number of concurrent threads used for running the algorithm.
+            Default is 4.
+        job_id : str, optional
+            An ID that can be provided to more easily track the algorithm's progress.
+            By default, a random job id is generated.
+
+        Returns
+        -------
+        GraphWithSamplingResult
+            Tuple of the graph object and the result of the Common Neighbour Aware Random Walk (CNARW), including the dimensions of the sampled graph.
+        """
+        pass
+
+
+class GraphSamplingResult(BaseResult):  # Result record carried by GraphWithSamplingResult for rwr / cnarw runs.
+    graph_name: str  # presumably the name of the newly created sampled graph -- confirm against the server response
+    from_graph_name: str  # presumably the name of the graph that was sampled from -- confirm
+    node_count: int  # node dimension of the sampled graph
+    relationship_count: int  # relationship dimension of the sampled graph
+    start_node_count: int  # presumably the number of start nodes the walks used -- confirm
+    project_millis: int  # presumably time spent projecting the sampled graph, in milliseconds -- confirm
+
+
+class GraphWithSamplingResult(NamedTuple):  # Pairs a sampled graph with its result; also usable as a context manager.
+    graph: GraphV2  # handle to the sampled graph
+    result: GraphSamplingResult  # result record for the sampling run
+
+    def __enter__(self) -> GraphV2:
+        return self.graph  # `with ... as g` binds the graph itself, not the (graph, result) tuple
+
+    def __exit__(
+        self,
+        exception_type: Optional[Type[BaseException]],
+        exception_value: Optional[BaseException],
+        traceback: Optional[TracebackType],
+    ) -> None:
+        self.graph.drop()  # always called on exit, also when the body raised; returning None lets exceptions propagate
@@ -7,10 +7,10 @@
 from graphdatascience.procedure_surface.api.base_result import BaseResult
 from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
 from graphdatascience.procedure_surface.api.catalog.graph_info import GraphInfo, GraphInfoWithDegrees
+from graphdatascience.procedure_surface.api.catalog.graph_sampling_endpoints import GraphSamplingEndpoints
 from graphdatascience.procedure_surface.api.catalog.node_label_endpoints import NodeLabelEndpoints
 from graphdatascience.procedure_surface.api.catalog.node_properties_endpoints import NodePropertiesEndpoints
 from graphdatascience.procedure_surface.api.catalog.relationships_endpoints import RelationshipsEndpoints
-from graphdatascience.procedure_surface.api.graph_sampling_endpoints import GraphSamplingEndpoints
 
 
 class CatalogEndpoints(ABC):
 
@@ -7,15 +7,10 @@
 
 from graphdatascience.procedure_surface.api.base_result import BaseResult
 from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
-
-from .estimation_result import EstimationResult
+from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
 
 
 class ArticleRankEndpoints(ABC):
-    """
-    Abstract base class defining the API for the ArticleRank algorithm.
-    """
-
     @abstractmethod
     def mutate(
         self,
@@ -36,40 +31,44 @@ def mutate(
         source_nodes: Optional[Any] = None,
     ) -> ArticleRankMutateResult:
         """
-        Executes the ArticleRank algorithm and writes the results back to the graph as a node property.
+        Runs the Article Rank algorithm and stores the results in the graph catalog as a new node property.
+
+        ArticleRank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
+        Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
+        Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.
 
         Parameters
         ----------
         G : GraphV2
             The graph to run the algorithm on
         mutate_property : str
-            The property name to store the ArticleRank score for each node
+            Name of the node property to store the results in.
         damping_factor : Optional[float], default=None
-            The damping factor controls the probability of a random jump to a random node
+            Probability of a jump to a random node.
         tolerance : Optional[float], default=None
-            Minimum change in scores between iterations
+            Minimum change in scores between iterations.
         max_iterations : Optional[int], default=None
-            The maximum number of iterations to run
+            Maximum number of iterations to run.
         scaler : Optional[Any], default=None
-            Configuration for scaling the scores
+            Name of the scaler applied on the resulting scores.
         relationship_types : Optional[List[str]], default=None
-            The relationships types used to select relationships for this algorithm run
+            Filter the graph using the given relationship types. Relationships with any of the given types will be included.
         node_labels : Optional[List[str]], default=None
-            The node labels used to select nodes for this algorithm run
+            Filter the graph using the given node labels. Nodes with any of the given labels will be included.
         sudo : Optional[bool], default=None
-            Override memory estimation limits
+            Disable the memory guard.
         log_progress : Optional[bool], default=None
-            Whether to log progress
+            Display progress logging.
         username : Optional[str], default=None
             The username to attribute the procedure run to
         concurrency : Optional[Any], default=None
-            The number of concurrent threads
+            Number of threads to use for running the algorithm.
         job_id : Optional[Any], default=None
-            An identifier for the job
+            Identifier for the job.
         relationship_weight_property : Optional[str], default=None
-            The property name that contains weight
+            Name of the property to be used as weights.
         source_nodes : Optional[Any], default=None
-            The source nodes for personalized ArticleRank
+            List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.
 
         Returns
         -------
@@ -96,38 +95,42 @@ def stats(
         source_nodes: Optional[Any] = None,
     ) -> ArticleRankStatsResult:
         """
-        Executes the ArticleRank algorithm and returns result statistics without writing the result to Neo4j.
+        Runs the Article Rank algorithm and returns result statistics without storing the results.
+
+        ArticleRank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
+        Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
+        Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.
 
         Parameters
         ----------
         G : GraphV2
             The graph to run the algorithm on
         damping_factor : Optional[float], default=None
-            The damping factor controls the probability of a random jump to a random node
+            Probability of a jump to a random node.
         tolerance : Optional[float], default=None
-            Minimum change in scores between iterations
+            Minimum change in scores between iterations.
         max_iterations : Optional[int], default=None
-            The maximum number of iterations to run
+            Maximum number of iterations to run.
         scaler : Optional[Any], default=None
-            Configuration for scaling the scores
+            Name of the scaler applied on the resulting scores.
         relationship_types : Optional[List[str]], default=None
-            The relationships types used to select relationships for this algorithm run
+            Filter the graph using the given relationship types. Relationships with any of the given types will be included.
         node_labels : Optional[List[str]], default=None
-            The node labels used to select nodes for this algorithm run
+            Filter the graph using the given node labels. Nodes with any of the given labels will be included.
         sudo : Optional[bool], default=None
-            Override memory estimation limits
+            Disable the memory guard.
         log_progress : Optional[bool], default=None
-            Whether to log progress
+            Display progress logging.
         username : Optional[str], default=None
             The username to attribute the procedure run to
         concurrency : Optional[Any], default=None
-            The number of concurrent threads
+            Number of threads to use for running the algorithm.
         job_id : Optional[Any], default=None
-            An identifier for the job
+            Identifier for the job.
         relationship_weight_property : Optional[str], default=None
-            The property name that contains weight
+            Name of the property to be used as weights.
         source_nodes : Optional[Any], default=None
-            The source nodes for personalized ArticleRank
+            List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.
 
         Returns
         -------
@@ -214,7 +217,11 @@ def write(
         write_concurrency: Optional[int] = None,
     ) -> ArticleRankWriteResult:
         """
-        Executes the ArticleRank algorithm and writes the results to Neo4j.
+        Runs the Article Rank algorithm and stores the result in the Neo4j database as a new node property.
+
+        ArticleRank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
+        Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
+        Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.
 
         Parameters
         ----------
@@ -223,31 +230,31 @@ def write(
         write_property : str
             The property name to write the ArticleRank score for each node
         damping_factor : Optional[float], default=None
-            The damping factor controls the probability of a random jump to a random node
+            Probability of a jump to a random node.
         tolerance : Optional[float], default=None
-            Minimum change in scores between iterations
+            Minimum change in scores between iterations.
         max_iterations : Optional[int], default=None
-            The maximum number of iterations to run
+            Maximum number of iterations to run.
         scaler : Optional[Any], default=None
-            Configuration for scaling the scores
+            Name of the scaler applied on the resulting scores.
         relationship_types : Optional[List[str]], default=None
-            The relationships types used to select relationships for this algorithm run
+            Filter the graph using the given relationship types. Relationships with any of the given types will be included.
         node_labels : Optional[List[str]], default=None
-            The node labels used to select nodes for this algorithm run
+            Filter the graph using the given node labels. Nodes with any of the given labels will be included.
         sudo : Optional[bool], default=None
-            Override memory estimation limits
+            Disable the memory guard.
         log_progress : Optional[bool], default=None
-            Whether to log progress
+            Display progress logging.
         username : Optional[str], default=None
             The username to attribute the procedure run to
         concurrency : Optional[Any], default=None
-            The number of concurrent threads
+            Number of threads to use for running the algorithm.
         job_id : Optional[Any], default=None
-            An identifier for the job
+            Identifier for the job.
         relationship_weight_property : Optional[str], default=None
-            The property name that contains weight
+            Name of the property to be used as weights.
         source_nodes : Optional[Any], default=None
-            The source nodes for personalized ArticleRank
+            List of node ids to use as starting points. Use a list of list pairs to associate each node with a bias > 0.
         write_concurrency : Optional[int], default=None
             The number of concurrent threads used for writing