Skip to content

Commit 6202d68

Browse files
authored
Merge pull request #944 from DarthMax/AV2_doc_improvements
Improve docstrings
2 parents b381416 + e1f518b commit 6202d68

File tree

99 files changed

+754
-641
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+754
-641
lines changed
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from types import TracebackType
5+
from typing import List, NamedTuple, Optional, Type
6+
7+
from graphdatascience.procedure_surface.api.base_result import BaseResult
8+
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
9+
10+
11+
class GraphSamplingEndpoints(ABC):
12+
"""
13+
Abstract base class defining the API for graph sampling operations.
14+
"""
15+
16+
@abstractmethod
17+
def rwr(
18+
self,
19+
G: GraphV2,
20+
graph_name: str,
21+
start_nodes: Optional[List[int]] = None,
22+
restart_probability: Optional[float] = None,
23+
sampling_ratio: Optional[float] = None,
24+
node_label_stratification: Optional[bool] = None,
25+
relationship_weight_property: Optional[str] = None,
26+
relationship_types: Optional[List[str]] = None,
27+
node_labels: Optional[List[str]] = None,
28+
sudo: Optional[bool] = None,
29+
log_progress: Optional[bool] = None,
30+
username: Optional[str] = None,
31+
concurrency: Optional[int] = None,
32+
job_id: Optional[str] = None,
33+
) -> GraphWithSamplingResult:
34+
"""
35+
Random walk with restarts (RWR) samples the graph by taking random walks from a set of start nodes.
36+
37+
On each step of a random walk, there is a probability that the walk stops, and a new walk from one of the start
38+
nodes starts instead (i.e. the walk restarts). Each node visited on these walks will be part of the sampled
39+
subgraph. The resulting subgraph is stored as a new graph in the Graph Catalog.
40+
41+
Parameters
42+
----------
43+
G : GraphV2
44+
The input graph to be sampled.
45+
graph_name : str
46+
The name of the new graph that is stored in the graph catalog.
47+
start_nodes : list of int, optional
48+
IDs of the initial set of nodes in the original graph from which the sampling random walks will start.
49+
By default, a single node is chosen uniformly at random.
50+
restart_probability : float, optional
51+
The probability that a sampling random walk restarts from one of the start nodes.
52+
Default is 0.1.
53+
sampling_ratio : float, optional
54+
The fraction of nodes in the original graph to be sampled.
55+
Default is 0.15.
56+
node_label_stratification : bool, optional
57+
If true, preserves the node label distribution of the original graph.
58+
Default is False.
59+
relationship_weight_property : str, optional
60+
Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.
61+
relationship_types : list of str, optional
62+
Filter the named graph using the given relationship types. Relationships with any of the given types will be
63+
included.
64+
node_labels : list of str, optional
65+
Filter the named graph using the given node labels. Nodes with any of the given labels will be included.
66+
sudo : bool, optional
67+
Bypass heap control. Use with caution.
68+
Default is False.
69+
log_progress : bool, optional
70+
Turn percentage logging on or off while running the procedure.
71+
Default is True.
72+
username : str, optional
73+
Use Administrator access to run an algorithm on a graph owned by another user.
74+
Default is None.
75+
concurrency : int, optional
76+
The number of concurrent threads used for running the algorithm.
77+
Default is 4.
78+
job_id : str, optional
79+
An ID that can be provided to more easily track the algorithm’s progress.
80+
By default, a random job id is generated.
81+
82+
Returns
83+
-------
84+
GraphWithSamplingResult
85+
Tuple of the graph object and the result of the Random Walk with Restarts (RWR), including the dimensions of the sampled graph.
86+
"""
87+
pass
88+
89+
@abstractmethod
90+
def cnarw(
91+
self,
92+
G: GraphV2,
93+
graph_name: str,
94+
start_nodes: Optional[List[int]] = None,
95+
restart_probability: Optional[float] = None,
96+
sampling_ratio: Optional[float] = None,
97+
node_label_stratification: Optional[bool] = None,
98+
relationship_weight_property: Optional[str] = None,
99+
relationship_types: Optional[List[str]] = None,
100+
node_labels: Optional[List[str]] = None,
101+
sudo: Optional[bool] = None,
102+
log_progress: Optional[bool] = None,
103+
username: Optional[str] = None,
104+
concurrency: Optional[int] = None,
105+
job_id: Optional[str] = None,
106+
) -> GraphWithSamplingResult:
107+
"""
108+
Common Neighbour Aware Random Walk (CNARW) samples the graph by taking random walks from a set of start nodes.
109+
110+
CNARW is a graph sampling technique that involves optimizing the selection of the next-hop node. It takes into
111+
account the number of common neighbours between the current node and the next-hop candidates. On each step of a
112+
random walk, there is a probability that the walk stops, and a new walk from one of the start nodes starts
113+
instead (i.e. the walk restarts). Each node visited on these walks will be part of the sampled subgraph. The
114+
resulting subgraph is stored as a new graph in the Graph Catalog.
115+
116+
Parameters
117+
----------
118+
G : GraphV2
119+
The input graph to be sampled.
120+
graph_name : str
121+
The name of the new graph that is stored in the graph catalog.
122+
start_nodes : list of int, optional
123+
IDs of the initial set of nodes in the original graph from which the sampling random walks will start.
124+
By default, a single node is chosen uniformly at random.
125+
restart_probability : float, optional
126+
The probability that a sampling random walk restarts from one of the start nodes.
127+
Default is 0.1.
128+
sampling_ratio : float, optional
129+
The fraction of nodes in the original graph to be sampled.
130+
Default is 0.15.
131+
node_label_stratification : bool, optional
132+
If true, preserves the node label distribution of the original graph.
133+
Default is False.
134+
relationship_weight_property : str, optional
135+
Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted.
136+
relationship_types : list of str, optional
137+
Filter the named graph using the given relationship types. Relationships with any of the given types will be
138+
included.
139+
node_labels : list of str, optional
140+
Filter the named graph using the given node labels. Nodes with any of the given labels will be included.
141+
sudo : bool, optional
142+
Bypass heap control. Use with caution.
143+
Default is False.
144+
log_progress : bool, optional
145+
Turn percentage logging on or off while running the procedure.
146+
Default is True.
147+
username : str, optional
148+
Use Administrator access to run an algorithm on a graph owned by another user.
149+
Default is None.
150+
concurrency : int, optional
151+
The number of concurrent threads used for running the algorithm.
152+
Default is 4.
153+
job_id : str, optional
154+
An ID that can be provided to more easily track the algorithm’s progress.
155+
By default, a random job id is generated.
156+
157+
Returns
158+
-------
159+
GraphWithSamplingResult
160+
Tuple of the graph object and the result of the Common Neighbour Aware Random Walk (CNARW), including the dimensions of the sampled graph.
161+
"""
162+
pass
163+
164+
165+
class GraphSamplingResult(BaseResult):
166+
graph_name: str
167+
from_graph_name: str
168+
node_count: int
169+
relationship_count: int
170+
start_node_count: int
171+
project_millis: int
172+
173+
174+
class GraphWithSamplingResult(NamedTuple):
175+
graph: GraphV2
176+
result: GraphSamplingResult
177+
178+
def __enter__(self) -> GraphV2:
179+
return self.graph
180+
181+
def __exit__(
182+
self,
183+
exception_type: Optional[Type[BaseException]],
184+
exception_value: Optional[BaseException],
185+
traceback: Optional[TracebackType],
186+
) -> None:
187+
self.graph.drop()

graphdatascience/procedure_surface/api/catalog_endpoints.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77
from graphdatascience.procedure_surface.api.base_result import BaseResult
88
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
99
from graphdatascience.procedure_surface.api.catalog.graph_info import GraphInfo, GraphInfoWithDegrees
10+
from graphdatascience.procedure_surface.api.catalog.graph_sampling_endpoints import GraphSamplingEndpoints
1011
from graphdatascience.procedure_surface.api.catalog.node_label_endpoints import NodeLabelEndpoints
1112
from graphdatascience.procedure_surface.api.catalog.node_properties_endpoints import NodePropertiesEndpoints
1213
from graphdatascience.procedure_surface.api.catalog.relationships_endpoints import RelationshipsEndpoints
13-
from graphdatascience.procedure_surface.api.graph_sampling_endpoints import GraphSamplingEndpoints
1414

1515

1616
class CatalogEndpoints(ABC):

graphdatascience/procedure_surface/api/centrality/__init__.py

Whitespace-only changes.

graphdatascience/procedure_surface/api/articlerank_endpoints.py renamed to graphdatascience/procedure_surface/api/centrality/articlerank_endpoints.py

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,10 @@
77

88
from graphdatascience.procedure_surface.api.base_result import BaseResult
99
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10-
11-
from .estimation_result import EstimationResult
10+
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
1211

1312

1413
class ArticleRankEndpoints(ABC):
15-
"""
16-
Abstract base class defining the API for the ArticleRank algorithm.
17-
"""
18-
1914
@abstractmethod
2015
def mutate(
2116
self,
@@ -36,40 +31,44 @@ def mutate(
3631
source_nodes: Optional[Any] = None,
3732
) -> ArticleRankMutateResult:
3833
"""
39-
Executes the ArticleRank algorithm and writes the results back to the graph as a node property.
34+
Runs the Article Rank algorithm and stores the results in the graph catalog as a new node property.
35+
36+
Article Rank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
37+
Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
38+
Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.
4039
4140
Parameters
4241
----------
4342
G : GraphV2
4443
The graph to run the algorithm on
4544
mutate_property : str
46-
The property name to store the ArticleRank score for each node
45+
Name of the node property to store the results in.
4746
damping_factor : Optional[float], default=None
48-
The damping factor controls the probability of a random jump to a random node
47+
Probability of a jump to a random node.
4948
tolerance : Optional[float], default=None
50-
Minimum change in scores between iterations
49+
Minimum change in scores between iterations.
5150
max_iterations : Optional[int], default=None
52-
The maximum number of iterations to run
51+
Maximum number of iterations to run.
5352
scaler : Optional[Any], default=None
54-
Configuration for scaling the scores
53+
Name of the scaler applied on the resulting scores.
5554
relationship_types : Optional[List[str]], default=None
56-
The relationships types used to select relationships for this algorithm run
55+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
5756
node_labels : Optional[List[str]], default=None
58-
The node labels used to select nodes for this algorithm run
57+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
5958
sudo : Optional[bool], default=None
60-
Override memory estimation limits
59+
Disable the memory guard.
6160
log_progress : Optional[bool], default=None
62-
Whether to log progress
61+
Display progress logging.
6362
username : Optional[str], default=None
6463
The username to attribute the procedure run to
6564
concurrency : Optional[Any], default=None
66-
The number of concurrent threads
65+
Number of threads to use for running the algorithm.
6766
job_id : Optional[Any], default=None
68-
An identifier for the job
67+
Identifier for the job.
6968
relationship_weight_property : Optional[str], default=None
70-
The property name that contains weight
69+
Name of the property to be used as weights.
7170
source_nodes : Optional[Any], default=None
72-
The source nodes for personalized ArticleRank
71+
List of node ids to use as starting points. Use a list of [node id, bias] pairs to associate each node with a bias > 0.
7372
7473
Returns
7574
-------
@@ -96,38 +95,42 @@ def stats(
9695
source_nodes: Optional[Any] = None,
9796
) -> ArticleRankStatsResult:
9897
"""
99-
Executes the ArticleRank algorithm and returns result statistics without writing the result to Neo4j.
98+
Runs the Article Rank algorithm and returns result statistics without storing the results.
99+
100+
Article Rank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
101+
Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
102+
Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.
100103
101104
Parameters
102105
----------
103106
G : GraphV2
104107
The graph to run the algorithm on
105108
damping_factor : Optional[float], default=None
106-
The damping factor controls the probability of a random jump to a random node
109+
Probability of a jump to a random node.
107110
tolerance : Optional[float], default=None
108-
Minimum change in scores between iterations
111+
Minimum change in scores between iterations.
109112
max_iterations : Optional[int], default=None
110-
The maximum number of iterations to run
113+
Maximum number of iterations to run.
111114
scaler : Optional[Any], default=None
112-
Configuration for scaling the scores
115+
Name of the scaler applied on the resulting scores.
113116
relationship_types : Optional[List[str]], default=None
114-
The relationships types used to select relationships for this algorithm run
117+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
115118
node_labels : Optional[List[str]], default=None
116-
The node labels used to select nodes for this algorithm run
119+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
117120
sudo : Optional[bool], default=None
118-
Override memory estimation limits
121+
Disable the memory guard.
119122
log_progress : Optional[bool], default=None
120-
Whether to log progress
123+
Display progress logging.
121124
username : Optional[str], default=None
122125
The username to attribute the procedure run to
123126
concurrency : Optional[Any], default=None
124-
The number of concurrent threads
127+
Number of threads to use for running the algorithm.
125128
job_id : Optional[Any], default=None
126-
An identifier for the job
129+
Identifier for the job.
127130
relationship_weight_property : Optional[str], default=None
128-
The property name that contains weight
131+
Name of the property to be used as weights.
129132
source_nodes : Optional[Any], default=None
130-
The source nodes for personalized ArticleRank
133+
List of node ids to use as starting points. Use a list of [node id, bias] pairs to associate each node with a bias > 0.
131134
132135
Returns
133136
-------
@@ -214,7 +217,11 @@ def write(
214217
write_concurrency: Optional[int] = None,
215218
) -> ArticleRankWriteResult:
216219
"""
217-
Executes the ArticleRank algorithm and writes the results to Neo4j.
220+
Runs the Article Rank algorithm and stores the result in the Neo4j database as a new node property.
221+
222+
Article Rank is a variant of the Page Rank algorithm, which measures the transitive influence of nodes.
223+
Page Rank follows the assumption that relationships originating from low-degree nodes have a higher influence than relationships from high-degree nodes.
224+
Article Rank lowers the influence of low-degree nodes by lowering the scores being sent to their neighbors in each iteration.
218225
219226
Parameters
220227
----------
@@ -223,31 +230,31 @@ def write(
223230
write_property : str
224231
The property name to write the ArticleRank score for each node
225232
damping_factor : Optional[float], default=None
226-
The damping factor controls the probability of a random jump to a random node
233+
Probability of a jump to a random node.
227234
tolerance : Optional[float], default=None
228-
Minimum change in scores between iterations
235+
Minimum change in scores between iterations.
229236
max_iterations : Optional[int], default=None
230-
The maximum number of iterations to run
237+
Maximum number of iterations to run.
231238
scaler : Optional[Any], default=None
232-
Configuration for scaling the scores
239+
Name of the scaler applied on the resulting scores.
233240
relationship_types : Optional[List[str]], default=None
234-
The relationships types used to select relationships for this algorithm run
241+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
235242
node_labels : Optional[List[str]], default=None
236-
The node labels used to select nodes for this algorithm run
243+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
237244
sudo : Optional[bool], default=None
238-
Override memory estimation limits
245+
Disable the memory guard.
239246
log_progress : Optional[bool], default=None
240-
Whether to log progress
247+
Display progress logging.
241248
username : Optional[str], default=None
242249
The username to attribute the procedure run to
243250
concurrency : Optional[Any], default=None
244-
The number of concurrent threads
251+
Number of threads to use for running the algorithm.
245252
job_id : Optional[Any], default=None
246-
An identifier for the job
253+
Identifier for the job.
247254
relationship_weight_property : Optional[str], default=None
248-
The property name that contains weight
255+
Name of the property to be used as weights.
249256
source_nodes : Optional[Any], default=None
250-
The source nodes for personalized ArticleRank
257+
List of node ids to use as starting points. Use a list of [node id, bias] pairs to associate each node with a bias > 0.
251258
write_concurrency : Optional[int], default=None
252259
The number of concurrent threads used for writing
253260

0 commit comments

Comments
 (0)