Skip to content

Commit f96391e

Browse files
committed
WIP
1 parent f55b82b commit f96391e

File tree

7 files changed

+89
-65
lines changed

7 files changed

+89
-65
lines changed

graphdatascience/procedure_surface/api/centrality/betweenness_endpoints.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77

88
from graphdatascience.procedure_surface.api.base_result import BaseResult
99
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
1011
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
1112

1213

1314
class BetweennessEndpoints(ABC):
15+
1416
@abstractmethod
1517
def mutate(
1618
self,

graphdatascience/procedure_surface/api/centrality/pagerank_endpoints.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77

88
from graphdatascience.procedure_surface.api.base_result import BaseResult
99
from graphdatascience.procedure_surface.api.catalog.graph_api import GraphV2
10+
1011
from graphdatascience.procedure_surface.api.estimation_result import EstimationResult
1112

1213

1314
class PageRankEndpoints(ABC):
15+
1416
@abstractmethod
1517
def mutate(
1618
self,

graphdatascience/procedure_surface/api/community/k1coloring_endpoints.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111

1212

1313
class K1ColoringEndpoints(ABC):
14-
"""
15-
Abstract base class defining the API for the K-1 Coloring algorithm.
16-
"""
1714

1815
@abstractmethod
1916
def mutate(
@@ -31,37 +28,43 @@ def mutate(
3128
job_id: Optional[Any] = None,
3229
) -> K1ColoringMutateResult:
3330
"""
34-
Executes the K-1 Coloring algorithm and writes the results to the in-memory graph as node properties.
31+
Runs the K-1 Coloring algorithm and stores the results in the graph catalog as a new node property.
32+
33+
The K-1 Coloring algorithm assigns a color to every node in the graph, trying to optimize for two objectives:
34+
to make sure that every neighbor of a given node has a different color than the node itself, and to use as few colors as possible.
35+
Note that the graph coloring problem is proven to be NP-complete, which makes it intractable on anything but trivial graph sizes.
36+
For that reason the implemented algorithm is a greedy algorithm that is neither guaranteed to produce an optimal solution nor to always produce a correct result where no two neighboring nodes have the same color.
37+
However, the precision can be controlled by the number of iterations this algorithm runs.
3538
3639
Parameters
3740
----------
3841
G : GraphV2
3942
The graph to run the algorithm on
4043
mutate_property : str
41-
The property name to store the color for each node
44+
Name of the node property to store the results in.
4245
batch_size : Optional[int], default=None
4346
The batch size for processing
4447
max_iterations : Optional[int], default=None
45-
The maximum number of iterations of K-1 Coloring to run
48+
Maximum number of iterations to run.
4649
relationship_types : Optional[List[str]], default=None
47-
The relationships types used to select relationships for this algorithm run
50+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
4851
node_labels : Optional[List[str]], default=None
49-
The node labels used to select nodes for this algorithm run
52+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
5053
sudo : Optional[bool], default=None
51-
Override memory estimation limits
54+
Disable the memory guard.
5255
log_progress : Optional[bool], default=None
53-
Whether to log progress
56+
Display progress logging.
5457
username : Optional[str], default=None
55-
The username to attribute the procedure run to
58+
As an administrator, run the algorithm as a different user in order to also access their graphs.
5659
concurrency : Optional[Any], default=None
57-
The number of concurrent threads
60+
Number of CPU threads to use.
5861
job_id : Optional[Any], default=None
59-
An identifier for the job
62+
Identifier for the computation.
6063
6164
Returns
6265
-------
6366
K1ColoringMutateResult
64-
Algorithm metrics and statistics
67+
Algorithm metrics and statistics including node coloring results
6568
"""
6669
pass
6770

graphdatascience/procedure_surface/api/community/kcore_endpoints.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,33 +29,38 @@ def mutate(
2929
job_id: Optional[Any] = None,
3030
) -> KCoreMutateResult:
3131
"""
32-
Executes the K-Core algorithm and writes the results to the in-memory graph as node properties.
32+
Runs the K-Core Decomposition algorithm and stores the results in the graph catalog as a new node property.
33+
34+
The K-core decomposition constitutes a process that separates the nodes in a graph into groups based on the degree sequence and topology of the graph.
35+
The term `i-core` refers to a maximal subgraph of the original graph such that each node in this subgraph has degree at least `i`.
36+
Each node is associated with a core value which denotes the largest value `i` such that the node belongs to the `i-core`.
37+
Standard algorithms for K-Core Decomposition iteratively remove the node of lowest degree until the graph becomes empty.
3338
3439
Parameters
3540
----------
3641
G : GraphV2
3742
The graph to run the algorithm on
3843
mutate_property : str
39-
The property name to store the core value for each node
44+
Name of the node property to store the results in.
4045
relationship_types : Optional[List[str]], default=None
41-
The relationships types used to select relationships for this algorithm run
46+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
4247
node_labels : Optional[List[str]], default=None
43-
The node labels used to select nodes for this algorithm run
48+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
4449
sudo : Optional[bool], default=None
45-
Override memory estimation limits
50+
Disable the memory guard.
4651
log_progress : Optional[bool], default=None
47-
Whether to log progress
52+
Display progress logging.
4853
username : Optional[str], default=None
49-
The username to attribute the procedure run to
54+
As an administrator, run the algorithm as a different user in order to also access their graphs.
5055
concurrency : Optional[Any], default=None
51-
The number of concurrent threads
52-
job_id : Optional[Any], default=None
53-
An identifier for the job
56+
Number of CPU threads to use.
57+
job_id : Optional[Any], default=None
58+
Identifier for the computation.
5459
5560
Returns
5661
-------
5762
KCoreMutateResult
58-
Algorithm metrics and statistics
63+
Algorithm metrics and statistics including core decomposition results
5964
"""
6065
pass
6166

graphdatascience/procedure_surface/api/community/louvain_endpoints.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,47 +36,51 @@ def mutate(
3636
relationship_weight_property: Optional[str] = None,
3737
) -> LouvainMutateResult:
3838
"""
39-
Executes the Louvain algorithm and writes the results to the in-memory graph as node properties.
39+
Runs the Louvain algorithm and stores the results in the graph catalog as a new node property.
40+
41+
The Louvain method is an algorithm to detect communities in large networks.
42+
It maximizes a modularity score for each community, where the modularity quantifies the quality of an assignment of nodes to communities by evaluating how much more densely connected the nodes within a community are, compared to how connected they would be in a random network.
43+
The Louvain algorithm is a hierarchical clustering algorithm that recursively merges communities into a single node and executes the modularity clustering on the condensed graphs.
4044
4145
Parameters
4246
----------
4347
G : GraphV2
4448
The graph to run the algorithm on
4549
mutate_property : str
46-
The property name to store the community ID for each node
50+
Name of the node property to store the results in.
4751
tolerance : Optional[float], default=None
48-
The tolerance value for the algorithm convergence
52+
Minimum change in scores between iterations.
4953
max_levels : Optional[int], default=None
5054
The maximum number of levels in the hierarchy
5155
include_intermediate_communities : Optional[bool], default=None
52-
Whether to include intermediate community assignments
56+
Whether to include intermediate communities
5357
max_iterations : Optional[int], default=None
54-
The maximum number of iterations per level
58+
Maximum number of iterations to run.
5559
relationship_types : Optional[List[str]], default=None
56-
The relationships types used to select relationships for this algorithm run
60+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
5761
node_labels : Optional[List[str]], default=None
58-
The node labels used to select nodes for this algorithm run
62+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
5963
sudo : Optional[bool], default=None
60-
Override memory estimation limits
64+
Disable the memory guard.
6165
log_progress : Optional[bool], default=None
62-
Whether to log progress
66+
Display progress logging.
6367
username : Optional[str], default=None
64-
The username to attribute the procedure run to
68+
As an administrator, run the algorithm as a different user in order to also access their graphs.
6569
concurrency : Optional[Any], default=None
66-
The number of concurrent threads
70+
Number of CPU threads to use.
6771
job_id : Optional[Any], default=None
68-
An identifier for the job
72+
Identifier for the computation.
6973
seed_property : Optional[str], default=None
70-
Defines node properties that are used as initial community identifiers
74+
The property name that contains seed values
7175
consecutive_ids : Optional[bool], default=None
72-
Flag to decide whether community identifiers are mapped into a consecutive id space
76+
Whether to use consecutive IDs
7377
relationship_weight_property : Optional[str], default=None
74-
The property name that contains weight
78+
Name of the property to be used as weights.
7579
7680
Returns
7781
-------
7882
LouvainMutateResult
79-
Algorithm metrics and statistics
83+
Algorithm metrics and statistics including community detection results
8084
"""
8185
pass
8286

graphdatascience/procedure_surface/api/community/scc_endpoints.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,35 +30,39 @@ def mutate(
3030
consecutive_ids: Optional[bool] = None,
3131
) -> SccMutateResult:
3232
"""
33-
Executes the SCC algorithm and writes the results to the in-memory graph as node properties.
33+
Runs the Strongly Connected Components algorithm and stores the results in the graph catalog as a new node property.
34+
35+
The Strongly Connected Components (SCC) algorithm finds maximal sets of connected nodes in a directed graph.
36+
A set is considered a strongly connected component if there is a directed path between each pair of nodes within the set.
37+
It is often used early in a graph analysis process to help get an idea of how the graph is structured.
3438
3539
Parameters
3640
----------
3741
G : GraphV2
3842
The graph to run the algorithm on
3943
mutate_property : str
40-
The property name to store the component ID for each node
44+
Name of the node property to store the results in.
4145
relationship_types : Optional[List[str]], default=None
42-
The relationships types used to select relationships for this algorithm run
46+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
4347
node_labels : Optional[List[str]], default=None
44-
The node labels used to select nodes for this algorithm run
48+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
4549
sudo : Optional[bool], default=None
46-
Override memory estimation limits
50+
Disable the memory guard.
4751
log_progress : Optional[bool], default=None
48-
Whether to log progress
52+
Display progress logging.
4953
username : Optional[str], default=None
50-
The username to attribute the procedure run to
54+
As an administrator, run the algorithm as a different user in order to also access their graphs.
5155
concurrency : Optional[Any], default=None
52-
The number of concurrent threads
56+
Number of CPU threads to use.
5357
job_id : Optional[Any], default=None
54-
An identifier for the job
58+
Identifier for the computation.
5559
consecutive_ids : Optional[bool], default=None
56-
Flag to decide whether component identifiers are mapped into a consecutive id space
60+
Whether to use consecutive IDs for components
5761
5862
Returns
5963
-------
6064
SccMutateResult
61-
Algorithm metrics and statistics
65+
Algorithm metrics and statistics including strongly connected components results
6266
"""
6367
pass
6468

graphdatascience/procedure_surface/api/community/wcc_endpoints.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,41 +33,45 @@ def mutate(
3333
relationship_weight_property: Optional[str] = None,
3434
) -> WccMutateResult:
3535
"""
36-
Executes the WCC algorithm and writes the results to the in-memory graph as node properties.
36+
Runs the Weakly Connected Components algorithm and stores the results in the graph catalog as a new node property.
37+
38+
The Weakly Connected Components (WCC) algorithm finds sets of connected nodes in directed and undirected graphs where two nodes are connected if there exists a path between them.
39+
In contrast to Strongly Connected Components (SCC), the direction of relationships on the path between two nodes is not considered.
40+
WCC is often used early in an analysis to understand the structure of a graph, enabling running other algorithms independently on an identified cluster.
3741
3842
Parameters
3943
----------
4044
G : GraphV2
4145
The graph to run the algorithm on
4246
mutate_property : str
43-
The property name to store the component ID for each node
47+
Name of the node property to store the results in.
4448
threshold : Optional[float], default=None
4549
The minimum required weight to consider a relationship during traversal
4650
relationship_types : Optional[List[str]], default=None
47-
The relationships types used to select relationships for this algorithm run
51+
Filter the graph using the given relationship types. Relationships with any of the given types will be included.
4852
node_labels : Optional[List[str]], default=None
49-
The node labels used to select nodes for this algorithm run
53+
Filter the graph using the given node labels. Nodes with any of the given labels will be included.
5054
sudo : Optional[bool], default=None
51-
Override memory estimation limits
55+
Disable the memory guard.
5256
log_progress : Optional[bool], default=None
53-
Whether to log progress
57+
Display progress logging.
5458
username : Optional[str], default=None
55-
The username to attribute the procedure run to
59+
As an administrator, run the algorithm as a different user in order to also access their graphs.
5660
concurrency : Optional[Any], default=None
57-
The number of concurrent threads
61+
Number of CPU threads to use.
5862
job_id : Optional[Any], default=None
59-
An identifier for the job
63+
Identifier for the computation.
6064
seed_property : Optional[str], default=None
61-
Defines node properties that are used as initial component identifiers
65+
The property name that contains seed values
6266
consecutive_ids : Optional[bool], default=None
63-
Flag to decide whether component identifiers are mapped into a consecutive id space
67+
Whether to use consecutive IDs for components
6468
relationship_weight_property : Optional[str], default=None
65-
The property name that contains weight
69+
Name of the property to be used as weights.
6670
6771
Returns
6872
-------
6973
WccMutateResult
70-
Algorithm metrics and statistics
74+
Algorithm metrics and statistics including weakly connected components results
7175
"""
7276
pass
7377

0 commit comments

Comments
 (0)