Skip to content

arrow v2/centrality part 2 #930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions graphdatascience/procedure_surface/api/base_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import Any

from pydantic import BaseModel
from pydantic.alias_generators import to_camel


class BaseResult(BaseModel, alias_generator=to_camel):
    """Common base class for procedure result models.

    Server responses carry camelCase keys; the ``to_camel`` alias
    generator maps them onto the snake_case field names declared by
    subclasses. Subscription syntax is supported so a result can be
    read like a dictionary (``result["compute_millis"]``).
    """

    def __getitem__(self, item: str) -> Any:
        """Return the value of the field named *item* (snake_case)."""
        return getattr(self, item)
25 changes: 5 additions & 20 deletions graphdatascience/procedure_surface/api/betweenness_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from typing import Any, List, Optional

from pandas import DataFrame
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel

from graphdatascience.procedure_surface.api.base_result import BaseResult

from ...graph.graph_object import Graph
from .estimation_result import EstimationResult
Expand Down Expand Up @@ -244,11 +244,9 @@ def estimate(
"""


class BetweennessMutateResult(BaseModel):
class BetweennessMutateResult(BaseResult):
"""Result of running Betweenness Centrality algorithm with mutate mode."""

model_config = ConfigDict(alias_generator=to_camel)

node_properties_written: int
pre_processing_millis: int
compute_millis: int
Expand All @@ -257,37 +255,24 @@ class BetweennessMutateResult(BaseModel):
centrality_distribution: dict[str, Any]
configuration: dict[str, Any]

def __getitem__(self, item: str) -> Any:
return getattr(self, item)


class BetweennessStatsResult(BaseModel):
class BetweennessStatsResult(BaseResult):
"""Result of running Betweenness Centrality algorithm with stats mode."""

model_config = ConfigDict(alias_generator=to_camel)

centrality_distribution: dict[str, Any]
pre_processing_millis: int
compute_millis: int
post_processing_millis: int
configuration: dict[str, Any]

def __getitem__(self, item: str) -> Any:
return getattr(self, item)


class BetweennessWriteResult(BaseModel):
class BetweennessWriteResult(BaseResult):
"""Result of running Betweenness Centrality algorithm with write mode."""

model_config = ConfigDict(alias_generator=to_camel)

node_properties_written: int
pre_processing_millis: int
compute_millis: int
post_processing_millis: int
write_millis: int
centrality_distribution: dict[str, Any]
configuration: dict[str, Any]

def __getitem__(self, item: str) -> Any:
return getattr(self, item)
316 changes: 316 additions & 0 deletions graphdatascience/procedure_surface/api/degree_endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, List, Optional

from pandas import DataFrame

from ...graph.graph_object import Graph
from .base_result import BaseResult
from .estimation_result import EstimationResult


class DegreeEndpoints(ABC):
    """
    Abstract base class defining the API for the Degree Centrality algorithm.

    Degree centrality measures the number of incoming and outgoing relationships from a node.
    It's one of the simplest centrality measures, where a node's importance is determined by
    the number of direct connections it has.

    NOTE(review): several parameters (``orientation``, ``concurrency``, ``job_id``,
    ``write_concurrency``) are typed ``Optional[Any]`` — presumably they accept plain
    values such as strings/ints as well as project-specific wrapper types; confirm
    against the implementing classes before tightening the annotations.
    """

    @abstractmethod
    def mutate(
        self,
        G: Graph,
        mutate_property: str,
        orientation: Optional[Any] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
    ) -> DegreeMutateResult:
        """
        Executes the Degree Centrality algorithm and writes the results to the in-memory graph as node properties.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        mutate_property : str
            The property name to store the degree centrality score for each node
        orientation : Optional[Any], default=None
            The orientation of relationships to consider. Can be 'NATURAL', 'REVERSE', or 'UNDIRECTED'.
            'NATURAL' (default) respects the direction of relationships as they are stored in the graph.
            'REVERSE' treats each relationship as if it were directed in the opposite direction.
            'UNDIRECTED' treats all relationships as undirected, effectively counting both directions.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run.
            If not specified, all relationship types are considered.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run.
            If not specified, all node labels are considered.
        sudo : Optional[bool], default=None
            Override memory estimation limits. Use with caution as this can lead to
            memory issues if the estimation is significantly wrong.
        log_progress : Optional[bool], default=None
            Whether to log progress of the algorithm execution
        username : Optional[str], default=None
            The username to attribute the procedure run to for auditing purposes
        concurrency : Optional[Any], default=None
            The number of concurrent threads used for the algorithm execution.
            If not specified, uses the default concurrency level.
        job_id : Optional[Any], default=None
            An identifier for the job that can be used for monitoring and cancellation
        relationship_weight_property : Optional[str], default=None
            The property name that contains relationship weights. If specified,
            weighted degree centrality is computed where each relationship contributes
            its weight to the total degree.

        Returns
        -------
        DegreeMutateResult
            Algorithm metrics and statistics including the centrality distribution
        """
        pass

    @abstractmethod
    def stats(
        self,
        G: Graph,
        orientation: Optional[Any] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
    ) -> DegreeStatsResult:
        """
        Executes the Degree Centrality algorithm and returns statistics without writing the result to Neo4j.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        orientation : Optional[Any], default=None
            The orientation of relationships to consider. Can be 'NATURAL', 'REVERSE', or 'UNDIRECTED'.
            'NATURAL' (default) respects the direction of relationships as they are stored in the graph.
            'REVERSE' treats each relationship as if it were directed in the opposite direction.
            'UNDIRECTED' treats all relationships as undirected, effectively counting both directions.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run.
            If not specified, all relationship types are considered.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run.
            If not specified, all node labels are considered.
        sudo : Optional[bool], default=None
            Override memory estimation limits. Use with caution as this can lead to
            memory issues if the estimation is significantly wrong.
        log_progress : Optional[bool], default=None
            Whether to log progress of the algorithm execution
        username : Optional[str], default=None
            The username to attribute the procedure run to for auditing purposes
        concurrency : Optional[Any], default=None
            The number of concurrent threads used for the algorithm execution.
            If not specified, uses the default concurrency level.
        job_id : Optional[Any], default=None
            An identifier for the job that can be used for monitoring and cancellation
        relationship_weight_property : Optional[str], default=None
            The property name that contains relationship weights. If specified,
            weighted degree centrality is computed where each relationship contributes
            its weight to the total degree.

        Returns
        -------
        DegreeStatsResult
            Algorithm statistics including the centrality distribution
        """
        pass

    @abstractmethod
    def stream(
        self,
        G: Graph,
        orientation: Optional[Any] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
    ) -> DataFrame:
        """
        Executes the Degree Centrality algorithm and returns a stream of results.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        orientation : Optional[Any], default=None
            The orientation of relationships to consider. Can be 'NATURAL', 'REVERSE', or 'UNDIRECTED'.
            'NATURAL' (default) respects the direction of relationships as they are stored in the graph.
            'REVERSE' treats each relationship as if it were directed in the opposite direction.
            'UNDIRECTED' treats all relationships as undirected, effectively counting both directions.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run.
            If not specified, all relationship types are considered.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run.
            If not specified, all node labels are considered.
        sudo : Optional[bool], default=None
            Override memory estimation limits. Use with caution as this can lead to
            memory issues if the estimation is significantly wrong.
        log_progress : Optional[bool], default=None
            Whether to log progress of the algorithm execution
        username : Optional[str], default=None
            The username to attribute the procedure run to for auditing purposes
        concurrency : Optional[Any], default=None
            The number of concurrent threads used for the algorithm execution.
            If not specified, uses the default concurrency level.
        job_id : Optional[Any], default=None
            An identifier for the job that can be used for monitoring and cancellation
        relationship_weight_property : Optional[str], default=None
            The property name that contains relationship weights. If specified,
            weighted degree centrality is computed where each relationship contributes
            its weight to the total degree.

        Returns
        -------
        DataFrame
            DataFrame with nodeId and score columns containing degree centrality results.
            Each row represents a node with its corresponding degree centrality score.
        """
        pass

    @abstractmethod
    def write(
        self,
        G: Graph,
        write_property: str,
        orientation: Optional[Any] = None,
        relationship_types: Optional[List[str]] = None,
        node_labels: Optional[List[str]] = None,
        sudo: Optional[bool] = None,
        log_progress: Optional[bool] = None,
        username: Optional[str] = None,
        concurrency: Optional[Any] = None,
        job_id: Optional[Any] = None,
        relationship_weight_property: Optional[str] = None,
        write_concurrency: Optional[Any] = None,
    ) -> DegreeWriteResult:
        """
        Executes the Degree Centrality algorithm and writes the results to the Neo4j database.

        Parameters
        ----------
        G : Graph
            The graph to run the algorithm on
        write_property : str
            The property name to store the degree centrality score for each node in the database
        orientation : Optional[Any], default=None
            The orientation of relationships to consider. Can be 'NATURAL', 'REVERSE', or 'UNDIRECTED'.
            'NATURAL' (default) respects the direction of relationships as they are stored in the graph.
            'REVERSE' treats each relationship as if it were directed in the opposite direction.
            'UNDIRECTED' treats all relationships as undirected, effectively counting both directions.
        relationship_types : Optional[List[str]], default=None
            The relationship types used to select relationships for this algorithm run.
            If not specified, all relationship types are considered.
        node_labels : Optional[List[str]], default=None
            The node labels used to select nodes for this algorithm run.
            If not specified, all node labels are considered.
        sudo : Optional[bool], default=None
            Override memory estimation limits. Use with caution as this can lead to
            memory issues if the estimation is significantly wrong.
        log_progress : Optional[bool], default=None
            Whether to log progress of the algorithm execution
        username : Optional[str], default=None
            The username to attribute the procedure run to for auditing purposes
        concurrency : Optional[Any], default=None
            The number of concurrent threads used for the algorithm execution.
            If not specified, uses the default concurrency level.
        job_id : Optional[Any], default=None
            An identifier for the job that can be used for monitoring and cancellation
        relationship_weight_property : Optional[str], default=None
            The property name that contains relationship weights. If specified,
            weighted degree centrality is computed where each relationship contributes
            its weight to the total degree.
        write_concurrency : Optional[Any], default=None
            The number of concurrent threads used during the write phase.
            If not specified, uses the same value as concurrency.

        Returns
        -------
        DegreeWriteResult
            Algorithm metrics and statistics including the centrality distribution and write timing
        """
        pass

    @abstractmethod
    def estimate(
        self,
        G: Optional[Graph] = None,
        projection_config: Optional[dict[str, Any]] = None,
    ) -> EstimationResult:
        """
        Estimate the memory consumption of the Degree Centrality algorithm.

        This method provides an estimate of the memory requirements for running the algorithm
        on a given graph, helping with capacity planning and resource allocation.

        NOTE(review): presumably exactly one of ``G`` and ``projection_config`` must be
        supplied — confirm and document the behavior when both or neither are given.

        Parameters
        ----------
        G : Optional[Graph], default=None
            The graph to be used in the estimation. If None, requires projection_config.
        projection_config : Optional[dict[str, Any]], default=None
            Configuration dictionary for the projection. Used when G is None to estimate
            memory requirements for a graph that would be created with this configuration.

        Returns
        -------
        EstimationResult
            An object containing the result of the estimation including memory requirements
        """
        pass


class DegreeMutateResult(BaseResult):
    """Result of running Degree Centrality algorithm with mutate mode."""

    # Number of node properties written to the in-memory graph.
    node_properties_written: int
    # Milliseconds spent in the pre-processing phase.
    pre_processing_millis: int
    # Milliseconds spent computing the algorithm.
    compute_millis: int
    # Milliseconds spent in the post-processing phase.
    post_processing_millis: int
    # Milliseconds spent mutating the in-memory graph.
    mutate_millis: int
    # Distribution summary of the computed centrality scores.
    centrality_distribution: dict[str, Any]
    # The configuration used for this algorithm run.
    configuration: dict[str, Any]


class DegreeStatsResult(BaseResult):
    """Result of running Degree Centrality algorithm with stats mode."""

    # Distribution summary of the computed centrality scores.
    centrality_distribution: dict[str, Any]
    # Milliseconds spent in the pre-processing phase.
    pre_processing_millis: int
    # Milliseconds spent computing the algorithm.
    compute_millis: int
    # Milliseconds spent in the post-processing phase.
    post_processing_millis: int
    # The configuration used for this algorithm run.
    configuration: dict[str, Any]


class DegreeWriteResult(BaseResult):
    """Result of running Degree Centrality algorithm with write mode."""

    # Number of node properties written to the Neo4j database.
    node_properties_written: int
    # Milliseconds spent in the pre-processing phase.
    pre_processing_millis: int
    # Milliseconds spent computing the algorithm.
    compute_millis: int
    # Milliseconds spent in the post-processing phase.
    post_processing_millis: int
    # Milliseconds spent writing results back to the database.
    write_millis: int
    # Distribution summary of the computed centrality scores.
    centrality_distribution: dict[str, Any]
    # The configuration used for this algorithm run.
    configuration: dict[str, Any]
Loading