Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions graphrag/config/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ class StorageType(str, Enum):
"""The blob output type."""
cosmosdb = "cosmosdb"
"""The cosmosdb output type"""
postgres = "postgres"
"""The postgres output type."""

def __repr__(self):
"""Get a string representation."""
Expand Down
42 changes: 42 additions & 0 deletions graphrag/config/models/storage_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,45 @@ def validate_base_dir(cls, value, info):
description="The cosmosdb account url to use.",
default=graphrag_config_defaults.storage.cosmosdb_account_url,
)

### PostgreSQL
host: str = Field(
description="PostgreSQL server host (for postgres type).",
default="localhost"
)
port: int = Field(
description="PostgreSQL server port (for postgres type).",
default=5432
)
database: str = Field(
description="PostgreSQL database name (for postgres type).",
default="graphrag"
)
username: str | None = Field(
description="PostgreSQL username for authentication (for postgres type).",
default=None
)
password: str | None = Field(
description="PostgreSQL password for authentication (for postgres type).",
default=None
)
collection_prefix: str = Field(
description="Prefix for PostgreSQL collection names (for postgres type).",
default="graphrag_"
)
batch_size: int = Field(
description="Batch size for database operations (for postgres type).",
default=50
)
command_timeout: int = Field(
description="Command timeout for database operations (for postgres type).",
default=600
)
server_timeout: int = Field(
description="Server timeout for database connections (for postgres type).",
default=120
)
connection_timeout: int = Field(
description="Connection timeout for establishing database connections (for postgres type).",
default=60
)
12 changes: 9 additions & 3 deletions graphrag/index/operations/finalize_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ def finalize_entities(
layout_enabled: bool = False,
) -> pd.DataFrame:
"""All the steps to transform final entities."""
# Remove the default degree, x and y columns for Postgres storage compatibility;
# the entities.merge call below adds them back with calculated values.
entities = entities.drop(columns=["degree", "x", "y"], errors="ignore")
graph = create_graph(relationships, edge_attr=["weight"])
graph_embeddings = None
if embed_config is not None and embed_config.enabled:
Expand All @@ -45,9 +48,12 @@ def finalize_entities(
final_entities["degree"] = final_entities["degree"].fillna(0).astype(int)
final_entities.reset_index(inplace=True)
final_entities["human_readable_id"] = final_entities.index
final_entities["id"] = final_entities["human_readable_id"].apply(
lambda _x: str(uuid4())
)

# Generate ids only when the id column is missing or entirely null
if "id" not in final_entities.columns or final_entities["id"].isna().all():
final_entities["id"] = final_entities["human_readable_id"].apply(
lambda _x: str(uuid4())
)
return final_entities.loc[
:,
ENTITIES_FINAL_COLUMNS,
Expand Down
9 changes: 6 additions & 3 deletions graphrag/index/operations/finalize_relationships.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@ def finalize_relationships(

final_relationships.reset_index(inplace=True)
final_relationships["human_readable_id"] = final_relationships.index
final_relationships["id"] = final_relationships["human_readable_id"].apply(
lambda _x: str(uuid4())
)

# Generate ids only when the id column is missing or entirely null
if "id" not in final_relationships.columns or final_relationships["id"].isna().all():
final_relationships["id"] = final_relationships["human_readable_id"].apply(
lambda _x: str(uuid4())
)

return final_relationships.loc[
:,
Expand Down
6 changes: 5 additions & 1 deletion graphrag/index/workflows/create_communities.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,11 @@ def create_communities(

# join it all up and add some new fields
final_communities = all_grouped.merge(entity_ids, on="community", how="inner")
final_communities["id"] = [str(uuid4()) for _ in range(len(final_communities))]

# Generate ids only when the id column is missing or entirely null
if "id" not in final_communities.columns or final_communities["id"].isna().all():
final_communities["id"] = [str(uuid4()) for _ in range(len(final_communities))]

final_communities["human_readable_id"] = final_communities["community"]
final_communities["title"] = "Community " + final_communities["community"].astype(
str
Expand Down
2 changes: 2 additions & 0 deletions graphrag/storage/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from graphrag.storage.blob_pipeline_storage import create_blob_storage
from graphrag.storage.cosmosdb_pipeline_storage import create_cosmosdb_storage
from graphrag.storage.file_pipeline_storage import create_file_storage
from graphrag.storage.postgres_pipeline_storage import PostgresPipelineStorage
from graphrag.storage.memory_pipeline_storage import MemoryPipelineStorage

if TYPE_CHECKING:
Expand Down Expand Up @@ -99,3 +100,4 @@ def is_supported_storage_type(cls, storage_type: str) -> bool:
StorageFactory.register(StorageType.cosmosdb.value, create_cosmosdb_storage)
StorageFactory.register(StorageType.file.value, create_file_storage)
StorageFactory.register(StorageType.memory.value, lambda **_: MemoryPipelineStorage())
StorageFactory.register(StorageType.postgres.value, PostgresPipelineStorage)
Loading