
Commit 31a9a18

Ruff formatting
1 parent 385492b

File tree: 7 files changed, +50 −64 lines


examples/customize/build_graph/pipeline/kg_builder_from_pdf.py

Lines changed: 6 additions & 13 deletions
@@ -17,7 +17,6 @@
 import asyncio
 import logging
 
-import neo4j
 from neo4j_graphrag.experimental.components.entity_relation_extractor import (
     LLMEntityRelationExtractor,
     OnError,
@@ -35,12 +34,12 @@
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
 from neo4j_graphrag.llm import LLMInterface, OpenAILLM
 
+import neo4j
+
 logging.basicConfig(level=logging.INFO)
 
 
-async def define_and_run_pipeline(
-    neo4j_driver: neo4j.Driver, llm: LLMInterface
-) -> PipelineResult:
+async def define_and_run_pipeline(neo4j_driver: neo4j.Driver, llm: LLMInterface) -> PipelineResult:
     from neo4j_graphrag.experimental.pipeline import Pipeline
 
     # Instantiate Entity and Relation objects
@@ -57,9 +56,7 @@ async def define_and_run_pipeline(
         ),
     ]
     relations = [
-        SchemaRelation(
-            label="SITUATED_AT", description="Indicates the location of a person."
-        ),
+        SchemaRelation(label="SITUATED_AT", description="Indicates the location of a person."),
         SchemaRelation(
             label="LED_BY",
             description="Indicates the leader of an organization.",
@@ -68,9 +65,7 @@ async def define_and_run_pipeline(
             label="OWNS",
             description="Indicates the ownership of an item such as a Horcrux.",
         ),
-        SchemaRelation(
-            label="INTERACTS", description="The interaction between two people."
-        ),
+        SchemaRelation(label="INTERACTS", description="The interaction between two people."),
     ]
     potential_schema = [
         ("PERSON", "SITUATED_AT", "LOCATION"),
@@ -131,9 +126,7 @@ async def main() -> PipelineResult:
             "response_format": {"type": "json_object"},
         },
     )
-    driver = neo4j.GraphDatabase.driver(
-        "bolt://localhost:7687", auth=("neo4j", "password")
-    )
+    driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
     res = await define_and_run_pipeline(driver, llm)
     driver.close()
     await llm.async_client.close()
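The same import reshuffle recurs in each of the five example files in this commit: import neo4j moves from the top of the third-party block to its own block after the neo4j_graphrag imports, presumably driven by the project's import-sorting configuration, which is not part of this diff. Illustrative only (not a file in this commit), the header each example converges on is roughly:

# Resulting import layout after this commit (trimmed for illustration).
import asyncio
import logging

from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
from neo4j_graphrag.llm import LLMInterface, OpenAILLM

import neo4j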

examples/customize/build_graph/pipeline/kg_builder_from_text.py

Lines changed: 5 additions & 10 deletions
@@ -16,7 +16,6 @@
 
 import asyncio
 
-import neo4j
 from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
 from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
 from neo4j_graphrag.experimental.components.entity_relation_extractor import (
@@ -37,10 +36,10 @@
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
 from neo4j_graphrag.llm import LLMInterface, OpenAILLM
 
+import neo4j
+
 
-async def define_and_run_pipeline(
-    neo4j_driver: neo4j.Driver, llm: LLMInterface
-) -> PipelineResult:
+async def define_and_run_pipeline(neo4j_driver: neo4j.Driver, llm: LLMInterface) -> PipelineResult:
     """This is where we define and run the KG builder pipeline, instantiating a few
     components:
     - Text Splitter: in this example we use the fixed size text splitter
@@ -75,9 +74,7 @@ async def define_and_run_pipeline(
     # and how the output of previous components must be used
     pipe.connect("splitter", "chunk_embedder", input_config={"text_chunks": "splitter"})
     pipe.connect("schema", "extractor", input_config={"schema": "schema"})
-    pipe.connect(
-        "chunk_embedder", "extractor", input_config={"chunks": "chunk_embedder"}
-    )
+    pipe.connect("chunk_embedder", "extractor", input_config={"chunks": "chunk_embedder"})
     pipe.connect(
         "extractor",
         "writer",
@@ -148,9 +145,7 @@ async def main() -> PipelineResult:
             "response_format": {"type": "json_object"},
         },
     )
-    driver = neo4j.GraphDatabase.driver(
-        "bolt://localhost:7687", auth=("neo4j", "password")
-    )
+    driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
     res = await define_and_run_pipeline(driver, llm)
     driver.close()
     await llm.async_client.close()

examples/customize/build_graph/pipeline/lexical_graph_builder_from_text.py

Lines changed: 3 additions & 4 deletions
@@ -2,7 +2,6 @@
 
 import asyncio
 
-import neo4j
 from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
 from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
 from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter
@@ -14,6 +13,8 @@
 from neo4j_graphrag.experimental.pipeline import Pipeline
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
 
+import neo4j
+
 
 async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
     """This is where we define and run the Lexical Graph builder pipeline, instantiating
@@ -78,7 +79,5 @@ async def main(neo4j_driver: neo4j.Driver) -> PipelineResult:
 
 
 if __name__ == "__main__":
-    with neo4j.GraphDatabase.driver(
-        "bolt://localhost:7687", auth=("neo4j", "password")
-    ) as driver:
+    with neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
         print(asyncio.run(main(driver)))

examples/customize/build_graph/pipeline/text_to_lexical_graph_to_entity_graph_single_pipeline.py

Lines changed: 5 additions & 8 deletions
@@ -7,7 +7,6 @@
 
 import asyncio
 
-import neo4j
 from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
 from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
 from neo4j_graphrag.experimental.components.entity_relation_extractor import (
@@ -29,6 +28,8 @@
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
 from neo4j_graphrag.llm import LLMInterface, OpenAILLM
 
+import neo4j
+
 
 async def define_and_run_pipeline(
     neo4j_driver: neo4j.Driver,
@@ -56,7 +57,7 @@ async def define_and_run_pipeline(
     pipe = Pipeline()
     # define the components
     pipe.add_component(
-        FixedSizeSplitter(chunk_size=200, chunk_overlap=50,approximate=False),
+        FixedSizeSplitter(chunk_size=200, chunk_overlap=50, approximate=False),
         "splitter",
     )
     pipe.add_component(TextChunkEmbedder(embedder=OpenAIEmbeddings()), "chunk_embedder")
@@ -92,9 +93,7 @@ async def define_and_run_pipeline(
     )
     # define the execution order of component
     # and how the output of previous components must be used
-    pipe.connect(
-        "chunk_embedder", "extractor", input_config={"chunks": "chunk_embedder"}
-    )
+    pipe.connect("chunk_embedder", "extractor", input_config={"chunks": "chunk_embedder"})
     pipe.connect("schema", "extractor", input_config={"schema": "schema"})
     pipe.connect(
         "extractor",
@@ -189,7 +188,5 @@ async def main(driver: neo4j.Driver) -> PipelineResult:
 
 
 if __name__ == "__main__":
-    with neo4j.GraphDatabase.driver(
-        "bolt://localhost:7687", auth=("neo4j", "password")
-    ) as driver:
+    with neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
         print(asyncio.run(main(driver)))

examples/customize/build_graph/pipeline/text_to_lexical_graph_to_entity_graph_two_pipelines.py

Lines changed: 4 additions & 7 deletions
@@ -8,7 +8,6 @@
 
 import asyncio
 
-import neo4j
 from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
 from neo4j_graphrag.experimental.components.embedder import TextChunkEmbedder
 from neo4j_graphrag.experimental.components.entity_relation_extractor import (
@@ -31,6 +30,8 @@
 from neo4j_graphrag.experimental.pipeline.pipeline import PipelineResult
 from neo4j_graphrag.llm import LLMInterface, OpenAILLM
 
+import neo4j
+
 
 async def build_lexical_graph(
     neo4j_driver: neo4j.Driver,
@@ -200,15 +201,11 @@ async def main(driver: neo4j.Driver) -> PipelineResult:
         },
     )
     await build_lexical_graph(driver, lexical_graph_config, text=text)
-    res = await read_chunk_and_perform_entity_extraction(
-        driver, llm, lexical_graph_config
-    )
+    res = await read_chunk_and_perform_entity_extraction(driver, llm, lexical_graph_config)
     await llm.async_client.close()
     return res
 
 
 if __name__ == "__main__":
-    with neo4j.GraphDatabase.driver(
-        "bolt://localhost:7687", auth=("neo4j", "password")
-    ) as driver:
+    with neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
         print(asyncio.run(main(driver)))

src/neo4j_graphrag/experimental/components/text_splitters/fixed_size_splitter.py

Lines changed: 5 additions & 3 deletions
@@ -58,7 +58,7 @@ def _adjust_chunk_end(text: str, start: int, approximate_end: int) -> int:
     """
     end = approximate_end
     if end < len(text):
-        while end > start and not text[end].isspace() and not text[end-1].isspace():
+        while end > start and not text[end].isspace() and not text[end - 1].isspace():
            end -= 1
 
     # fallback if no whitespace is found
@@ -92,7 +92,9 @@ class FixedSizeSplitter(TextSplitter):
     """
 
     @validate_call
-    def __init__(self, chunk_size: int = 4000, chunk_overlap: int = 200, approximate: bool = True) -> None:
+    def __init__(
+        self, chunk_size: int = 4000, chunk_overlap: int = 200, approximate: bool = True
+    ) -> None:
         if chunk_size <= 0:
             raise ValueError("chunk_size must be strictly greater than 0")
         if chunk_overlap >= chunk_size:
@@ -131,7 +133,7 @@ async def run(self, text: str) -> TextChunks:
                 end = _adjust_chunk_end(text, start, approximate_end)
                 # when avoiding splitting words in the middle is not possible, revert to
                 # initial chunk end and skip adjusting next chunk start
-                skip_adjust_chunk_start = (end == approximate_end)
+                skip_adjust_chunk_start = end == approximate_end
             else:
                 # apply fixed size splitting with possibly words cut in half at chunk
                 # boundaries
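For context on the behaviour these hunks touch: with approximate=True (the default) the splitter pulls a chunk's end back to the nearest whitespace so words are not cut, falling back to the raw boundary when no whitespace is available; with approximate=False it cuts at exact character offsets. A minimal usage sketch, not part of this commit, assuming TextChunks exposes a chunks list of TextChunk objects with a text field:

import asyncio

from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
    FixedSizeSplitter,
)


async def demo() -> None:
    text = "Hello World, this is a test message."
    # word-boundary-aware splitting (the default behaviour)
    approx = FixedSizeSplitter(chunk_size=10, chunk_overlap=2, approximate=True)
    # strict fixed-size splitting; words may be cut at chunk boundaries
    exact = FixedSizeSplitter(chunk_size=10, chunk_overlap=2, approximate=False)
    print([c.text for c in (await approx.run(text)).chunks])
    print([c.text for c in (await exact.run(text)).chunks])


asyncio.run(demo())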

tests/unit/experimental/components/text_splitters/test_fixed_size_splitter.py

Lines changed: 22 additions & 19 deletions
@@ -16,7 +16,9 @@
 
 import pytest
 from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
-    FixedSizeSplitter, _adjust_chunk_start, _adjust_chunk_end,
+    FixedSizeSplitter,
+    _adjust_chunk_end,
+    _adjust_chunk_start,
 )
 from neo4j_graphrag.experimental.components.types import TextChunk
 
@@ -101,7 +103,11 @@ def test_invalid_chunk_size() -> None:
         ("Hello World", 0, 0),
     ],
 )
-def test_adjust_chunk_start(text, approximate_start, expected_start):
+def test_adjust_chunk_start(
+    text: str,
+    approximate_start: int,
+    expected_start: int
+) -> None:
     """
     Test that the _adjust_chunk_start function correctly shifts
     the start index to avoid breaking words, unless no whitespace is found.
@@ -125,7 +131,12 @@ def test_adjust_chunk_start(text, approximate_start, expected_start):
         ("Hello World", 6, 15, 15),
     ],
 )
-def test_adjust_chunk_end(text, start, approximate_end, expected_end):
+def test_adjust_chunk_end(
+    text: str,
+    start: int,
+    approximate_end: int,
+    expected_end: int
+) -> None:
     """
     Test that the _adjust_chunk_end function correctly shifts
     the end index to avoid breaking words, unless no whitespace is found.
@@ -144,27 +155,15 @@ def test_adjust_chunk_end(text, start, approximate_end, expected_end):
             10,
             2,
             True,
-            [
-                "Hello ",
-                "World, ",
-                "this is a ",
-                "a test ",
-                "message."
-            ],
+            ["Hello ", "World, ", "this is a ", "a test ", "message."],
         ),
         # Case: fixed size splitting
         (
            "Hello World, this is a test message.",
            10,
            2,
            False,
-            [
-                "Hello Worl",
-                "rld, this ",
-                "s is a tes",
-                "est messag",
-                "age."
-            ],
+            ["Hello Worl", "rld, this ", "s is a tes", "est messag", "age."],
         ),
         # Case: short text => only one chunk
         (
@@ -193,8 +192,12 @@ def test_adjust_chunk_end(text, start, approximate_end, expected_end):
     ],
 )
 async def test_fixed_size_splitter_run(
-    text, chunk_size, chunk_overlap, approximate, expected_chunks
-):
+    text: str,
+    chunk_size: int,
+    chunk_overlap: int,
+    approximate: bool,
+    expected_chunks: list[str]
+) -> None:
     """
     Test that 'FixedSizeSplitter.run' returns the expected chunks
     for different configurations.
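The approximate=False expectations in the parametrized cases above can be re-derived by hand: consecutive chunks appear to start chunk_size - chunk_overlap characters apart. A quick sketch, not from this commit, that reproduces that test case:

# Re-derive the exact-splitting expectation: chunk_size=10, chunk_overlap=2,
# so each chunk starts 8 characters after the previous one.
text = "Hello World, this is a test message."
chunk_size, chunk_overlap = 10, 2
step = chunk_size - chunk_overlap
chunks = [text[i : i + chunk_size] for i in range(0, len(text), step)]
print(chunks)  # ['Hello Worl', 'rld, this ', 's is a tes', 'est messag', 'age.']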
