Skip to content

Commit 5626d7e

Browse files
committed
Add backend support for PostgreSQL
1 parent 9727ce1 commit 5626d7e

18 files changed

+283
-53
lines changed

.github/workflows/main.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ jobs:
3737
max-parallel: 4
3838
matrix:
3939
python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
40+
services:
41+
postgresql:
42+
image: postgres:16
43+
ports:
44+
- 5432:5432
45+
env:
46+
POSTGRES_HOST_AUTH_METHOD: trust
4047
steps:
4148
- uses: actions/checkout@v3
4249
- name: Set up Python ${{ matrix.python-version }}
@@ -66,6 +73,13 @@ jobs:
6673
ls -l dist
6774
documentation:
6875
runs-on: ubuntu-latest
76+
services:
77+
postgresql:
78+
image: postgres:16
79+
ports:
80+
- 5432:5432
81+
env:
82+
POSTGRES_HOST_AUTH_METHOD: trust
6983
steps:
7084
- uses: actions/checkout@v3
7185
- name: Setup python

README.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,28 @@ Usage
7979
Please access http://localhost:8000/search.html
8080
8181
82+
Development
83+
===========
84+
85+
Install package in development mode::
86+
87+
pip install --editable='.[cli,docs,test]' --prefer-binary
88+
89+
Start PostgreSQL server::
90+
91+
docker run --rm -it --publish=5432:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" postgres:16 postgres -c log_statement=all
92+
93+
Invoke software tests::
94+
95+
export POSTGRES_LOG_STATEMENT=all
96+
pytest -vvv
97+
98+
Invoke linters::
99+
100+
pip install pre-commit
101+
pre-commit run --all-files
102+
103+
82104
.. _atsphinx-sqlite3fts: https://pypi.org/project/atsphinx-sqlite3fts/
83105
.. _Kazuya Takei: https://github.com/attakei
84106
.. _readthedocs-sphinx-search: https://github.com/readthedocs/readthedocs-sphinx-search

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
}
3333
# atsphinx-sqlite3fts
3434
sqlite3fts_use_search_html = True
35+
sqlite3fts_database_url = "postgresql://postgres@localhost:5432"
3536

3637

3738
def setup(app): # noqa: D103

docs/getting-started.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,11 @@ You can build database by ``sqlite`` builder.
4747

4848
.. code-block:: console
4949
50-
make sqlite
51-
sqlite3 _build/sqlite/db.sqlite
50+
make fts-index
51+
52+
.. code-block:: console
53+
54+
psql postgresql://postgres@localhost:5432/ --command 'SELECT * FROM document;'
5255
5356
.. code-block:: sqlite3
5457

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dynamic = ["version", "description"]
3535
dependencies = [
3636
"docutils",
3737
"peewee",
38+
"psycopg2[binary]",
3839
"sphinx<7",
3940
]
4041

src/atsphinx/sqlite3fts/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Sphinx document searcher using SQLite3."""
1+
"""Sphinx document searcher using an SQL database."""
22
from sphinx.application import Sphinx
33

44
from . import builders, events
@@ -10,9 +10,10 @@ def setup(app: Sphinx):
1010
"""Entrypoint as Sphinx extension."""
1111
app.add_config_value("sqlite3fts_exclude_pages", [], "env")
1212
app.add_config_value("sqlite3fts_use_search_html", False, "env")
13-
app.add_builder(builders.SqliteBuilder)
13+
app.add_config_value("sqlite3fts_database_url", None, "env")
14+
app.add_builder(builders.FtsIndexer)
1415
app.connect("config-inited", events.setup_search_html)
15-
app.connect("builder-inited", events.configure_database)
16+
app.connect("config-inited", events.configure_database)
1617
app.connect("html-page-context", events.register_document)
1718
app.connect("build-finished", events.save_database)
1819
return {

src/atsphinx/sqlite3fts/builders.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
from . import models, services
88

99

10-
class SqliteBuilder(Builder):
11-
"""Single database generation builder.
10+
class FtsIndexer(Builder):
11+
"""
12+
Fulltext index builder for databases.
1213
13-
This is custom builder to generate only SQLite database file
14+
A custom builder to generate fulltext indexes, stored in SQL databases.
1415
"""
1516

16-
name = "sqlite"
17+
name = "fts-index"
1718
allow_parallel = True
1819

1920
def get_target_uri(self, docname: str, typ: str = None) -> str: # noqa: D102
@@ -23,7 +24,11 @@ def get_outdated_docs(self) -> str: # noqa: D102
2324
return "db.sqlite"
2425

2526
def prepare_writing(self, docnames: Set[str]) -> None: # noqa: D102
26-
pass
27+
from atsphinx.sqlite3fts.models import Content, Document, Section
28+
29+
Document.truncate_table(cascade=True)
30+
Section.truncate_table(cascade=True)
31+
Content.truncate_table(cascade=True)
2732

2833
def write_doc(self, docname: str, doctree: nodes.document) -> None:
2934
"""Register content of document into database.

src/atsphinx/sqlite3fts/events.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,23 @@ def _generate_search_html(app: Sphinx):
3333
app.connect("html-collect-pages", _generate_search_html)
3434

3535

36-
def configure_database(app: Sphinx):
37-
"""Connect database for project output."""
36+
def configure_database(app: Sphinx, config: Config):
37+
"""
38+
Connect database for project output.
39+
40+
TODO: Add support for multiple database backends?
41+
"""
42+
# SQLite
43+
"""
3844
db_path = Path(app.outdir) / "db.sqlite"
3945
if db_path.exists():
4046
db_path.unlink()
41-
models.initialize(db_path)
47+
models.initialize("sqlite", db_path)
48+
"""
49+
# PostgreSQL
50+
if not app.config.sqlite3fts_database_url:
51+
raise ValueError("Configuring database failed")
52+
models.initialize("postgresql", app.config.sqlite3fts_database_url)
4253

4354

4455
def register_document(

src/atsphinx/sqlite3fts/models.py

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,47 +6,52 @@
66
77
TODO: Add support for multiple database backends?
88
"""
9+
import os
910
from pathlib import Path
1011
from typing import Iterable
1112

12-
from playhouse import sqlite_ext
13+
from peewee import SQL, fn
14+
from playhouse import postgres_ext as ext
1315

14-
db_proxy = sqlite_ext.DatabaseProxy()
16+
from atsphinx.sqlite3fts.playhouse import TSVectorFieldPlus
1517

18+
db_proxy = ext.DatabaseProxy()
1619

17-
class Document(sqlite_ext.Model):
20+
21+
class Document(ext.Model):
1822
"""Document main model."""
1923

20-
page = sqlite_ext.TextField(null=False, unique=True)
21-
title = sqlite_ext.TextField(null=False)
24+
page = ext.TextField(null=False, unique=True)
25+
title = ext.TextField(null=False)
2226

2327
class Meta: # noqa: D106
2428
database = db_proxy
2529

2630

27-
class Section(sqlite_ext.Model):
31+
class Section(ext.Model):
2832
"""Section unit of document."""
2933

30-
document = sqlite_ext.ForeignKeyField(Document)
31-
root = sqlite_ext.BooleanField(default=False, null=False)
32-
ref = sqlite_ext.TextField(null=False)
33-
title = sqlite_ext.TextField(null=False)
34-
body = sqlite_ext.TextField(null=False)
34+
document = ext.ForeignKeyField(Document)
35+
root = ext.BooleanField(default=False, null=False)
36+
ref = ext.TextField(null=False)
37+
title = ext.TextField(null=False)
38+
body = ext.TextField(null=False)
3539

3640
class Meta: # noqa: D106
3741
database = db_proxy
3842

3943

40-
class Content(sqlite_ext.FTS5Model):
44+
class Content(ext.Model):
4145
"""Searching model."""
4246

43-
rowid = sqlite_ext.RowIDField()
44-
title = sqlite_ext.SearchField()
45-
body = sqlite_ext.SearchField()
47+
rowid = ext.IntegerField()
48+
title = TSVectorFieldPlus()
49+
body = TSVectorFieldPlus()
4650

4751
class Meta: # noqa: D106
4852
database = db_proxy
49-
options = {"tokenize": "trigram"}
53+
# TODO: This is a SQLite-specific option; it does not work on other DBMSs.
54+
# options = {"tokenize": "trigram"}
5055

5156

5257
def store_document(document: Document, sections: Iterable[Section]):
@@ -58,32 +63,68 @@ def store_document(document: Document, sections: Iterable[Section]):
5863
Content.insert(
5964
{
6065
Content.rowid: section.id,
61-
Content.title: section.title or document.title,
62-
Content.body: section.body,
66+
Content.title: fn.to_tsvector(section.title or document.title),
67+
Content.body: fn.to_tsvector(section.body),
6368
}
6469
).execute()
6570

6671

6772
def search_documents(keyword: str) -> Iterable[Section]:
6873
"""Search documents from keyword by full-text-search."""
74+
# SQLite.
75+
"""
6976
return (
7077
Section.select()
7178
.join(Content, on=(Section.id == Content.rowid))
7279
.where(Content.match(keyword))
7380
.order_by(Content.bm25())
7481
)
82+
"""
83+
84+
# PostgreSQL.
85+
# https://www.postgresql.org/docs/current/textsearch-controls.html
86+
# https://stackoverflow.com/questions/25033184/postgresql-full-text-search-performance-not-acceptable-when-ordering-by-ts-rank/25245291#25245291
87+
return (
88+
Section.select(
89+
Section,
90+
fn.ts_rank_cd(Content.title, fn.websearch_to_tsquery(keyword), 32).alias(
91+
"rank_title"
92+
),
93+
fn.ts_rank_cd(Content.body, fn.websearch_to_tsquery(keyword), 32).alias(
94+
"rank_body"
95+
),
96+
)
97+
.join(Content, on=(Section.id == Content.rowid))
98+
.where(
99+
Content.title.match(keyword, web=True)
100+
| Content.body.match(keyword, web=True)
101+
)
102+
.order_by(
103+
SQL("rank_title").desc(),
104+
SQL("rank_body").desc(),
105+
)
106+
)
75107

76108

77-
def bind(db_path: Path):
109+
def bind(db_type: str, db_path: Path):
78110
"""Bind connection.
79111
80112
This only sets the database on the proxy; it does not create tables.
81113
"""
82-
db = sqlite_ext.SqliteExtDatabase(db_path)
114+
if db_type == "sqlite":
115+
db = ext.SqliteExtDatabase(db_path)
116+
elif db_type == "postgresql":
117+
db = ext.PostgresqlExtDatabase(db_path)
118+
if "POSTGRES_LOG_STATEMENT" in os.environ:
119+
db.execute_sql(
120+
f"SET log_statement='{os.environ['POSTGRES_LOG_STATEMENT']}';"
121+
)
122+
else:
123+
raise ValueError(f"Unknown database type: {db_type}")
83124
db_proxy.initialize(db)
84125

85126

86-
def initialize(db_path: Path):
127+
def initialize(db_type: str, db_path: Path):
87128
"""Bind connection and create tables."""
88-
bind(db_path)
129+
bind(db_type, db_path)
89130
db_proxy.create_tables([Document, Section, Content])

src/atsphinx/sqlite3fts/playhouse.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Peewee/Playhouse extension."""
2+
from peewee import Expression, Field, TextField, fn
3+
from playhouse.postgres_ext import TS_MATCH, IndexedFieldMixin
4+
5+
6+
class TSVectorFieldPlus(IndexedFieldMixin, TextField):
7+
"""An advanced `TSVectorField`, capable of using `websearch_to_tsquery`."""
8+
9+
field_type = "TSVECTOR"
10+
__hash__ = Field.__hash__
11+
12+
def match(self, query, language=None, plain=False, web=False):
13+
"""Run match."""
14+
params = (language, query) if language is not None else (query,)
15+
func = fn.plainto_tsquery if plain else fn.to_tsquery
16+
if web:
17+
func = fn.websearch_to_tsquery
18+
return Expression(self, TS_MATCH, func(*params))

tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Test package."""

tests/conftest.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,40 @@
11
"""Configuration for pytest."""
2+
import os
3+
24
import pytest
35
from sphinx.testing.path import path
46

7+
from tests.util import Database
8+
59
pytest_plugins = "sphinx.testing.fixtures"
610
collect_ignore = ["roots"]
711

812

13+
@pytest.fixture(scope="session")
14+
def database_dsn():
15+
"""Pytest fixture providing the database connection string for software tests."""
16+
return "postgresql://postgres@localhost:5432"
17+
18+
19+
@pytest.fixture(scope="session")
20+
def database(database_dsn):
21+
"""Pytest fixture returning a database wrapper object."""
22+
return Database(database_dsn)
23+
24+
25+
@pytest.fixture
26+
def conn(database):
27+
"""
28+
Pytest fixture returning a database wrapper object, with content cleared.
29+
30+
This is intended to provide each test case with a blank slate.
31+
"""
32+
if "POSTGRES_LOG_STATEMENT" in os.environ:
33+
database.execute(f"SET log_statement='{os.environ['POSTGRES_LOG_STATEMENT']}';")
34+
database.reset()
35+
return database
36+
37+
938
@pytest.fixture(scope="session")
1039
def rootdir():
1140
"""Set root directory to use testing sphinx project."""

tests/roots/test-default/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
"atsphinx.sqlite3fts",
44
]
55

6+
sqlite3fts_database_url = "postgresql://postgres@localhost:5432"
7+
68
# To skip toctree
79
rst_prolog = """
810
:orphan:

tests/test_builders.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,11 @@
11
"""Test cases for custom builders."""
2-
import sqlite3
3-
from pathlib import Path
4-
52
import pytest
63
from sphinx.testing.util import SphinxTestApp
74

85

9-
@pytest.mark.sphinx("sqlite", testroot="default")
10-
def test___work_builder(app: SphinxTestApp, status, warning): # noqa
6+
@pytest.mark.sphinx("fts-index", testroot="default")
7+
def test___work_builder(app: SphinxTestApp, status, warning, conn): # noqa
118
app.build()
12-
db_path = Path(app.outdir) / "db.sqlite"
13-
assert db_path.exists()
14-
conn = sqlite3.connect(db_path)
159
assert len(conn.execute("SELECT * FROM document").fetchall()) > 0
1610
assert len(conn.execute("SELECT * FROM section").fetchall()) > 0
1711
assert len(conn.execute("SELECT * FROM content").fetchall()) > 0

0 commit comments

Comments
 (0)