Skip to content

Add backend support for PostgreSQL [WIP] #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
name: Run CI
name: Tests

on:
pull_request: ~
push:
pull_request:
branches: [ main ]

# Allow job to be triggered manually.
workflow_dispatch:

# Run job each night.
schedule:
- cron: '0 3 * * *'

# Cancel in-progress jobs when pushing to the same branch.
concurrency:
cancel-in-progress: true
Expand All @@ -30,6 +37,13 @@ jobs:
max-parallel: 4
matrix:
python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
services:
postgresql:
image: postgres:16
ports:
- 5432:5432
env:
POSTGRES_HOST_AUTH_METHOD: trust
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand Down Expand Up @@ -59,6 +73,13 @@ jobs:
ls -l dist
documentation:
runs-on: ubuntu-latest
services:
postgresql:
image: postgres:16
ports:
- 5432:5432
env:
POSTGRES_HOST_AUTH_METHOD: trust
steps:
- uses: actions/checkout@v3
- name: Setup python
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Release new version
name: Release

on:
push:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ ENV/
env.bak/
venv.bak/

# PyCharm project settings
.idea

# Spyder project settings
.spyderproject
.spyproject
Expand Down
24 changes: 23 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,33 @@ Usage

.. code-block:: console

python -m http.server -d build
python -m http.server -d _build/html

Please access http://localhost:8000/search.html


Development
===========

Install package in development mode::

pip install --editable='.[cli,docs,test]' --prefer-binary

Start PostgreSQL server::

docker run --rm -it --publish=5432:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" postgres:16 postgres -c log_statement=all

Invoke software tests::

export POSTGRES_LOG_STATEMENT=all
pytest -vvv

Invoke linters::

pip install pre-commit
pre-commit run --all-files


.. _atsphinx-sqlite3fts: https://pypi.org/project/atsphinx-sqlite3fts/
.. _Kazuya Takei: https://github.com/attakei
.. _readthedocs-sphinx-search: https://github.com/readthedocs/readthedocs-sphinx-search
Expand Down
Empty file added docs/_static/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
}
# atsphinx-sqlite3fts
sqlite3fts_use_search_html = True
sqlite3fts_database_url = "postgresql://postgres@localhost:5432"


def setup(app): # noqa: D103
Expand Down
7 changes: 5 additions & 2 deletions docs/getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@ You can build database by ``sqlite`` builder.

.. code-block:: console

make sqlite
sqlite3 _build/sqlite/db.sqlite
make fts-index

.. code-block:: console

psql postgresql://postgres@localhost:5432/ --command 'SELECT * FROM document;'

.. code-block:: sqlite3

Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ dynamic = ["version", "description"]
dependencies = [
"docutils",
"peewee",
"Sphinx",
"psycopg2[binary]",
"sphinx<7",
]

[project.optional-dependencies]
Expand Down
7 changes: 4 additions & 3 deletions src/atsphinx/sqlite3fts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Sphinx document searcher using SQLite3."""
"""Sphinx document searcher using SQL database."""
from sphinx.application import Sphinx

from . import builders, events
Expand All @@ -10,9 +10,10 @@ def setup(app: Sphinx):
"""Entrypoint as Sphinx extension."""
app.add_config_value("sqlite3fts_exclude_pages", [], "env")
app.add_config_value("sqlite3fts_use_search_html", False, "env")
app.add_builder(builders.SqliteBuilder)
app.add_config_value("sqlite3fts_database_url", None, "env")
app.add_builder(builders.FtsIndexer)
app.connect("config-inited", events.setup_search_html)
app.connect("builder-inited", events.configure_database)
app.connect("config-inited", events.configure_database)
app.connect("html-page-context", events.register_document)
app.connect("build-finished", events.save_database)
return {
Expand Down
15 changes: 10 additions & 5 deletions src/atsphinx/sqlite3fts/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
from . import models, services


class SqliteBuilder(Builder):
"""Single database generation builder.
class FtsIndexer(Builder):
"""
Fulltext index builder for databases.

This is custom builder to generate only SQLite database file
A custom builder to generate fulltext indexes, stored in SQL databases.
"""

name = "sqlite"
name = "fts-index"
allow_parallel = True

def get_target_uri(self, docname: str, typ: str = None) -> str: # noqa: D102
Expand All @@ -23,7 +24,11 @@ def get_outdated_docs(self) -> str: # noqa: D102
return "db.sqlite"

def prepare_writing(self, docnames: Set[str]) -> None: # noqa: D102
pass
from atsphinx.sqlite3fts.models import Content, Document, Section

Document.truncate_table(cascade=True)
Section.truncate_table(cascade=True)
Content.truncate_table(cascade=True)

def write_doc(self, docname: str, doctree: nodes.document) -> None:
"""Register content of document into database.
Expand Down
17 changes: 14 additions & 3 deletions src/atsphinx/sqlite3fts/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,23 @@ def _generate_search_html(app: Sphinx):
app.connect("html-collect-pages", _generate_search_html)


def configure_database(app: Sphinx):
"""Connect database for project output."""
def configure_database(app: Sphinx, config: Config):
"""
Connect database for project output.

TODO: Add support for multiple database backends?
"""
# SQLite
"""
db_path = Path(app.outdir) / "db.sqlite"
if db_path.exists():
db_path.unlink()
models.initialize(db_path)
models.initialize("sqlite", db_path)
"""
# PostgreSQL
if not app.config.sqlite3fts_database_url:
raise ValueError("Configuring database failed")
models.initialize("postgresql", app.config.sqlite3fts_database_url)


def register_document(
Expand Down
85 changes: 63 additions & 22 deletions src/atsphinx/sqlite3fts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,52 @@

TODO: Add support for multiple database backends?
"""
import os
from pathlib import Path
from typing import Iterable

from playhouse import sqlite_ext
from peewee import SQL, fn
from playhouse import postgres_ext as ext

db_proxy = sqlite_ext.DatabaseProxy()
from atsphinx.sqlite3fts.playhouse import TSVectorFieldPlus

db_proxy = ext.DatabaseProxy()

class Document(sqlite_ext.Model):

class Document(ext.Model):
"""Document main model."""

page = sqlite_ext.TextField(null=False, unique=True)
title = sqlite_ext.TextField(null=False)
page = ext.TextField(null=False, unique=True)
title = ext.TextField(null=False)

class Meta: # noqa: D106
database = db_proxy


class Section(sqlite_ext.Model):
class Section(ext.Model):
"""Section unit of document."""

document = sqlite_ext.ForeignKeyField(Document)
root = sqlite_ext.BooleanField(default=False, null=False)
ref = sqlite_ext.TextField(null=False)
title = sqlite_ext.TextField(null=False)
body = sqlite_ext.TextField(null=False)
document = ext.ForeignKeyField(Document)
root = ext.BooleanField(default=False, null=False)
ref = ext.TextField(null=False)
title = ext.TextField(null=False)
body = ext.TextField(null=False)

class Meta: # noqa: D106
database = db_proxy


class Content(sqlite_ext.FTS5Model):
class Content(ext.Model):
"""Searching model."""

rowid = sqlite_ext.RowIDField()
title = sqlite_ext.SearchField()
body = sqlite_ext.SearchField()
rowid = ext.IntegerField()
title = TSVectorFieldPlus()
body = TSVectorFieldPlus()

class Meta: # noqa: D106
database = db_proxy
options = {"tokenize": "trigram"}
# TODO: This is an option from SQLite, it does not work on other DBMS.
# options = {"tokenize": "trigram"}


def store_document(document: Document, sections: Iterable[Section]):
Expand All @@ -58,32 +63,68 @@ def store_document(document: Document, sections: Iterable[Section]):
Content.insert(
{
Content.rowid: section.id,
Content.title: section.title or document.title,
Content.body: section.body,
Content.title: fn.to_tsvector(section.title or document.title),
Content.body: fn.to_tsvector(section.body),
}
).execute()


def search_documents(keyword: str) -> Iterable[Section]:
    """Search documents from keyword by full-text-search.

    A section is returned when either its indexed title or its indexed body
    matches ``keyword``, interpreted with ``websearch_to_tsquery``. Results
    are ordered by title rank first, then by body rank, both descending.

    :param keyword: Search terms as entered by the user.
    :return: Query over ``Section`` rows, each additionally carrying the
        ``rank_title`` and ``rank_body`` columns.
    """
    # SQLite (kept for reference until multiple backends are supported):
    #
    #     return (
    #         Section.select()
    #         .join(Content, on=(Section.id == Content.rowid))
    #         .where(Content.match(keyword))
    #         .order_by(Content.bm25())
    #     )

    # PostgreSQL.
    # https://www.postgresql.org/docs/current/textsearch-controls.html
    # https://stackoverflow.com/questions/25033184/postgresql-full-text-search-performance-not-acceptable-when-ordering-by-ts-rank/25245291#25245291
    tsquery = fn.websearch_to_tsquery(keyword)

    # 32 is the normalization flag handed to `ts_rank_cd`; see the
    # PostgreSQL text-search documentation linked above for its meaning.
    title_rank = fn.ts_rank_cd(Content.title, tsquery, 32).alias("rank_title")
    body_rank = fn.ts_rank_cd(Content.body, tsquery, 32).alias("rank_body")

    title_hit = Content.title.match(keyword, web=True)
    body_hit = Content.body.match(keyword, web=True)

    sections = Section.select(Section, title_rank, body_rank)
    sections = sections.join(Content, on=(Section.id == Content.rowid))
    sections = sections.where(title_hit | body_hit)
    return sections.order_by(
        SQL("rank_title").desc(),
        SQL("rank_body").desc(),
    )


def bind(db_type: str, db_path: Path):
    """Bind connection.

    This only sets the database on the proxy; it does not create tables.

    :param db_type: Backend selector, either ``"sqlite"`` or ``"postgresql"``.
    :param db_path: Database location, passed unchanged to the backend's
        database class.
    :raises ValueError: If ``db_type`` is not a known backend.
    """
    if db_type == "sqlite":
        # `ext` is `playhouse.postgres_ext`, which does not provide
        # `SqliteExtDatabase`; take it from the SQLite extension module.
        from playhouse.sqlite_ext import SqliteExtDatabase

        db = SqliteExtDatabase(db_path)
    elif db_type == "postgresql":
        db = ext.PostgresqlExtDatabase(db_path)
        log_statement = os.environ.get("POSTGRES_LOG_STATEMENT")
        if log_statement is not None:
            # Hand the value over as a bound parameter instead of splicing it
            # into the SQL text. `set_config(..., false)` is the function form
            # of a session-level `SET`.
            db.execute_sql(
                "SELECT set_config('log_statement', %s, false);",
                (log_statement,),
            )
    else:
        raise ValueError(f"Unknown database type: {db_type}")
    db_proxy.initialize(db)


def initialize(db_path: Path):
def initialize(db_type: str, db_path: Path):
"""Bind connection and create tables."""
bind(db_path)
bind(db_type, db_path)
db_proxy.create_tables([Document, Section, Content])
18 changes: 18 additions & 0 deletions src/atsphinx/sqlite3fts/playhouse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Peewee/Playhouse extension."""
from peewee import Expression, Field, TextField, fn
from playhouse.postgres_ext import TS_MATCH, IndexedFieldMixin


class TSVectorFieldPlus(IndexedFieldMixin, TextField):
    """A `TSVectorField` variant that can also use `websearch_to_tsquery`."""

    field_type = "TSVECTOR"
    # Explicitly reuse the hash implementation of the base `Field`.
    __hash__ = Field.__hash__

    def match(self, query, language=None, plain=False, web=False):
        """Build a full-text match expression against this column.

        :param query: Search terms, handed to the selected tsquery function.
        :param language: Optional first argument for the tsquery function;
            when given, it is passed ahead of ``query``.
        :param plain: Use ``plainto_tsquery`` instead of ``to_tsquery``.
        :param web: Use ``websearch_to_tsquery``; wins over ``plain``.
        :return: Expression joining this field and the tsquery via
            ``TS_MATCH``.
        """
        if web:
            to_query = fn.websearch_to_tsquery
        elif plain:
            to_query = fn.plainto_tsquery
        else:
            to_query = fn.to_tsquery

        arguments = (query,) if language is None else (language, query)
        return Expression(self, TS_MATCH, to_query(*arguments))
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Test package."""
Loading