From 31139484b19235ed3ca7418ec5e79da84b06fbc5 Mon Sep 17 00:00:00 2001 From: Martin Mares <mj@ucw.cz> Date: Thu, 19 Aug 2021 23:25:23 +0200 Subject: [PATCH] =?UTF-8?q?DB:=20Index=20na=20case-insensitive=20accent-in?= =?UTF-8?q?sensitive=20hled=C3=A1n=C3=AD=20ve=20jm=C3=A9nech=20m=C3=ADst?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/db.ddl | 12 ++++++++++++ db/upgrade-20210819.sql | 12 ++++++++++++ mo/db.py | 6 ++++++ mo/web/api.py | 5 ++--- 4 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 db/upgrade-20210819.sql diff --git a/db/db.ddl b/db/db.ddl index 9979743c..ac7d0dc6 100644 --- a/db/db.ddl +++ b/db/db.ddl @@ -1,9 +1,19 @@ -- CREATE ROLE mo_osmo LOGIN PASSWORD 'pass'; -- CREATE DATABASE mo_osmo WITH OWNER=mo_osmo; -- GRANT mo_osmo TO some_admin; +-- CREATE EXTENSION unaccent; SET ROLE mo_osmo; +-- Funkce pro odakcentování textu pomocí extension unaccent. +-- Je immutable, takže se dá používat i v indexech. +-- Zdroj: http://stackoverflow.com/questions/11005036/does-postgresql-support-accent-insensitive-collations +CREATE OR REPLACE FUNCTION f_unaccent(text) + RETURNS text AS +$func$ +SELECT unaccent('unaccent', $1) +$func$ LANGUAGE sql IMMUTABLE SET search_path = public, pg_temp; + -- Uživatelský účet CREATE TABLE users ( user_id serial PRIMARY KEY, @@ -45,6 +55,8 @@ CREATE TABLE places ( ); CREATE INDEX places_parent_index ON places (parent); +-- XXX: Potřebujeme operator class text_pattern_ops, aby index fungoval na prefixové LIKE +CREATE INDEX places_noacc_index ON places ((lower(f_unaccent(name))) text_pattern_ops); -- Rekurzivní dotaz na nadřazené regiony: -- WITH RECURSIVE parent_regions(parent, place_id) AS ( diff --git a/db/upgrade-20210819.sql b/db/upgrade-20210819.sql new file mode 100644 index 00000000..5fa30fbd --- /dev/null +++ b/db/upgrade-20210819.sql @@ -0,0 +1,12 @@ +SET ROLE 'mo_osmo'; + +-- Musí udělat správce (ale uvnitř naší DB): +-- CREATE EXTENSION unaccent; + +CREATE OR REPLACE FUNCTION f_unaccent(text) + RETURNS text AS +$func$ +SELECT unaccent('unaccent', $1) +$func$ LANGUAGE sql IMMUTABLE SET search_path = public, pg_temp; + +CREATE INDEX places_noacc_index ON places ((lower(f_unaccent(name))) text_pattern_ops); diff --git a/mo/db.py b/mo/db.py index ed2b0356..ab9c17a3 100644 --- a/mo/db.py +++ b/mo/db.py @@ -16,6 +16,7 @@ from sqlalchemy.orm.attributes import get_history from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.sql.expression import CTE +from sqlalchemy.sql.functions import ReturnTypeFromArgs from sqlalchemy.sql.sqltypes import Numeric from typing import Optional, List, Tuple @@ -34,6 +35,11 @@ else: from sqlalchemy import Enum +# Funkce f_unaccent je definovaná v db.ddl, naučme Alchymii volat ji: +class f_unaccent(ReturnTypeFromArgs): + pass + + Base = declarative_base() metadata = Base.metadata diff --git a/mo/web/api.py b/mo/web/api.py index 9b1f3306..b6ba68ba 100644 --- a/mo/web/api.py +++ b/mo/web/api.py @@ -1,5 +1,6 @@ from flask import request from flask.json import jsonify +from sqlalchemy import func from sqlalchemy.orm import joinedload import werkzeug.exceptions @@ -22,12 +23,10 @@ def api_find_town(): elif '%' in query: return jsonify(error='Nepovolené znaky ve jménu obce.') else: - # FIXME: Hledání bez akcentů - # FIXME: Hodil by se index... max_places = 50 places = (db.get_session().query(db.Place) .filter_by(level=3) - .filter(db.Place.name.ilike(query + '%')) + .filter(func.lower(db.f_unaccent(db.Place.name)).like(func.lower(db.f_unaccent(query + '%')))) .options(joinedload(db.Place.parent_place)) .order_by(db.Place.name, db.Place.place_id) .limit(max_places) -- GitLab