From 31139484b19235ed3ca7418ec5e79da84b06fbc5 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Thu, 19 Aug 2021 23:25:23 +0200
Subject: [PATCH] =?UTF-8?q?DB:=20Index=20na=20case-insensitive=20accent-in?=
 =?UTF-8?q?sensitive=20hled=C3=A1n=C3=AD=20ve=20jm=C3=A9nech=20m=C3=ADst?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 db/db.ddl               | 12 ++++++++++++
 db/upgrade-20210819.sql | 12 ++++++++++++
 mo/db.py                |  6 ++++++
 mo/web/api.py           |  5 ++---
 4 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 db/upgrade-20210819.sql

diff --git a/db/db.ddl b/db/db.ddl
index 9979743c..ac7d0dc6 100644
--- a/db/db.ddl
+++ b/db/db.ddl
@@ -1,9 +1,19 @@
 -- CREATE ROLE mo_osmo LOGIN PASSWORD 'pass';
 -- CREATE DATABASE mo_osmo WITH OWNER=mo_osmo;
 -- GRANT mo_osmo TO some_admin;
+-- CREATE EXTENSION unaccent;
 
 SET ROLE mo_osmo;
 
+-- Funkce pro odakcentování textu pomocí extension unaccent.
+-- Je immutable, takže se dá používat i v indexech.
+-- Zdroj: http://stackoverflow.com/questions/11005036/does-postgresql-support-accent-insensitive-collations
+CREATE OR REPLACE FUNCTION f_unaccent(text)
+  RETURNS text AS
+$func$
+SELECT unaccent('unaccent', $1)
+$func$  LANGUAGE sql IMMUTABLE SET search_path = public, pg_temp;
+
 -- Uživatelský účet
 CREATE TABLE users (
 	user_id		serial		PRIMARY KEY,
@@ -45,6 +55,8 @@ CREATE TABLE places (
 );
 
 CREATE INDEX places_parent_index ON places (parent);
+-- XXX: Potřebujeme operator class text_pattern_ops, aby index fungoval na prefixové LIKE
+CREATE INDEX places_noacc_index ON places ((lower(f_unaccent(name))) text_pattern_ops);
 
 -- Rekurzivní dotaz na nadřazené regiony:
 --	WITH RECURSIVE parent_regions(parent, place_id) AS (
diff --git a/db/upgrade-20210819.sql b/db/upgrade-20210819.sql
new file mode 100644
index 00000000..5fa30fbd
--- /dev/null
+++ b/db/upgrade-20210819.sql
@@ -0,0 +1,12 @@
+SET ROLE 'mo_osmo';
+
+-- Musí udělat správce (ale uvnitř naší DB):
+-- CREATE EXTENSION unaccent;
+
+CREATE OR REPLACE FUNCTION f_unaccent(text)
+  RETURNS text AS
+$func$
+SELECT unaccent('unaccent', $1)
+$func$  LANGUAGE sql IMMUTABLE SET search_path = public, pg_temp;
+
+CREATE INDEX places_noacc_index ON places ((lower(f_unaccent(name))) text_pattern_ops);
diff --git a/mo/db.py b/mo/db.py
index ed2b0356..ab9c17a3 100644
--- a/mo/db.py
+++ b/mo/db.py
@@ -16,6 +16,7 @@ from sqlalchemy.orm.attributes import get_history
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.sql.expression import CTE
+from sqlalchemy.sql.functions import ReturnTypeFromArgs
 from sqlalchemy.sql.sqltypes import Numeric
 from typing import Optional, List, Tuple
 
@@ -34,6 +35,11 @@ else:
     from sqlalchemy import Enum
 
 
+# Funkce f_unaccent je definovaná v db.ddl, naučme Alchymii volat ji:
+class f_unaccent(ReturnTypeFromArgs):
+    pass
+
+
 Base = declarative_base()
 metadata = Base.metadata
 
diff --git a/mo/web/api.py b/mo/web/api.py
index 9b1f3306..b6ba68ba 100644
--- a/mo/web/api.py
+++ b/mo/web/api.py
@@ -1,5 +1,6 @@
 from flask import request
 from flask.json import jsonify
+from sqlalchemy import func
 from sqlalchemy.orm import joinedload
 import werkzeug.exceptions
 
@@ -22,12 +23,10 @@ def api_find_town():
     elif '%' in query:
         return jsonify(error='Nepovolené znaky ve jménu obce.')
     else:
-        # FIXME: Hledání bez akcentů
-        # FIXME: Hodil by se index...
         max_places = 50
         places = (db.get_session().query(db.Place)
                   .filter_by(level=3)
-                  .filter(db.Place.name.ilike(query + '%'))
+                  .filter(func.lower(db.f_unaccent(db.Place.name)).like(func.lower(db.f_unaccent(query + '%'))))
                   .options(joinedload(db.Place.parent_place))
                   .order_by(db.Place.name, db.Place.place_id)
                   .limit(max_places)
-- 
GitLab