PostgreSQL：不區分大小寫的字符串比較

11

首先說明不該怎麼做：不要使用 ILIKE……

-- Demo setup for the "do not use ILIKE" case: a plain text e-mail column
-- queried with ILIKE ANY(...). The plan output that accompanies this snippet
-- reports a Seq Scan for the final query.
create table y 
(
id serial not null, 
email text not null unique  -- UNIQUE already creates a b-tree index on email
); 

-- Two mixed-case sample rows. They have to differ from each other: two
-- identical literals would make the second row violate the UNIQUE constraint.
insert into y(email) 
values('Alice@Example.com') ,('bob@example.com'); 
-- Filler rows: the integers 1..1000, stored as text.
insert into y(email) 
select n from generate_series(1,1000) as i(n); 
-- Duplicates the index that backs the UNIQUE constraint; kept so the setup
-- stays the same as in the original write-up.
create index ix_y on y(email); 

-- Look the sample rows up using a different letter case than was stored.
explain select * from y 
where email ilike 
    ANY(ARRAY['alice@example.com','BOB@EXAMPLE.COM']);

執行計劃：

memdb=# explain select * from y where email ilike ANY(ARRAY['[email protected]','[email protected]']); 
             QUERY PLAN          
---------------------------------------------------------------------------------------- 
Seq Scan on y (cost=0.00..17.52 rows=1 width=7) 
    Filter: (email ~~* ANY ('{[email protected],[email protected]com}'::text[])) 
(2 rows)

相反地，你應該建立一個 lower 表達式索引……

-- Overload of lower() for text arrays: the array is cast to its text
-- representation, that string is lower-cased with lower(text), and the result
-- is cast back to text[].
-- No volatility category is declared, so PostgreSQL's default (VOLATILE) applies.
-- NOTE(review): this relies on the array's textual form surviving lower-casing
-- unchanged apart from letter case — confirm for elements that need quoting.
create function lower(t text[]) returns text[] 
as 
$$ 
select lower($1::text)::text[] 
$$ language sql; 

-- Expression index on the lower-cased column value. The argument here is a
-- text column, so this is lower(text), not the array overload defined above.
create unique index ix_y_2 on y(lower(email)); 

-- Both sides are lower-cased: the column through the indexed expression, the
-- search list through the array overload.
explain select * from y 
where lower(email) = 
    ANY(lower(ARRAY['[email protected]','[email protected]']));

……這樣就能正確使用索引：

memdb=# explain select * from y where lower(email) = ANY(lower(ARRAY['[email protected]','[email protected]'])); 
                  QUERY PLAN               
-------------------------------------------------------------------------------------------------------------------------------- 
Bitmap Heap Scan on y (cost=22.60..27.98 rows=10 width=7) 
    Recheck Cond: (lower(email) = ANY ((lower(('{[email protected],[email protected]}'::text[])::text))::text[])) 
    -> Bitmap Index Scan on ix_y_2 (cost=0.00..22.60 rows=10 width=0) 
     Index Cond: (lower(email) = ANY ((lower(('{[email protected],[email protected]}'::text[])::text))::text[])) 
(4 rows)

或者你用citext數據類型...

-- Same demo as table y, but the e-mail column is citext, so plain "=" compares
-- without regard to letter case.
create table x 
(
id serial not null, 
email citext not null unique  -- UNIQUE already creates a b-tree index on email
); 

-- Two mixed-case sample rows. They have to differ from each other: two
-- identical literals would make the second row violate the UNIQUE constraint.
insert into x(email) 
values('Alice@Example.com'),('bob@example.com'); 
-- Filler rows: the integers 1..1000, stored as citext.
insert into x(email) 
select n from generate_series(1,1000) as i(n); 
-- Kept because the plan output quoted with this snippet names ix_x.
create index ix_x on x(email); 

-- The cast to citext[] keeps the comparison on the citext "=" operator.
explain select * from x 
where email = 
ANY(ARRAY['alice@example.com','BOB@EXAMPLE.COM']::citext[]);

……它同樣能正確使用索引，即使你沒有建立表達式索引（例如 create index zzz on yyy(lower(field))）：

memdb=# explain select * from x where email = ANY(ARRAY['[email protected]','[email protected]']::citext[]); 
              QUERY PLAN            
-------------------------------------------------------------------------------------------------- 
Bitmap Heap Scan on x (cost=8.52..12.75 rows=2 width=7) 
    Recheck Cond: (email = ANY ('{[email protected],[email protected]}'::citext[])) 
    -> Bitmap Index Scan on ix_x (cost=0.00..8.52 rows=2 width=0) 
     Index Cond: (email = ANY ('{[email protected],[email protected]}'::citext[])) 
(4 rows)

來源

2013-05-07 11:49:21

+0

請注意，使用 trigram 索引時，可以讓 'ILIKE' 用上索引：https://www.postgresql.org/docs/current/static/pgtrgm.html（不過 B-Tree 索引的更新速度會更快） – 2017-01-17 07:17:56

+0

如果您已將該欄位聲明爲「unique」，就不需要再在「email」上另外創建索引，因爲唯一約束本身已經創建了一個索引。 – 2017-01-17 07:36:19

9

使用不區分大小寫的文本數據類型。使用citext：

-- E-mail addresses stored as citext, so "=" and IN compare case-insensitively.
-- Assumes a users(user_id) table already exists for the foreign key.
create table emails 
(
user_id int references users(user_id),  -- the separating comma was missing here
email citext 
); 

insert into emails(user_id, email) values(1, '[email protected]'); 
insert into emails(user_id, email) values(2, '[email protected]'); 

select * from emails where email in ('[email protected]','[email protected]');

如果在 contrib 目錄中找不到 citext.sql，請將以下內容複製並貼到 pgAdmin 中執行：

/* $PostgreSQL: pgsql/contrib/citext/citext.sql.in,v 1.3 2008/09/05 18:25:16 tgl Exp $ */ 

-- Adjust this setting to control where the objects get created. 
SET search_path = public; 

-- 
-- PostgreSQL code for CITEXT. 
-- 
-- Most I/O functions, and a few others, piggyback on the "text" type 
-- functions via the implicit cast to text. 
-- 

-- 
-- Shell type to keep things a bit quieter. 
-- 

-- Declared without attributes first, so that the I/O functions below can use
-- citext in their signatures before the full CREATE TYPE further down.
CREATE TYPE citext; 

-- 
-- Input and output functions. 
-- 
-- Each function is bound (LANGUAGE internal) to the built-in routine named in
-- its AS clause: textin, textout, textrecv and textsend.
CREATE OR REPLACE FUNCTION citextin(cstring) 
RETURNS citext 
AS 'textin' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citextout(citext) 
RETURNS cstring 
AS 'textout' 
LANGUAGE internal IMMUTABLE STRICT; 

-- The binary receive/send pair is declared STABLE, unlike the IMMUTABLE
-- text-format pair above.
CREATE OR REPLACE FUNCTION citextrecv(internal) 
RETURNS citext 
AS 'textrecv' 
LANGUAGE internal STABLE STRICT; 

CREATE OR REPLACE FUNCTION citextsend(citext) 
RETURNS bytea 
AS 'textsend' 
LANGUAGE internal STABLE STRICT; 

-- 
-- The type itself. 
-- 

-- Completes the shell type using the four I/O functions defined above.
CREATE TYPE citext (
    INPUT   = citextin, 
    OUTPUT   = citextout, 
    RECEIVE  = citextrecv, 
    SEND   = citextsend, 
    INTERNALLENGTH = VARIABLE, 
    STORAGE  = extended, 
    -- make it a non-preferred member of string type category 
    CATEGORY  = 'S', 
    PREFERRED  = false 
); 

-- 
-- Type casting functions for those situations where the I/O casts don't 
-- automatically kick in. 
-- 

-- Conversion functions from bpchar, boolean and inet to citext. Each one is
-- bound to the internal routine named in its AS clause (rtrim1, booltext,
-- network_show).
CREATE OR REPLACE FUNCTION citext(bpchar) 
RETURNS citext 
AS 'rtrim1' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext(boolean) 
RETURNS citext 
AS 'booltext' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext(inet) 
RETURNS citext 
AS 'network_show' 
LANGUAGE internal IMMUTABLE STRICT; 

-- 
-- Implicit and assignment type casts. 
-- 

-- Only citext -> text and citext -> varchar are IMPLICIT; every cast *into*
-- citext, and citext -> bpchar, is ASSIGNMENT-only. The text/varchar casts
-- need no function (WITHOUT FUNCTION); bpchar, boolean and inet go through
-- the conversion functions defined above.
CREATE CAST (citext AS text) WITHOUT FUNCTION AS IMPLICIT; 
CREATE CAST (citext AS varchar) WITHOUT FUNCTION AS IMPLICIT; 
CREATE CAST (citext AS bpchar) WITHOUT FUNCTION AS ASSIGNMENT; 
CREATE CAST (text AS citext) WITHOUT FUNCTION AS ASSIGNMENT; 
CREATE CAST (varchar AS citext) WITHOUT FUNCTION AS ASSIGNMENT; 
CREATE CAST (bpchar AS citext) WITH FUNCTION citext(bpchar) AS ASSIGNMENT; 
CREATE CAST (boolean AS citext) WITH FUNCTION citext(boolean) AS ASSIGNMENT; 
CREATE CAST (inet AS citext) WITH FUNCTION citext(inet) AS ASSIGNMENT; 

-- 
-- Operator Functions. 
-- 

-- The six comparison functions are C functions loaded from the citext shared
-- library ('$libdir/citext'); their implementation is not part of this script.
CREATE OR REPLACE FUNCTION citext_eq(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_ne(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_lt(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_le(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_gt(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_ge(citext, citext) 
RETURNS bool 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

-- 
-- Operators. 
-- 

-- Each operator names its commutator and negator and a selectivity estimator
-- pair (RESTRICT / JOIN). Only "=" is additionally marked HASHES and MERGES.
CREATE OPERATOR = (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    COMMUTATOR = =, 
    NEGATOR = <>, 
    PROCEDURE = citext_eq, 
    RESTRICT = eqsel, 
    JOIN  = eqjoinsel, 
    HASHES, 
    MERGES 
); 

CREATE OPERATOR <> (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = =, 
    COMMUTATOR = <>, 
    PROCEDURE = citext_ne, 
    RESTRICT = neqsel, 
    JOIN  = neqjoinsel 
); 

-- "<" and "<=" share the scalarlt* estimators; ">=" and ">" share scalargt*.
CREATE OPERATOR < (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = >=, 
    COMMUTATOR = >, 
    PROCEDURE = citext_lt, 
    RESTRICT = scalarltsel, 
    JOIN  = scalarltjoinsel 
); 

CREATE OPERATOR <= (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = >, 
    COMMUTATOR = >=, 
    PROCEDURE = citext_le, 
    RESTRICT = scalarltsel, 
    JOIN  = scalarltjoinsel 
); 

CREATE OPERATOR >= (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = <, 
    COMMUTATOR = <=, 
    PROCEDURE = citext_ge, 
    RESTRICT = scalargtsel, 
    JOIN  = scalargtjoinsel 
); 

CREATE OPERATOR > (
    LEFTARG = CITEXT, 
    RIGHTARG = CITEXT, 
    NEGATOR = <=, 
    COMMUTATOR = <, 
    PROCEDURE = citext_gt, 
    RESTRICT = scalargtsel, 
    JOIN  = scalargtjoinsel 
); 

-- 
-- Support functions for indexing. 
-- 

-- Support routines for the two operator classes below, both loaded from the
-- citext shared library: a comparison function for btree and a hash function
-- for hash indexes.
CREATE OR REPLACE FUNCTION citext_cmp(citext, citext) 
RETURNS int4 
AS '$libdir/citext' 
LANGUAGE C STRICT IMMUTABLE; 

CREATE OR REPLACE FUNCTION citext_hash(citext) 
RETURNS int4 
AS '$libdir/citext' 
LANGUAGE C STRICT IMMUTABLE; 

-- 
-- The btree indexing operator class. 
-- 

-- DEFAULT, so an index created on a citext column without naming an operator
-- class uses the citext comparison operators defined in this script.
CREATE OPERATOR CLASS citext_ops 
DEFAULT FOR TYPE CITEXT USING btree AS 
    OPERATOR 1 < (citext, citext), 
    OPERATOR 2 <= (citext, citext), 
    OPERATOR 3 = (citext, citext), 
    OPERATOR 4 >= (citext, citext), 
    OPERATOR 5 > (citext, citext), 
    FUNCTION 1 citext_cmp(citext, citext); 

-- 
-- The hash indexing operator class. 
-- 

-- Same class name as the btree class above, but declared for the hash access
-- method.
CREATE OPERATOR CLASS citext_ops 
DEFAULT FOR TYPE citext USING hash AS 
    OPERATOR 1 = (citext, citext), 
    FUNCTION 1 citext_hash(citext); 

-- 
-- Aggregates. 
-- 

-- min() and max() for citext. The two state functions are C functions loaded
-- from the citext shared library; each takes two citext values and returns
-- one of them.
-- The language name is written unquoted, as for every other C function in
-- this script: PostgreSQL 9.2 and later no longer down-case a quoted language
-- name, so the former spelling LANGUAGE 'C' is rejected there.
CREATE OR REPLACE FUNCTION citext_smaller(citext, citext) 
RETURNS citext 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION citext_larger(citext, citext) 
RETURNS citext 
AS '$libdir/citext' 
LANGUAGE C IMMUTABLE STRICT; 

-- SORTOP names the citext ordering operator associated with each aggregate.
CREATE AGGREGATE min(citext) (
    SFUNC = citext_smaller, 
    STYPE = citext, 
    SORTOP = < 
); 

CREATE AGGREGATE max(citext) (
    SFUNC = citext_larger, 
    STYPE = citext, 
    SORTOP = > 
); 

-- 
-- CITEXT pattern matching. 
-- 

-- citext/citext overloads of four pattern-matching functions, each bound to
-- the internal routine of the same name (texticlike, texticnlike,
-- texticregexeq, texticregexne).
CREATE OR REPLACE FUNCTION texticlike(citext, citext) 
RETURNS bool AS 'texticlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticnlike(citext, citext) 
RETURNS bool AS 'texticnlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexeq(citext, citext) 
RETURNS bool AS 'texticregexeq' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexne(citext, citext) 
RETURNS bool AS 'texticregexne' 
LANGUAGE internal IMMUTABLE STRICT; 

-- Regex operators: "~" and "~*" are both bound to texticregexeq, and "!~" and
-- "!~*" are both bound to texticregexne, so each pair behaves identically for
-- citext operands.
CREATE OPERATOR ~ (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR ~* (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~*, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR !~ (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

CREATE OPERATOR !~* (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~*, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

-- LIKE operators: "~~" and "~~*" are both bound to texticlike, and "!~~" and
-- "!~~*" are both bound to texticnlike.
CREATE OPERATOR ~~ (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~~, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR ~~* (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = !~~*, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR !~~ (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~~, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

CREATE OPERATOR !~~* (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = citext, 
    NEGATOR = ~~*, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

-- 
-- Matching citext to text. 
-- 

-- Same four pattern-matching functions as the citext/citext set, overloaded
-- for a citext left operand and a plain text right operand.
CREATE OR REPLACE FUNCTION texticlike(citext, text) 
RETURNS bool AS 'texticlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticnlike(citext, text) 
RETURNS bool AS 'texticnlike' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexeq(citext, text) 
RETURNS bool AS 'texticregexeq' 
LANGUAGE internal IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION texticregexne(citext, text) 
RETURNS bool AS 'texticregexne' 
LANGUAGE internal IMMUTABLE STRICT; 

-- Regex operators for (citext, text): "~"/"~*" share texticregexeq and
-- "!~"/"!~*" share texticregexne.
CREATE OPERATOR ~ (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR ~* (
    PROCEDURE = texticregexeq, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~*, 
    RESTRICT = icregexeqsel, 
    JOIN  = icregexeqjoinsel 
); 

CREATE OPERATOR !~ (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

CREATE OPERATOR !~* (
    PROCEDURE = texticregexne, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~*, 
    RESTRICT = icregexnesel, 
    JOIN  = icregexnejoinsel 
); 

-- LIKE operators for (citext, text): "~~"/"~~*" share texticlike and
-- "!~~"/"!~~*" share texticnlike.
CREATE OPERATOR ~~ (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~~, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR ~~* (
    PROCEDURE = texticlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = !~~*, 
    RESTRICT = iclikesel, 
    JOIN  = iclikejoinsel 
); 

CREATE OPERATOR !~~ (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~~, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

CREATE OPERATOR !~~* (
    PROCEDURE = texticnlike, 
    LEFTARG = citext, 
    RIGHTARG = text, 
    NEGATOR = ~~*, 
    RESTRICT = icnlikesel, 
    JOIN  = icnlikejoinsel 
); 

-- 
-- Matching citext in string comparison functions. 
-- XXX TODO Ideally these would be implemented in C. 
-- 

-- SQL-language wrappers that give the string functions below citext
-- overloads. Each wrapper casts its citext arguments to pg_catalog.text and
-- calls the schema-qualified pg_catalog function.
--
-- Regex wrappers: the two-argument forms always pass the flag 'i'; the forms
-- that take a flags argument append 'i' unless the caller's flags contain 'c'.
-- NOTE(review): both regexp_matches wrappers are declared RETURNS TEXT[]
-- (not SETOF), so a call that could yield several matches, e.g. with the 'g'
-- flag, presumably returns only the first row — confirm before relying on it.
CREATE OR REPLACE FUNCTION regexp_matches(citext, citext) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_matches($1::pg_catalog.text, $2::pg_catalog.text, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_matches(citext, citext, text) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_matches($1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_replace(citext, citext, text) returns TEXT AS $$ 
    SELECT pg_catalog.regexp_replace($1::pg_catalog.text, $2::pg_catalog.text, $3, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_replace(citext, citext, text, text) returns TEXT AS $$ 
    SELECT pg_catalog.regexp_replace($1::pg_catalog.text, $2::pg_catalog.text, $3, CASE WHEN pg_catalog.strpos($4, 'c') = 0 THEN $4 || 'i' ELSE $4 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_array(citext, citext) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_split_to_array($1::pg_catalog.text, $2::pg_catalog.text, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_array(citext, citext, text) RETURNS TEXT[] AS $$ 
    SELECT pg_catalog.regexp_split_to_array($1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_table(citext, citext) RETURNS SETOF TEXT AS $$ 
    SELECT pg_catalog.regexp_split_to_table($1::pg_catalog.text, $2::pg_catalog.text, 'i'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION regexp_split_to_table(citext, citext, text) RETURNS SETOF TEXT AS $$ 
    SELECT pg_catalog.regexp_split_to_table($1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i' ELSE $3 END); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

-- strpos: both arguments are lower-cased before the position lookup.
CREATE OR REPLACE FUNCTION strpos(citext, citext) RETURNS INT AS $$ 
    SELECT pg_catalog.strpos(pg_catalog.lower($1::pg_catalog.text), pg_catalog.lower($2::pg_catalog.text)); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

-- replace and split_part are built on the regex functions. The inner
-- regexp_replace first prefixes every character of $2 outside [a-zA-Z_0-9]
-- with a backslash, so the search string is used as a literal pattern; the
-- outer call then runs with 'gi' (replace) or 'i' (split_part).
CREATE OR REPLACE FUNCTION replace(citext, citext, citext) RETURNS TEXT AS $$ 
    SELECT pg_catalog.regexp_replace($1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text, '([^a-zA-Z_0-9])', E'\\\\\\1', 'g'), $3::pg_catalog.text, 'gi'); 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

CREATE OR REPLACE FUNCTION split_part(citext, citext, int) RETURNS TEXT AS $$ 
    SELECT (pg_catalog.regexp_split_to_array($1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text, '([^a-zA-Z_0-9])', E'\\\\\\1', 'g'), 'i'))[$3]; 
$$ LANGUAGE SQL IMMUTABLE STRICT; 

-- translate: applied twice, first with the lower-cased and then with the
-- upper-cased form of $2, each time mapping to $3.
CREATE OR REPLACE FUNCTION translate(citext, citext, text) RETURNS TEXT AS $$ 
    SELECT pg_catalog.translate(pg_catalog.translate($1::pg_catalog.text, pg_catalog.lower($2::pg_catalog.text), $3), pg_catalog.upper($2::pg_catalog.text), $3); 
$$ LANGUAGE SQL IMMUTABLE STRICT;

來源

2010-12-19 09:13:55

+2

執行 'create extension "citext";' 即可安裝此模組 – 2014-10-30 16:59:50

38

-- Query fragment illustrating the ILIKE predicate only.
-- NOTE(review): there is no FROM clause; supply the table that holds the
-- email column before running this.
select * 
where email ilike '[email protected]'

ilike 與 like 類似，但不區分大小寫。若要跳脫（escape）萬用字元，可使用 replace()：

-- WHERE-clause fragment: escapes the LIKE metacharacters in the parameter $1
-- so that it is matched literally. The escape character '~' is doubled first,
-- then '%' and '_' are each prefixed with '~'; ESCAPE '~' declares '~' as the
-- escape character for this ILIKE.
where email ilike replace(replace(replace($1, '~', '~~'), '%', '~%'), '_', '~_') escape '~'

或者你可以創建一個函數來跳脫文字；若要比對文字陣列，可使用：

-- WHERE-clause fragment: true when email matches, via ILIKE, at least one
-- element of the array.
where email ilike any(array['[email protected]', '[email protected]'])

來源

2010-12-19 09:32:15 Bonshington

+0

+1'any'運營商正是我所期待的。謝謝！ – 2010-12-19 09:41:33

+3

'LIKE' 和 'ILIKE' 與字符串相等比較非常不同，而且爲了去除元字符所需的 'replace' 魔法比原來的 'lower' 調用糟糕得多。雖然不處理元字符的 'ILIKE' 通常可以作爲快速而粗糙的一次性做法，但我不會主張把它當作通用的不區分大小寫字符串比較。 – Ben 2012-08-15 02:38:09

+0

@Bonshington我喜歡'ILike'的想法 - 畢竟這些年來，我從來不知道它。但是，你知道這是否適用於任何語言，還是僅適用於英語和拉丁語集？謝謝！ +1上面的答案。 – itsols 2013-01-02 08:02:15

3

您也可以在lower（email）上創建索引。

來源

2010-12-19 11:41:32 peufeu

+1

會有點擊敗這個問題的目的，但提問者不想被打擾使用較低的我猜:-)使用citext的一些基本原理：http://www.depesz.com/index.php/2008/08/10/waiting-for-84-case-insensitive-text-citext/ – 2010-12-19 13:21:04

-3

Use ‘Collate SQL_Latin1_General_CP1_CS_AS’ for it. 
-- T-SQL (SQL Server) example: compares two variables under an explicit
-- collation and prints which branch was taken.
-- SQL_Latin1_General_CP1_CS_AS is a case-sensitive (CS), accent-sensitive (AS)
-- collation, so 'a' and 'A' compare as different here.
declare @a nvarchar(5)='a' 
declare @b nvarchar(5)='A' 

-- The comparison reads "@a=@b"; the page's e-mail obfuscation had replaced
-- that token with a placeholder.
if(@a=@b Collate SQL_Latin1_General_CP1_CS_AS) 
begin 
print 'Match' 
end 
else 
begin 
print 'Not Matched' 
end

來源

2016-01-27 05:31:58

+0

OP詢問PostgreSQL，而不是SQL Server。 – NathanAldenSr 2016-08-17 03:13:20

6

自這個問題得到回答以來的 4 年間，情況已經改變，「不要使用 ILIKE」這項建議已不再正確（至少不能這樣一概而論）。

實際上，根據數據分佈情況，帶有trigram index的ILIKE甚至可能比citext更快。

對於唯一索引，兩者確實有很大的差異，使用邁克爾（Michael）的測試設置即可看到：

-- Benchmark fixtures: 100,000 generated addresses stored twice, once as text
-- (table y, with a trigram GIN index for ILIKE) and once as citext (table x,
-- with only the b-tree index that backs its UNIQUE constraint).
-- Uses gin_trgm_ops and the citext type, so both must already be available
-- in the database.
CREATE TABLE y (
    id    serial NOT NULL,
    email text   NOT NULL UNIQUE
);

INSERT INTO y (email)
SELECT 'some.name' || n || '@foobar.com'
FROM generate_series(1, 100000) AS i (n);

-- Trigram index so that ILIKE predicates on y.email can be index-assisted.
CREATE INDEX ix_y ON y USING gin (email gin_trgm_ops);

CREATE TABLE x (
    id    serial NOT NULL,
    email citext NOT NULL UNIQUE
);
-- No explicit index on x: the UNIQUE constraint already creates a regular
-- B-Tree index.

-- Copy the same addresses into the citext table.
INSERT INTO x (email)
SELECT email
FROM y;

使用ILIKE執行計劃：

-- Timed plan for the ILIKE lookup against the text column of y.
EXPLAIN (ANALYZE)
SELECT *
FROM y
WHERE email ILIKE ANY (ARRAY['[email protected]', '[email protected]']);

Bitmap Heap Scan on y (cost=126.07..154.50 rows=20 width=29) (actual time=60.696..60.818 rows=2 loops=1) 
    Recheck Cond: (email ~~* ANY ('{[email protected],[email protected]}'::text[])) 
    Rows Removed by Index Recheck: 13 
    Heap Blocks: exact=11 
    -> Bitmap Index Scan on ix_y (cost=0.00..126.07 rows=20 width=0) (actual time=60.661..60.661 rows=15 loops=1) 
     Index Cond: (email ~~* ANY ('{[email protected],[email protected]}'::text[])) 
Planning time: 0.952 ms 
Execution time: 61.004 ms

而且使用citext：

-- Timed plan for the equality lookup against the citext column of x.
-- The array is cast to citext[] so that the citext "=" operator is used; an
-- uncast array is text[], and the column would then be compared as text
-- (case-sensitively). The plan quoted with this query shows the citext[]
-- form of the condition.
explain (analyze) 
select * 
from x 
where email = ANY (ARRAY['[email protected]','[email protected]']::citext[]);

Index Scan using x_email_key on x (cost=0.42..5.85 rows=2 width=29) (actual time=0.111..0.203 rows=2 loops=1) 
    Index Cond: (email = ANY ('{[email protected],[email protected]}'::citext[])) 
Planning time: 0.115 ms 
Execution time: 0.254 ms

請注意，ILIKE 查詢實際上與 = citext 查詢並不相同，因爲 ILIKE 會解讀通配符（wildcards）。

但是，對於非唯一索引，情況就不同了。下面的設置取自最近一個提出相同問題的提問（recent question）：

-- Fixtures for the non-unique case: the same generated names stored as text
-- (data) and as citext (data_ci). The cross product of two 1..1000 series
-- yields one million rows with 1000 distinct name values.
CREATE TABLE data (
    group_id serial PRIMARY KEY,
    name     text
);

CREATE TABLE data_ci (
    group_id serial PRIMARY KEY,
    name     citext
);

INSERT INTO data (name)
SELECT 'data' || i.n
FROM generate_series(1, 1000) AS i (n)
CROSS JOIN generate_series(1, 1000) AS i2 (n);

-- Mirror the rows, keys included, into the citext table.
INSERT INTO data_ci (group_id, name)
SELECT group_id, name
FROM data;

-- Trigram GIN index on the text column, plain index on the citext column.
CREATE INDEX ix_data_gin ON data USING gin (name public.gin_trgm_ops);
CREATE INDEX ix_data_ci ON data_ci (name);

因此，每個表各有一百萬行，name 列有 1000 個不同的值，每個不同的值各有 1000 筆重複。查詢 3 個不同值的查詢將返回 3000 行。

在這種情況下，trigram 索引明顯比 B-Tree 索引更快：

-- Timed plan for a mixed-case ILIKE lookup of three names in the text table.
EXPLAIN (ANALYZE)
SELECT *
FROM data
WHERE name ILIKE ANY (ARRAY['Data1', 'data2', 'DATA3']);

Bitmap Heap Scan on data (cost=88.25..1777.61 rows=1535 width=11) (actual time=2.906..11.064 rows=3000 loops=1) 
    Recheck Cond: (name ~~* ANY ('{Data1,data2,DATA3}'::text[])) 
    Heap Blocks: exact=17 
    -> Bitmap Index Scan on ix_data_gin (cost=0.00..87.87 rows=1535 width=0) (actual time=2.869..2.869 rows=3000 loops=1) 
     Index Cond: (name ~~* ANY ('{Data1,data2,DATA3}'::text[])) 
Planning time: 2.174 ms 
Execution time: 11.282 ms

而 citext 列上的 B-Tree 索引沒有被使用，查詢現在改用 Seq Scan：

-- Timed plan for the same three-name lookup against the citext table.
-- The array must be cast to citext[]: an uncast array is text[], which makes
-- PostgreSQL compare (name)::text case-sensitively and keeps the index on the
-- citext column from being used. The plan quoted after this query in the
-- original write-up was captured without the cast; it shows exactly that
-- (a text filter and 1000 rows instead of the expected 3000).
explain analyze 
select * 
from data_ci 
where name = any (array['Data1', 'data2', 'DATA3']::citext[]);

Seq Scan on data_ci (cost=0.00..10156.00 rows=2904 width=11) (actual time=0.449..304.301 rows=1000 loops=1) 
    Filter: ((name)::text = ANY ('{Data1,data2,DATA3}'::text[])) 
    Rows Removed by Filter: 999000 
Planning time: 0.152 ms 
Execution time: 304.360 ms

GIN 索引的大小實際上也比 citext 列上的索引小：

-- Human-readable total size of the two indexes being compared.
SELECT
    pg_size_pretty(pg_total_relation_size('ix_data_gin')) AS gin_index_size,
    pg_size_pretty(pg_total_relation_size('ix_data_ci')) AS citex_index_size

gin_index_size | citex_index_size 
---------------+----------------- 
11 MB   | 21 MB

以上測試是在 Windows 筆記本電腦上使用 Postgres 9.6.1 完成的，random_page_cost 設置爲 1.5。

來源

2017-01-17 08:14:37

PostgreSQL：不區分大小寫的字符串比較

回答

相關問題