This Blog shows LIKE condition index range scan performance and space usage of Oracle index with Wildcard '_'.
Note: Tested in Oracle 19.18.
1. Test Setup
At first, we create a test table with 3 varchar2 columns (name1, name2, name3), which are the concatenations of 3 same items,
but in different order, and then 3 respective indexes on them. The item part contains Wildcard '_'.
drop table test_tab purge;
create table test_tab (
id number
,grp number
,tstr varchar2(14)
,name1 varchar2(100)
,name2 varchar2(100)
,name3 varchar2(100)
);
create unique index test_tab#p on test_tab(id);
alter table test_tab add constraint test_tab#p primary key (id);
create index test_tab#i#name1 on test_tab (name1);
create index test_tab#i#name2 on test_tab (name2);
create index test_tab#i#name3 on test_tab (name3);
insert into test_tab
with sq as (
select level id, mod(level, 300) grp
,to_char((date'2021-11-22' + rownum/86400), 'YYYYMMDDHH24MISS') ts
,decode(mod(level, 3), 0, 'ONE_PART', 1, 'TWO_PART', 2, 'THREE_PART') part
from dual connect by level <= 3*1e5)
select id, grp, ts
,part ||'.'||grp ||'.'||ts name1
,grp ||'.'||part ||'.'||ts name2
,ts ||'.'||part ||'.'||grp name3
from sq;
commit;
exec dbms_stats.gather_table_stats('', 'TEST_TAB', cascade=>true);
If we run a query, we can see the format of three name columns:
col name1 for a30 new_value n1
col name2 for a30 new_value n2
col name3 for a30 new_value n3
select id, name1, name2, name3 from test_tab m where id = trunc(dbms_random.value(1, 3*1e5));
ID NAME1 NAME2 NAME3
------ --------------------------- --------------------------- ---------------------------
243505 TWO_PART.205.20211124193825 205.TWO_PART.20211124193825 20211124193825.TWO_PART.205
Then we create a Plsql test program using 3 indexes and printout their execution stats.
create or replace procedure test_tab_proc (p_case number, p_cnt number) as
l_start number;
l_start_cr number;
l_end_cr number;
l_name1 varchar2(100);
l_name2 varchar2(100);
l_name3 varchar2(100);
l_ret varchar2(200);
begin
select name1, name2, name3 into l_name1, l_name2, l_name3
from test_tab where id = trunc(dbms_random.value(1, 3*1e5));
l_start := dbms_utility.get_time;
select value into l_start_cr from v$mystat s, v$statname n where s.statistic#=n.statistic# and name = 'consistent gets';
case p_case
when 1 then
for i in 1..p_cnt loop
select /*+ index_rs_asc(t (name1)) */ name1 into l_ret from test_tab t where name1 like l_name1;
end loop;
dbms_output.put_line('--------- Index: name1 like '||l_name1||' --------- ');
when 2 then
for i in 1..p_cnt loop
select /*+ index_rs_asc(t (name2)) */ name2 into l_ret from test_tab t where name2 like l_name2;
end loop;
dbms_output.put_line('--------- Index: name2 like '||l_name2||' --------- ');
when 3 then
for i in 1..p_cnt loop
select /*+ index_rs_asc(t (name3)) */ name3 into l_ret from test_tab t where name3 like l_name3;
end loop;
dbms_output.put_line('--------- Index: name3 like '||l_name3||' --------- ');
end case;
select value into l_end_cr from v$mystat s, v$statname n where s.statistic#=n.statistic# and name = 'consistent gets';
dbms_output.put_line('Total Elapsed MS = '||round((dbms_utility.get_time-l_start)*10)||
', Total CR gets= ' ||(l_end_cr-l_start_cr)||
', Per Exec MS = ' ||round((dbms_utility.get_time-l_start)*10/p_cnt, 2)||
', Per Exec CR gets = '||round((l_end_cr-l_start_cr)/p_cnt));
end;
/
2. Test Run
We run 3 tests for 3 indexes with Sql trace:
alter session set tracefile_identifier = 'sql_trc_1';
alter session set events '10046 trace name context forever, level 12';
exec test_tab_proc(1, 100);
exec test_tab_proc(2, 100);
exec test_tab_proc(3, 100);
alter session set events '10046 trace name context off';
The output shows that index name1 has 902 CR gets per execution, name2 has 12, and name3 has 3,
although each of them returns one single row per execution.
SQL > exec test_tab_proc(1, 100);
--------- Index: name1 like ONE_PART.270.20211122144930 ---------
Total Elapsed MS = 1380, Total CR gets= 90200, Per Exec MS = 13.8, Per Exec CR gets = 902
SQL > exec test_tab_proc(2, 100);
--------- Index: name2 like 37.TWO_PART.20211124044537 ---------
Total Elapsed MS = 30, Total CR gets= 1200, Per Exec MS = .3, Per Exec CR gets = 12
SQL > exec test_tab_proc(3, 100);
--------- Index: name3 like 20211124141442.ONE_PART.282 ---------
Total Elapsed MS = 10, Total CR gets= 300, Per Exec MS = .1, Per Exec CR gets = 3
Here 3 xplans. They are all using INDEX RANGE SCAN, but different execution stats.
************************ INDEX RANGE SCAN TEST_TAB#I#NAME1 **************************
SELECT /*+ index_rs_asc(t (name1)) */ NAME1 FROM TEST_TAB T WHERE NAME1 LIKE :B1
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 0 0 0
Execute 100 0.01 0.01 0 0 0 0
Fetch 100 1.34 1.34 0 90200 0 100
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 201 1.35 1.35 0 90200 0 100
Rows Row Source Operation
---- ---------------------------------------------------
1 INDEX RANGE SCAN TEST_TAB#I#NAME1 (cr=902 pr=0 pw=0 time=17202 us starts=1 cost=2 size=29 card=1)(object id 5082850)
************************ INDEX RANGE SCAN TEST_TAB#I#NAME2 **************************
SELECT /*+ index_rs_asc(t (name2)) */ NAME2 FROM TEST_TAB T WHERE NAME2 LIKE :B1
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 0 0 0
Execute 100 0.00 0.00 0 0 0 0
Fetch 100 0.01 0.01 0 1200 0 100
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 201 0.02 0.02 0 1200 0 100
Rows Row Source Operation
---- ---------------------------------------------------
1 INDEX RANGE SCAN TEST_TAB#I#NAME2 (cr=12 pr=0 pw=0 time=279 us starts=1 cost=2 size=29 card=1)(object id 5082851)
************************ INDEX RANGE SCAN TEST_TAB#I#NAME3 **************************
SELECT /*+ index_rs_asc(t (name3)) */ NAME3 FROM TEST_TAB T WHERE NAME3 LIKE :B1
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 0 0 0
Execute 100 0.00 0.00 0 0 0 0
Fetch 100 0.00 0.00 0 300 0 100
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 201 0.00 0.00 0 300 0 100
Rows Row Source Operation
---- ---------------------------------------------------
1 INDEX RANGE SCAN TEST_TAB#I#NAME3 (cr=3 pr=0 pw=0 time=55 us starts=1 cost=2 size=29 card=1)(object id 5082852)
3. LIKE Condition Index Range Scan on Wildcard '_'Index
Following query shows that TEST_TAB#I#NAME1 and TEST_TAB#I#NAM2 have similar clustering_factor and leaf_blocks (space usage),
but TEST_TAB#I#NAME3 has much less (see later Index Treedump).
select index_name, clustering_factor, leaf_blocks, blevel from dba_indexes v where table_name = 'TEST_TAB';
INDEX_NAME CLUSTERING_FACTOR LEAF_BLOCKS BLEVEL
------------------ ----------------- ----------- --------
TEST_TAB#I#NAME1 300000 2766 2
TEST_TAB#I#NAME2 300000 2773 2
TEST_TAB#I#NAME3 4714 1478 2
We can also get the distinct number of 3 name columns before wildcard '_':
select count(*)
,count(distinct substr(name1, 1, instr(name1, '_')-1)) name1_prefix_cnt
,count(distinct substr(name2, 1, instr(name2, '_')-1)) name2_prefix_cnt
,count(distinct substr(name3, 1, instr(name3, '_')-1)) name3_prefix_cnt
from test_tab;
COUNT(*) NAME1_PREFIX_CNT NAME2_PREFIX_CNT NAME3_PREFIX_CNT
-------- ---------------- ---------------- ----------------
300000 3 300 300000
name1 in test_tab is constructed like:
ONE_PART.x.y
TWO_PART.x.y
THREE_PART.x.y
Before '_', there are 3 different values ('ONE', 'TWO', 'THREE'). So name1 is divided into three parts.
TEST_TAB#I#NAME1 has 2766 leaf blocks, TEST_TAB#I#NAME1 index range scan makes 2766/3, which is 902 CR gets per execution
(it performs like an index partition full scan).
Similarily for name2, it is 2773/300, which is about 12 CR gets per execution (index BLEVEL = 2 requires 2 branch blocks gets per execution).
And for name3, it is 3 CR gets per execution (2 branch blocks gets and 1 leaf block get).
Above tests show that all of 3 queries return one single row per execution, but TEST_TAB#I#NAME1 has a much higher cost.
If we run a query to compare the result of ordered index range scan with unordered index fast full scan with query:
with sq_rs as (select /*+ index_asc(t test_tab#i#name1) materialize */ name1, rownum rn from test_tab t where name1 like 'ONE_PART%')
,sq_ffs as (select /*+ index_ffs(t test_tab#i#name1) materialize */ name1, row_number() over(order by name1) rn from test_tab t where name1 like 'ONE_PART%')
select r.*, f.*
from sq_rs r, sq_ffs f
where r.rn = f.rn and r.name1 != f.name1
--where r.rn != f.rn and r.name1 = f.name1
;
no rows selected
"no rows selected" means that name1 in TEST_TAB#I#NAME1 is strictly sorted,
but Like condition index range scan only uses substring before '_' for the search,
therefore 1/3 TEST_TAB#I#NAME1 blocks are read although only one leaf block contains the searched data.
4. Index Meta Data
We can also collect more index meta data, then make index treedump and index block dump:
select object_name, object_id, to_char(object_id, 'xxxxxxxx') object_id_hex from dba_objects t where object_name like 'TEST_TAB#%';
OBJECT_NAME OBJECT_ID OBJECT_ID
------------------ ---------- ---------
TEST_TAB#I#NAME1 5082850 4d8ee2
TEST_TAB#I#NAME2 5082851 4d8ee3
TEST_TAB#I#NAME3 5082852 4d8ee4
select column_name, avg_col_len from dba_tab_columns where table_name = 'TEST_TAB'; -- length 29
COLUMN_NAME AVG_COL_LEN
------------- -----------
ID 5
GRP 4
TSTR 15
NAME1 29
NAME2 29
NAME3 29
select segment_name, header_file, header_block from dba_segments t where segment_name like 'TEST_TAB#%';
SEGMENT_NAME HEADER_FILE HEADER_BLOCK
------------------ ----------- ------------
TEST_TAB#I#NAME1 22 826450
TEST_TAB#I#NAME2 22 826442
TEST_TAB#I#NAME3 22 826434
Index Treedump
alter session set tracefile_identifier = "TEST_TAB#I#NAME1_td";
alter session set events 'immediate trace name treedump level 5082850';
alter session set tracefile_identifier = "TEST_TAB#I#NAME2_td";
alter session set events 'immediate trace name treedump level 5082851';
alter session set tracefile_identifier = "TEST_TAB#I#NAME3_td";
alter session set events 'immediate trace name treedump level 5082852';
Treedump shows that NAME1 and NAME2 index blocks have avs = 4000 (AVailable Space, free space within the leaf block),
whereas NAME3 has only avs=5. That is why name1 and name2 have more LEAF_BLOCKS than name3.
TEST_TAB#I#NAME1
leaf: 0xf5fcd 1007565 (1: row:108.108 avs:4000)
TEST_TAB#I#NAME2
leaf: 0xf5dde 1007070 (1: row:108.108 avs:4000)
TEST_TAB#I#NAME3
leaf: 0xc9c46 826438 (1: row:202.202 avs:5)
The above dba_segments for TEST_TAB#I#NAME1 shows that its HEADER_BLOCK is 826450.
If we look following TEST_TAB#I#NAME1 tree dump, we can see that root node block: 826451 is the next block of its HEADER_BLOCK: 826450.
------------- TEST_TAB#I#NAME1 tree dump -------------
----- begin tree dump
branch: 0xc9c53 826451 (0: nrow: 16, level: 2)
branch: 0xf335c 996188 (-1: nrow: 162, level: 1)
leaf: 0xc9c54 826452 (-1: row:107.107 avs:4037)
leaf: 0xf3718 997144 (0: row:108.108 avs:4000)
leaf: 0xf5fcd 1007565 (1: row:108.108 avs:4000)
leaf: 0xf8517 1017111 (2: row:108.108 avs:4000)
leaf: 0x15a2fe 1417982 (3: row:108.108 avs:4000)
Note: "0xc9c53 826451" are DBA (Data Block Address) in (hex decimal).
For bigfile tablespace, it is block#; for smallfile tablespace, it is rfile# * power(2, 22) + block#.
see dbms_utility.data_block_address_file, dbms_utility.data_block_address_block.
If decimal number is negative, convert it by: power(2, 32) + "decimal number" = DBA.
Index analyze
analyze index TEST_TAB#I#NAME1 validate structure offline;
--validate index TEST_TAB#I#NAME1
select * from index_stats;
select * from index_histogram;
analyze index TEST_TAB#I#NAME3 validate structure offline;
--validate index TEST_TAB#I#NAME3
select * from index_stats;
select * from index_histogram;
Index Data Block Dump
alter session set tracefile_identifier = "TEST_TAB#I#NAME1_seg";
alter system dump datafile 22 block min 826450 block max 826457;
alter session set tracefile_identifier = "TEST_TAB#I#NAME2_seg";
alter system dump datafile 22 block min 826442 block max 826449;
alter session set tracefile_identifier = "TEST_TAB#I#NAME3_seg";
alter system dump datafile 22 block min 826434 block max 826441;
NAME1 Index blocks List
select blk, count(*), min(id) min_id, max(id) max_id, min(name1), max(name1) from (
select dbms_rowid.rowid_block_number(sys_op_lbid (5082845, 'L', rowid)) blk, t.* from test_tab t)
group by blk order by blk;
Index Entry Sequence
Dump and compare block# read sequence of index range scan and index fast full scan against index treedump (index data structure)
-- index range scan gets index TEST_TAB#I#NAME1 blocks in ordered read (db file sequential read').
-- index range scan first reads index blocks from root block to the leftmost satisfied first leaf block along branch blocks,
-- then reads from first found leaf blocks till last satisfied leaf block (which are linked with each one points to next one).
-- One block per read, logically sequential.
alter system flush buffer_cache;
alter session set tracefile_identifier = 'sql_trc_rs';
alter session set events '10046 trace name context forever, level 12';
with sq_rs as (select /*+ index_asc(t test_tab#i#name1) materialize */ name1, rownum rn from test_tab t where name1 like 'ONE_PART%')
select count(*) from sq_rs r;
alter session set events '10046 trace name context off';
-- index fast full scan gets index TEST_TAB#I#NAME1 blocks in unordered read ('db file scattered read').
-- index fast full scan reads all index blocks (brach/leaf) like full table scan.
-- Multiple blocks per read without considering any order.
alter system flush buffer_cache;
alter session set tracefile_identifier = 'sql_trc_ffs';
alter session set events '10046 trace name context forever, level 12';
with sq_ffs as (select /*+ index_ffs(t test_tab#i#name1) materialize */ name1, row_number() over(order by name1) rn from test_tab t where name1 like 'ONE_PART%')
select count(*) from sq_ffs f;
alter session set events '10046 trace name context off';