Tuesday, April 7, 2020

Index Block Split Point Distribution

Continuing with previous Blog: Cache Buffer Chains Latch Contention Case Study-1: Reverse Key Index, this Blog will look the split point distribution of index block, specially for Reverse Key Index.


1. Index Split Stats


In this case study, I have also learned from others to look index split stats in v$sesstat or v$sysstat.

For example, insert 1'000'000 rows, and look leaf/branch/root node splits.

truncate table test_tab;

select sid, n.name, s.value 
  from v$mystat s, v$statname n 
 where s.statistic# = n.statistic#
   and name in ('root node splits', 'branch node splits', 'leaf node splits');
   
insert into test_tab select level, -level from dual connect by level <= 1e6;
commit;

select sid, n.name, s.value 
  from v$mystat s, v$statname n 
 where s.statistic# = n.statistic#
   and name in ('root node splits', 'branch node splits', 'leaf node splits');   
Here the output:
   
  SID  NAME                VALUE
  368  leaf node splits    0
  368  branch node splits  0
  368  root node splits    0

SQL (368,56362) > insert into test_tab select level, -level from dual connect by level <= 1e6;
  1'000'000 rows inserted.
   
  SID  NAME                VALUE
  368  leaf node splits    2964
  368  branch node splits  4
  368  root node splits    1


2. Split Point Distribution


The appended Plsql: indx_splits_stats_get can be used to get the exact point of index split. For example, insert 10'000 rows, and look split point distribution:

exec indx_splits_stats_get(1e4, 1);

select t.*, ins_row -lag(ins_row) over(order by ins_row) rows_between
from  indx_splits_stats t
where run=1 
order by ins_row;   

  RUN  INS_ROW  SID  ROOT_SPLITS  BRANCH_SPLITS  LEAF_SPLITS  ROWS_BETWEEN
  ---  -------  ---  -----------  -------------  -----------  ------------
  1    541      902  0            0              1
  1    996      902  0            0              2            455
  1    1131     902  0            0              3            135
  1    1967     902  0            0              4            836
  1    1994     902  0            0              5            27
  1    2310     902  0            0              6            316
  1    2325     902  0            0              7            15
  1    3959     902  0            0              8            1634
  1    3986     902  0            0              9            27
  1    4047     902  0            0              10           61
  1    4074     902  0            0              11           27
  1    4442     902  0            0              12           368
  1    4625     902  0            0              13           183
  1    4708     902  0            0              14           83
  1    4719     902  0            0              15           11
  1    7563     902  0            0              16           2844
  1    7671     902  0            0              17           108
  1    7689     902  0            0              18           18
  1    7758     902  0            0              19           69
  1    7797     902  0            0              20           39
  1    7884     902  0            0              21           87
  1    8251     902  0            0              22           367
  1    8278     902  0            0              23           27
  1    8837     902  0            0              24           559
  1    8942     902  0            0              25           105
  1    9128     902  0            0              26           186
  1    9209     902  0            0              27           81
  1    9220     902  0            0              28           11
  1    9827     902  0            0              29           607
  
  29 rows selected.
There are 29 'leaf node splits', in average, 344 rows per split. However, column ROWS_BETWEEN shows that the minimum distance between two splits are 11 rows, the maximum is 2844, about 258 (2844/11) times of difference, an irregular and unpredictable split distribution.

If we rebuild index test_tab#r as noreverse, inserting 10'000 rows will have only 18 'leaf node splits', every split occurs exactly after 533 rows insert, an totally even distribution.


3. Test Code: indx_splits_stats_get



drop view sesstat_v;

create view sesstat_v as
select sid, n.name, s.value from v$sesstat s, v$statname n where s.statistic# = n.statistic#;

drop table indx_splits_stats;

create table indx_splits_stats as
select 0 run, 0 ins_row, sid
       ,sum(decode(name, 'root node splits', value, 0))   as root_splits
       ,sum(decode(name, 'branch node splits', value, 0)) as branch_splits
       ,sum(decode(name, 'leaf node splits', value, 0))   as leaf_splits
  from sesstat_v where sid = -1 group by sid;

create or replace procedure indx_splits_stats_get(p_ins_rows number, p_run number) as
  l_sid             number := sys.dbms_support.mysid;
  l_root_splits_pre     number := 0;
  l_branch_splits_pre   number := 0;
  l_leaf_splits_pre     number := 0;
  l_root_splits     number := 0;
  l_branch_splits   number := 0;
  l_leaf_splits     number := 0;  
begin
  execute immediate 'truncate table test_tab';

  select sum(decode(name, 'root node splits', value, 0))   as root_splits
        ,sum(decode(name, 'branch node splits', value, 0)) as branch_splits
        ,sum(decode(name, 'leaf node splits', value, 0))   as leaf_splits
        into l_root_splits_pre, l_branch_splits_pre, l_leaf_splits_pre
  from sesstat_v where sid = l_sid;
  
  for i in 1..p_ins_rows loop
    insert into test_tab values(i, -i);
    commit;
    select sum(decode(name, 'root node splits', value, 0))   as root_splits
          ,sum(decode(name, 'branch node splits', value, 0)) as branch_splits
          ,sum(decode(name, 'leaf node splits', value, 0))   as leaf_splits
          into l_root_splits, l_branch_splits, l_leaf_splits
    from sesstat_v where sid = l_sid;
   
   if l_root_splits != l_root_splits_pre or l_branch_splits != l_branch_splits_pre or l_leaf_splits != l_leaf_splits_pre 
   then
     dbms_output.put_line('--- Leaf Splitted at row count: '||i||
                          ', '||l_root_splits||','||l_branch_splits||','||l_leaf_splits||',');
     insert into indx_splits_stats values(p_run, i, l_sid, l_root_splits, l_branch_splits, l_leaf_splits);
     l_root_splits_pre   := l_root_splits;
     l_branch_splits_pre := l_branch_splits;
     l_leaf_splits_pre   := l_leaf_splits; 
     commit;
  end if; 
  end loop; 
end;
/

-- exec indx_splits_stats_get(1e4, 1);