1. Index Split Stats
In this case study, I have also learned from others to look index split stats in v$sesstat or v$sysstat.
For example, insert 1'000'000 rows, and look leaf/branch/root node splits.
truncate table test_tab;
select sid, n.name, s.value
from v$mystat s, v$statname n
where s.statistic# = n.statistic#
and name in ('root node splits', 'branch node splits', 'leaf node splits');
insert into test_tab select level, -level from dual connect by level <= 1e6;
commit;
select sid, n.name, s.value
from v$mystat s, v$statname n
where s.statistic# = n.statistic#
and name in ('root node splits', 'branch node splits', 'leaf node splits');
Here the output:
SID NAME VALUE
368 leaf node splits 0
368 branch node splits 0
368 root node splits 0
SQL (368,56362) > insert into test_tab select level, -level from dual connect by level <= 1e6;
1'000'000 rows inserted.
SID NAME VALUE
368 leaf node splits 2964
368 branch node splits 4
368 root node splits 1
2. Split Point Distribution
The appended Plsql: indx_splits_stats_get can be used to get the exact point of index split. For example, insert 10'000 rows, and look split point distribution:
exec indx_splits_stats_get(1e4, 1);
select t.*, ins_row -lag(ins_row) over(order by ins_row) rows_between
from indx_splits_stats t
where run=1
order by ins_row;
RUN INS_ROW SID ROOT_SPLITS BRANCH_SPLITS LEAF_SPLITS ROWS_BETWEEN
--- ------- --- ----------- ------------- ----------- ------------
1 541 902 0 0 1
1 996 902 0 0 2 455
1 1131 902 0 0 3 135
1 1967 902 0 0 4 836
1 1994 902 0 0 5 27
1 2310 902 0 0 6 316
1 2325 902 0 0 7 15
1 3959 902 0 0 8 1634
1 3986 902 0 0 9 27
1 4047 902 0 0 10 61
1 4074 902 0 0 11 27
1 4442 902 0 0 12 368
1 4625 902 0 0 13 183
1 4708 902 0 0 14 83
1 4719 902 0 0 15 11
1 7563 902 0 0 16 2844
1 7671 902 0 0 17 108
1 7689 902 0 0 18 18
1 7758 902 0 0 19 69
1 7797 902 0 0 20 39
1 7884 902 0 0 21 87
1 8251 902 0 0 22 367
1 8278 902 0 0 23 27
1 8837 902 0 0 24 559
1 8942 902 0 0 25 105
1 9128 902 0 0 26 186
1 9209 902 0 0 27 81
1 9220 902 0 0 28 11
1 9827 902 0 0 29 607
29 rows selected.
There are 29 'leaf node splits', in average, 344 rows per split.
However, column ROWS_BETWEEN shows that the minimum distance between two splits are 11 rows,
the maximum is 2844, about 258 (2844/11) times of difference,
an irregular and unpredictable split distribution.If we rebuild index test_tab#r as noreverse, inserting 10'000 rows will have only 18 'leaf node splits', every split occurs exactly after 533 rows insert, an totally even distribution.
3. Test Code: indx_splits_stats_get
drop view sesstat_v;
create view sesstat_v as
select sid, n.name, s.value from v$sesstat s, v$statname n where s.statistic# = n.statistic#;
drop table indx_splits_stats;
create table indx_splits_stats as
select 0 run, 0 ins_row, sid
,sum(decode(name, 'root node splits', value, 0)) as root_splits
,sum(decode(name, 'branch node splits', value, 0)) as branch_splits
,sum(decode(name, 'leaf node splits', value, 0)) as leaf_splits
from sesstat_v where sid = -1 group by sid;
create or replace procedure indx_splits_stats_get(p_ins_rows number, p_run number) as
l_sid number := sys.dbms_support.mysid;
l_root_splits_pre number := 0;
l_branch_splits_pre number := 0;
l_leaf_splits_pre number := 0;
l_root_splits number := 0;
l_branch_splits number := 0;
l_leaf_splits number := 0;
begin
execute immediate 'truncate table test_tab';
select sum(decode(name, 'root node splits', value, 0)) as root_splits
,sum(decode(name, 'branch node splits', value, 0)) as branch_splits
,sum(decode(name, 'leaf node splits', value, 0)) as leaf_splits
into l_root_splits_pre, l_branch_splits_pre, l_leaf_splits_pre
from sesstat_v where sid = l_sid;
for i in 1..p_ins_rows loop
insert into test_tab values(i, -i);
commit;
select sum(decode(name, 'root node splits', value, 0)) as root_splits
,sum(decode(name, 'branch node splits', value, 0)) as branch_splits
,sum(decode(name, 'leaf node splits', value, 0)) as leaf_splits
into l_root_splits, l_branch_splits, l_leaf_splits
from sesstat_v where sid = l_sid;
if l_root_splits != l_root_splits_pre or l_branch_splits != l_branch_splits_pre or l_leaf_splits != l_leaf_splits_pre
then
dbms_output.put_line('--- Leaf Splitted at row count: '||i||
', '||l_root_splits||','||l_branch_splits||','||l_leaf_splits||',');
insert into indx_splits_stats values(p_run, i, l_sid, l_root_splits, l_branch_splits, l_leaf_splits);
l_root_splits_pre := l_root_splits;
l_branch_splits_pre := l_branch_splits;
l_leaf_splits_pre := l_leaf_splits;
commit;
end if;
end loop;
end;
/
-- exec indx_splits_stats_get(1e4, 1);