On Oracle: Oracle 11g AUTO_SAMPLE_SIZE performance on Small Tables

Oracle 11g enhanced DBMS_STATS.AUTO_SAMPLE_SIZE by a new algorithm in calculating NDV
(see Blog: Improvement of AUTO sampling statistics gathering feature in Oracle Database 11g ).

Recent experience on large Databases (a few TB with mixed objects' size) reveals the performance disadvantage on small tables when using AUTO_SAMPLE_SIZE. In most case, objects under 100 MB are 20% to 40% slower.

Such behaviour can be reproduced on a small test DB. At first, create a few tables with different size. Then call dbms_stats.gather_table_stats on them, and record session statistics for each case. (see appended TestCode).

Launch the test by:
     truncate table test_stats;
exec test_tab_run(10);

then collect test result by:

with sq as
   (select t.*, value - lag(value) over (partition by table_name, stat_name order by ts) val
      from test_stats t
      where stat_name in ('CPU used by this session', 'undo change vector size', 'sorts (rows)',
                          'db block gets', 'db block changes', 'physical reads', 'consistent gets'))
select a.table_name, a.table_size, a.stat_name, a.val "AUTO_VAL", p.val "100%_VAL"
       ,(a.val - p.val) diff, round(100*(a.val - p.val)/p.val, 2) "DIFF_%"
from (select * from sq where estimate_percent = 'AUTO') a
      ,(select * from sq where estimate_percent = '100%') p
where a.table_name = p.table_name
   and a.stat_name = p.stat_name
order by a.stat_name, a.table_size;

TABLE_NAME	TABLE_SIZE	STAT_NAME	AUTO_VAL	100%_VAL	DIFF	DIFF_%
TEST_TAB_0KB	131072	CPU used by this session	62	32	30	93.75
TEST_TAB_200KB	262144	CPU used by this session	48	35	13	37.14
TEST_TAB_800KB	917504	CPU used by this session	51	54	-3	-5.56
TEST_TAB_2MB	2228224	CPU used by this session	58	91	-33	-36.26
TEST_TAB_8MB	8519680	CPU used by this session	89	289	-200	-69.2
TEST_TAB_20MB	20971520	CPU used by this session	150	702	-552	-78.63
TEST_TAB_80MB	83755008	CPU used by this session	436	2800	-2364	-84.43
TEST_TAB_0KB	131072	consistent gets	25595	14167	11428	80.67
TEST_TAB_200KB	262144	consistent gets	18976	13582	5394	39.71
TEST_TAB_800KB	917504	consistent gets	19730	14376	5354	37.24
TEST_TAB_2MB	2228224	consistent gets	21338	15990	5348	33.45
TEST_TAB_8MB	8519680	consistent gets	28935	23622	5313	22.49
TEST_TAB_20MB	20971520	consistent gets	44004	38464	5540	14.4
TEST_TAB_80MB	83755008	consistent gets	124858	114275	10583	9.26
TEST_TAB_0KB	131072	db block changes	3347	1748	1599	91.48
TEST_TAB_200KB	262144	db block changes	2277	1639	638	38.93
TEST_TAB_800KB	917504	db block changes	2308	1684	624	37.05
TEST_TAB_2MB	2228224	db block changes	2344	1640	704	42.93
TEST_TAB_8MB	8519680	db block changes	2302	1813	489	26.97
TEST_TAB_20MB	20971520	db block changes	2318	1685	633	37.57
TEST_TAB_80MB	83755008	db block changes	6998	1713	5285	308.52
TEST_TAB_0KB	131072	db block gets	3531	1611	1920	119.18
TEST_TAB_200KB	262144	db block gets	2785	1497	1288	86.04
TEST_TAB_800KB	917504	db block gets	2786	1555	1231	79.16
TEST_TAB_2MB	2228224	db block gets	2849	1499	1350	90.06
TEST_TAB_8MB	8519680	db block gets	2785	1659	1126	67.87
TEST_TAB_20MB	20971520	db block gets	2815	1540	1275	82.79
TEST_TAB_80MB	83755008	db block gets	2765	1577	1188	75.33
TEST_TAB_0KB	131072	physical reads	1885	988	897	90.79
TEST_TAB_200KB	262144	physical reads	1136	906	230	25.39
TEST_TAB_800KB	917504	physical reads	1208	965	243	25.18
TEST_TAB_2MB	2228224	physical reads	1382	1146	236	20.59
TEST_TAB_8MB	8519680	physical reads	2139	1903	236	12.4
TEST_TAB_20MB	20971520	physical reads	3639	25991	-22352	-86
TEST_TAB_80MB	83755008	physical reads	11188	101658	-90470	-88.99
TEST_TAB_0KB	131072	sorts (rows)	13623	9873	3750	37.98
TEST_TAB_200KB	262144	sorts (rows)	12609	69442	-56833	-81.84
TEST_TAB_800KB	917504	sorts (rows)	12609	248542	-235933	-94.93
TEST_TAB_2MB	2228224	sorts (rows)	12609	606742	-594133	-97.92
TEST_TAB_8MB	8519680	sorts (rows)	12609	2397742	-2385133	-99.47
TEST_TAB_20MB	20971520	sorts (rows)	12609	5979742	-5967133	-99.79
TEST_TAB_80MB	83755008	sorts (rows)	12609	23889742	-23877133	-99.95
TEST_TAB_0KB	131072	undo change vector size	143968	84360	59608	70.66
TEST_TAB_200KB	262144	undo change vector size	92536	81572	10964	13.44
TEST_TAB_800KB	917504	undo change vector size	102888	82036	20852	25.42
TEST_TAB_2MB	2228224	undo change vector size	103436	81240	22196	27.32
TEST_TAB_8MB	8519680	undo change vector size	104356	95100	9256	9.73
TEST_TAB_20MB	20971520	undo change vector size	94504	90772	3732	4.11
TEST_TAB_80MB	83755008	undo change vector size	94528	90664	3864	4.26

Above table points out:

"CPU used by this session" is favorable to "AUTO_SAMPLE_SIZE" only when table size is over 800KB.
"consistent gets", "physical reads" are increasing with table size.
These reflect the real work of gather_table_stats.
"db block changes", "db block gets", "undo change vector size" are almost constant.
but "AUTO_SAMPLE_SIZE" is mostly higher than "100%".
They stand for the base load of gather_table_stats, since gather_table_stats hardly makes any modifications on the tables.
Regardless of table size, they pertain for each call of gather_table_stats.
"sorts (rows)" is constant for "AUTO_SAMPLE_SIZE", but increasing rapidly for "100%".
This exposes the effective advantage of new algorithm.

so we can say that only when base load in Point 3 is overwhelmed, AUTO_SAMPLE_SIZE exhibits the advantage.

The query on elapsed time (in millisecond) also confirms the variance of performance on object size for both methods:

with sq as
    (select t.*
           ,(ts - lag(ts) over (partition by table_name, stat_name order by ts))dur
       from test_stats t
       where stat_name in ('CPU used by this session'))
select a.table_name, a.table_size
        ,round(extract(minute from a.dur)*60*1000 + extract(second from a.dur)*1000) "AUTO_DUR"
        ,round(extract(minute from p.dur)*60*1000 + extract(second from p.dur)*1000) "100%_DUR"
        ,round(extract(minute from (a.dur-p.dur))*60*1000 + extract(second from (a.dur-p.dur))*1000) diff_dur
from (select * from sq where estimate_percent = 'AUTO') a
       ,(select * from sq where estimate_percent = '100%') p
where a.table_name = p.table_name
    and a.stat_name = p.stat_name
order by a.stat_name, a.table_size;

TABLE_NAME	TABLE_SIZE	AUTO_DUR	100%_DUR	DIFF_DUR
TEST_TAB_0KB	131072	1852	882	970
TEST_TAB_200KB	262144	986	748	239
TEST_TAB_800KB	917504	1066	1029	37
TEST_TAB_2MB	2228224	1179	1648	-469
TEST_TAB_8MB	8519680	1645	4819	-3174
TEST_TAB_20MB	20971520	2650	11407	-8756
TEST_TAB_80MB	83755008	7300	44910	-37610

As a conclusion, this Blog demonstrated the existence of crossing point on object size. Only apart from this point, AUTO_SAMPLE_SIZE is profitable, and benefice is proportional to the object size.

AUTO_SAMPLE_SIZE Internals

By a 10046 trace when running:
   exec dbms_stats.gather_table_stats(null, 'TEST_TAB_2MB', estimate_percent => DBMS_STATS.AUTO_SAMPLE_SIZE);

we can see the new Operation: APPROXIMATE NDV:

Rows     Row Source Operation
------- ---------------------------------------------------
       1 SORT AGGREGATE (cr=300 pr=296 pw=0 time=246170 us)
   30000   APPROXIMATE NDV AGGREGATE (cr=300 pr=296 pw=0 time=44330 us ...)
   30000    TABLE ACCESS FULL TEST_TAB_2MB (cr=300 pr=296 pw=0 time=23643 us ...)

As discussed in Amit Poddar's "One Pass Distinct Sampling" (comprising details of math foundations), the most expensive operation in the old method is due to aggregate function (count (distinct ..)) to calculate the NDVs, whereas the new algorithm scans the full table once to build a synopsis per column to calculate column NDV.

To explore APPROXIMATE NDV AGGREGATE internals, Oracle designated a special event:
   ORA-10832: Trace approximate NDV row source

The details of approximate NDV algorithm can be traced with event 10832 set at level 1.

The new algorithm is coded in DBMS_SQLTUNE_INTERNAL.GATHER_SQL_STATS and called by DBMS_STATS_INTERNAL.GATHER_SQL_STATS, which is again called by DBMS_STATS.GATHER_BASIC_USING_APPR_NDV, and finally by DBMS_STATS.GATHER_TABLE_STATS.

TestCode

drop view stats_v;

create view stats_v as
select n.name stat_name, s.value from v$statname n, v$mystat s where n.statistic# = s.statistic#;

drop table test_stats;

create table test_stats as
select timestamp'2013-11-22 10:20:30' ts, 'TEST_TAB_xxxxxxxxxx' table_name, 0 table_size, 'AUTO_or_100%' estimate_percent, v.*
from stats_v v where 1=2;

drop table test_tab_0kb;
create table test_tab_0kb(x number, y varchar2(100));
insert into test_tab_0kb(y) values(null);
commit;

drop table test_tab_200kb;
create table test_tab_200kb(x number, y varchar2(100));
insert into test_tab_200kb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 3000;
commit;

drop table test_tab_800kb;
create table test_tab_800kb(x number, y varchar2(100));
insert into test_tab_800kb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 12000;
commit;

drop table test_tab_1mb;
create table test_tab_1mb(x number, y varchar2(100));
insert into test_tab_1mb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 15000;
commit;

drop table test_tab_2mb;
create table test_tab_2mb(x number, y varchar2(100));
insert into test_tab_2mb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 30000;
commit;

drop table test_tab_8mb;
create table test_tab_8mb(x number, y varchar2(100));
insert into test_tab_8mb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 120000;
commit;

drop table test_tab_20mb;
create table test_tab_20mb(x number, y varchar2(100));
insert into test_tab_20mb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 300000;
commit;

drop table test_tab_80mb;
create table test_tab_80mb(x number, y varchar2(100));
insert into test_tab_80mb select level, rpad('abc', mod(level, 100), 'x') from dual connect by level <= 1200000;
commit;

create or replace procedure fill_tab(p_table_name varchar2, p_size_mb number) as
begin
for i in 1..p_size_mb loop
    execute immediate 'insert into ' || p_table_name||' select * from test_tab_1mb';
end loop;
commit;
end;
/

create or replace procedure test_tab_proc (p_table_name varchar2, p_cnt number) as
l_table_size number;

procedure flush as
    begin
      execute immediate 'alter system flush shared_pool';
      execute immediate 'alter system flush buffer_cache';
    end;
begin
execute immediate 'select bytes from dba_segments where segment_name = :s' into l_table_size using p_table_name;
insert into test_stats select systimestamp, p_table_name, l_table_size, 'start', v.* from stats_v v;
flush;
for i in 1..p_cnt loop
    dbms_stats.gather_table_stats(null, p_table_name, estimate_percent => DBMS_STATS.AUTO_SAMPLE_SIZE);
end loop;
insert into test_stats select systimestamp, p_table_name, l_table_size, 'AUTO', v.* from stats_v v;
flush;
for i in 1..p_cnt loop
    dbms_stats.gather_table_stats(null, p_table_name, estimate_percent => 100);
end loop;
insert into test_stats select systimestamp, p_table_name, l_table_size, '100%', v.* from stats_v v;
commit;
end;
/

create or replace procedure test_tab_run(p_cnt number) as
begin
test_tab_proc('TEST_TAB_0KB', p_cnt);
test_tab_proc('TEST_TAB_200KB', p_cnt);
test_tab_proc('TEST_TAB_800KB', p_cnt);
test_tab_proc('TEST_TAB_2MB', p_cnt);
test_tab_proc('TEST_TAB_8MB', p_cnt);
test_tab_proc('TEST_TAB_20MB', p_cnt);
test_tab_proc('TEST_TAB_80MB', p_cnt);
end;
/

Friday, May 23, 2014

Oracle 11g AUTO_SAMPLE_SIZE performance on Small Tables

AUTO_SAMPLE_SIZE Internals

TestCode