On Oracle: Performance of Oracle Object Collection Comparisons

Oracle object types are user-defined types that make it possible to model real-world entities, similar to the class mechanism in C++ and Java (Database Object-Relational Developer's Guide Performance of Object Comparisons).

This Blog presents an approach to investigate Performance of Oracle Object Collection Comparisons used in two functions:

SET

Converts a nested table into a set by eliminating duplicates. The function returns a nested table whose elements are distinct from one another. SET function requires map method, order method does NOT work.

Equal and Not Equal Comparisons

    Two nested tables are equal if they have the same named type, have the same cardinality, and their elements are equal based on map method.

The next Blog:  Performance of Oracle Object Collection Comparisons - Part2 will talk about other two functions: COLLECT and Multiset Operators.

By implementing a map member function in Object (similar to Java Comparator/Comparable Interface), the number of calls can be recorded. In aid of such counter, this Blog is trying to find a way to study the complexity of applied algorithm (see appended test code).

1. Setup the test (see appended Test Code)

2. Run the test by:
    truncate table test_stat;
    exec all_test_run_1(4);

3. Wait test finished, query the statistics by:

SET LINES 400
SET NUMWIDTH 12
SET NUMFORMAT 999,999,999
COLUMN TOTAL_COMP_EST_RATIO FORMAT 999.99
select s.*, round(s.total_comp_est/nullif(s.total_comp, 0), 2) total_comp_est_ratio
  from (select t.*, round(10*1000*elapsed/nullif(total_comp, 0), 2) us_per_comp,
         case test_name 
           when 'SET_TEST'               then cnt*(cnt-1)
           when 'SET_TEST_DIST'          then cnt*(cnt-1)*(disticnt_cnt/cnt)
           when 'EQUAL_TEST'             then 2*cnt
           when 'EQUAL_TEST_DIST'        then 2*cnt
           when 'EQUAL_TEST_RANDOM'      then (cnt*disticnt_cnt/2)
           when 'EQUAL_TEST_DIST_RANDOM' then (cnt*disticnt_cnt/2)
         end total_comp_est 
     from test_stat t) s;

returns:



  

  TEST_NAME
  RUN_ID
  CNT
  DISTICNT_CNT
  ELAPSED
  TOTAL_COMP
  SINGLE_COMP
  US_PER_COMP
  TOTAL_COMP_EST
  TOTAL_COMP_EST_RATIO
  
    SET_TEST
    
1

    
10

    
10

    
0

    
90

    
9

    
0

    
90

    
1

  
    SET_TEST
    
2

    
100

    
100

    
2

    
9900

    
99

    
2.02

    
9900

    
1

  
    SET_TEST
    
3

    
1000

    
1000

    
193

    
999000

    
999

    
1.93

    
999000

    
1

  
    SET_TEST
    
4

    
10000

    
10000

    
18826

    
99990000

    
9999

    
1.88

    
99990000

    
1

  
    SET_TEST_DIST
    
1

    
10000

    
10

    
21

    
109980

    
10.998

    
1.91

    
99990

    
0.91

  
    SET_TEST_DIST
    
2

    
10000

    
100

    
198

    
1009800

    
100.98

    
1.96

    
999900

    
0.99

  
    SET_TEST_DIST
    
3

    
10000

    
1000

    
1932

    
10008000

    
1000.8

    
1.93

    
9999000

    
1

  
    SET_TEST_DIST
    
4

    
10000

    
10000

    
18802

    
99990000

    
9999

    
1.88

    
99990000

    
1

  
    EQUAL_TEST
    
1

    
10

    
10

    
0

    
20

    
1

    
0

    
20

    
1

  
    EQUAL_TEST
    
2

    
100

    
100

    
0

    
200

    
1

    
0

    
200

    
1

  
    EQUAL_TEST
    
3

    
1000

    
1000

    
1

    
2000

    
1

    
5

    
2000

    
1

  
    EQUAL_TEST
    
4

    
10000

    
10000

    
95

    
20000

    
1

    
47.5

    
20000

    
1

  
    EQUAL_TEST_DIST
    
1

    
10000

    
10

    
95

    
20000

    
1

    
47.5

    
20000

    
1

  
    EQUAL_TEST_DIST
    
2

    
10000

    
100

    
95

    
20000

    
1

    
47.5

    
20000

    
1

  
    EQUAL_TEST_DIST
    
3

    
10000

    
1000

    
95

    
20000

    
1

    
47.5

    
20000

    
1

  
    EQUAL_TEST_DIST
    
4

    
10000

    
10000

    
95

    
20000

    
1

    
47.5

    
20000

    
1

  
    EQUAL_TEST_RANDOM
    
1

    
10

    
10

    
0

    
54

    
2.7

    
0

    
50

    
0.93

  
    EQUAL_TEST_RANDOM
    
2

    
100

    
100

    
1

    
4710

    
23.55

    
2.12

    
5000

    
1.06

  
    EQUAL_TEST_RANDOM
    
3

    
1000

    
1000

    
101

    
501358

    
250.679

    
2.01

    
500000

    
1

  
    EQUAL_TEST_RANDOM
    
4

    
10000

    
10000

    
10142

    
50549260

    
2527.463

    
2.01

    
50000000

    
0.99

  
    EQUAL_TEST_DIST_RANDOM
    
1

    
10000

    
10

    
327

    
1184932

    
59.2466

    
2.76

    
50000

    
0.04

  
    EQUAL_TEST_DIST_RANDOM
    
2

    
10000

    
100

    
956

    
4374904

    
218.7452

    
2.19

    
500000

    
0.11

  
    EQUAL_TEST_DIST_RANDOM
    
3

    
10000

    
1000

    
2968

    
14358672

    
717.9336

    
2.07

    
5000000

    
0.35

  
    EQUAL_TEST_DIST_RANDOM
    
4

    
10000

    
10000

    
9992

    
49718366

    
2485.9183

    
2.01

    
50000000

    
1.01

TEST_NAME	RUN_ID	CNT	DISTICNT_CNT	ELAPSED	TOTAL_COMP	SINGLE_COMP	US_PER_COMP	TOTAL_COMP_EST	TOTAL_COMP_EST_RATIO
SET_TEST	1	10	10	0	90	9	0	90	1
SET_TEST	2	100	100	2	9900	99	2.02	9900	1
SET_TEST	3	1000	1000	193	999000	999	1.93	999000	1
SET_TEST	4	10000	10000	18826	99990000	9999	1.88	99990000	1
SET_TEST_DIST	1	10000	10	21	109980	10.998	1.91	99990	0.91
SET_TEST_DIST	2	10000	100	198	1009800	100.98	1.96	999900	0.99
SET_TEST_DIST	3	10000	1000	1932	10008000	1000.8	1.93	9999000	1
SET_TEST_DIST	4	10000	10000	18802	99990000	9999	1.88	99990000	1
EQUAL_TEST	1	10	10	0	20	1	0	20	1
EQUAL_TEST	2	100	100	0	200	1	0	200	1
EQUAL_TEST	3	1000	1000	1	2000	1	5	2000	1
EQUAL_TEST	4	10000	10000	95	20000	1	47.5	20000	1
EQUAL_TEST_DIST	1	10000	10	95	20000	1	47.5	20000	1
EQUAL_TEST_DIST	2	10000	100	95	20000	1	47.5	20000	1
EQUAL_TEST_DIST	3	10000	1000	95	20000	1	47.5	20000	1
EQUAL_TEST_DIST	4	10000	10000	95	20000	1	47.5	20000	1
EQUAL_TEST_RANDOM	1	10	10	0	54	2.7	0	50	0.93
EQUAL_TEST_RANDOM	2	100	100	1	4710	23.55	2.12	5000	1.06
EQUAL_TEST_RANDOM	3	1000	1000	101	501358	250.679	2.01	500000	1
EQUAL_TEST_RANDOM	4	10000	10000	10142	50549260	2527.463	2.01	50000000	0.99
EQUAL_TEST_DIST_RANDOM	1	10000	10	327	1184932	59.2466	2.76	50000	0.04
EQUAL_TEST_DIST_RANDOM	2	10000	100	956	4374904	218.7452	2.19	500000	0.11
EQUAL_TEST_DIST_RANDOM	3	10000	1000	2968	14358672	717.9336	2.07	5000000	0.35
EQUAL_TEST_DIST_RANDOM	4	10000	10000	9992	49718366	2485.9183	2.01	50000000	1.01

The above test result table shows:

SET_TEST has a total number of comparison equal to cnt*(cnt-1), that means a complexity of o(n²).
SET_TEST_DIST has a total number of comparison equal to cnt*(cnt-1)*(disticnt_cnt/cnt), which depends the ratio of distinct items.
Therefore elapsed time (in centisecond) increasing with number of distinct values.

EQUAL_TEST has a total number of comparison equal to cnt*(cnt-1). i.e, a complexity of o(n).
EQUAL_TEST_DIST has the same complexity as EQUAL_TEST, and elapsed time is irrelevant with number of distinct values.

EQUAL_TEST_RANDOM has an estimated total number of comparison equal to (cnt*disticnt_cnt/2).
EQUAL_TEST_DIST_RANDOM has the same estimated complexity as EQUAL_TEST_RANDOM.

TOTAL_COMP is the number of calls on map method (comparator). For each comparing of two objects, two calls are counted.

The estimated complexity (TOTAL_COMP_EST) is not precise and only an attempt to approximate the real complexity. It would be interesting to determine the exact form of computation in a long term.

The discussed performance behavior was first raised during 2010 FIFA World Cup, it takes a long way to be a bit clear.

Map and Order Method

Oracle Documentation: Basic Components of Oracle Objects said:

 A map method maps object return values to scalar values and can order multiple values by their position on the scalar axis. 
 
 An order method is a function for an object (SELF), with one declared parameter that is an object of the same type. 
 The method must return either a negative number, zero, or a positive number. 
 
 You can declare a map method or an order method but not both.
 
 When sorting or merging a large number of objects, use a map method, which maps all the objects into scalars, then sorts the scalars. 
 An order method is less efficient because it must be called repeatedly (it can compare only two objects at a time).

The above test shows that the TOTAL_COMP (calls of map method) is not linear to the number of objects, which does not match the description:

 ... use a map method, which maps all the objects into scalars, then sorts the scalars.

It seems that map method is repeatedly called for each comparison of object instances.

Test Code


------------------------------- Setup map method ---------------------------------
drop table test_stat;

create table test_stat
 (test_name      varchar2 (40),
   run_id         number,
   cnt            number,
   disticnt_cnt   number,
   elapsed        number,
   total_comp     number,
   single_comp    number);
   
create or replace package helper as
 p_num1_cnt number := 0;
 procedure format(p_name varchar2, p_value number);
 procedure record(p_test_name varchar2, p_cnt number, p_disticnt_cnt number, 
                  p_elapsed number, p_total_comp number, p_single_comp number);
end helper;
/

create or replace package body helper as
 procedure format(p_name varchar2, p_value number) as
 begin
  dbms_output.put_line(rpad(p_name, 40) || lpad(p_value, 10));
 end format;

 procedure record(p_test_name varchar2, p_cnt number, p_disticnt_cnt number, 
                  p_elapsed number, p_total_comp number, p_single_comp number) as
   l_run_id  number;
 begin
   select nvl(max(run_id), 0) + 1 into l_run_id from test_stat where test_name = p_test_name;
   insert into test_stat values(p_test_name, l_run_id, p_cnt, p_disticnt_cnt
                 ,p_elapsed, p_total_comp, p_single_comp);
   commit;
 end record; 
end helper;
/

drop type tobj_tab force;
drop type tobj force;

create or replace type tobj as object (
 p_num1 number,
 p_num2 number,
 map member function comp return integer);
/

create or replace type body tobj as
 map member function comp return integer is 
    begin 
      helper.p_num1_cnt := helper.p_num1_cnt + 1;
      return p_num2; 
    end; 
end;
/

create or replace type tobj_tab as table of tobj;
/

------------------------------- SET_test ---------------------------------
create or replace procedure set_test (p_name varchar2, p_size number default 1000
                   ,p_dist_val pls_integer := null) as
 l_dist_val     number := coalesce(p_dist_val, p_size);
  l_tobj_tab          tobj_tab := tobj_tab();
  l_tobj_tab_distinct tobj_tab := tobj_tab();
  l_start_time     number;
  l_elapsed        number;
begin
  select cast(collect(tobj(level, mod(level, l_dist_val))) as tobj_tab) into l_tobj_tab 
    from dual connect by level <= p_size; 
    
  helper.p_num1_cnt := 0;
  l_start_time := dbms_utility.get_time;
  l_tobj_tab_distinct := set(l_tobj_tab);
  l_elapsed := dbms_utility.get_time - l_start_time;
  
  -- SET test depends on number of objects, and also number of distinct objects
  helper.record(p_name, cardinality(l_tobj_tab), l_tobj_tab_distinct.count,
         l_elapsed, helper.p_num1_cnt, (helper.p_num1_cnt/p_size));
  
  helper.format(rpad('-', 40, '-'), null);
  helper.format('l_tobj_tab.count=', cardinality(l_tobj_tab));
  helper.format('l_tobj_tab_distinct.count=', l_tobj_tab_distinct.count);
  helper.format('elapsed(centi)=', l_elapsed);
  helper.format('Total Number of Object Comparisons=', helper.p_num1_cnt);
  helper.format('Number of Object Comparisons / Object=', (helper.p_num1_cnt/p_size));
  helper.format('l_dist_val=', l_dist_val);
end;
/

--exec set_test('SET_TEST_DIST', 10, 5);

create or replace procedure set_test_run(p_run number) as
begin
 for i in 1..p_run loop
  set_test('SET_TEST', power(10, i), power(10, i));
 end loop;
 
 for i in 1..p_run loop
  set_test('SET_TEST_DIST', power(10, p_run), power(10, i));
 end loop;
end;
/ 

--exec set_test_run(3);

------------------------------- EQUAL_test ---------------------------------
create or replace procedure equal_test (p_name varchar2, p_size number default 1000
             ,p_dist_val pls_integer := null, p_random boolean :=false) as
 l_dist_val  number := coalesce(p_dist_val, p_size);
 l_test_name   varchar2(40);
  l_tobj_tab_1  tobj_tab := tobj_tab();
  l_tobj_tab_2 tobj_tab := tobj_tab();
  l_start_time  number;
  l_elapsed     number;
begin
  select cast(collect(tobj(level, mod(level, l_dist_val))) as tobj_tab) into l_tobj_tab_1 
    from dual connect by level <= p_size; 
 
  if p_random then
   l_test_name := p_name ||'_RANDOM';
  select cast(collect(tobj(x, mod(x, l_dist_val))) as tobj_tab) into l_tobj_tab_2 
     from (select level x from dual connect by level <= p_size order by dbms_random.value);   
 else
  l_test_name := p_name;
  select cast(collect(tobj(x, mod(x, l_dist_val))) as tobj_tab) into l_tobj_tab_2 
     from (select level x from dual connect by level <= p_size);   
 end if;
    
  helper.p_num1_cnt := 0;
  l_start_time := dbms_utility.get_time;
  if (l_tobj_tab_1 = l_tobj_tab_2) then
   helper.format('l_tobj_tab_1 = l_tobj_tab_2', null);
  else 
   helper.format('l_tobj_tab_1 != l_tobj_tab_2', null);
  end if;
  l_elapsed := dbms_utility.get_time - l_start_time;
  
  helper.record(l_test_name, cardinality(l_tobj_tab_1), l_dist_val,
         l_elapsed, helper.p_num1_cnt, (helper.p_num1_cnt/p_size/2));
         
  helper.format('l_tobj_tab_1.count=', cardinality(l_tobj_tab_1));
  helper.format('l_tobj_tab_2.count=', l_tobj_tab_2.count);
  helper.format('elapsed(centi)=', l_elapsed);
  helper.format('Total Number of Object Comparisons=', helper.p_num1_cnt);
  helper.format('Number of Object Comparisons/Object=', (helper.p_num1_cnt/p_size/2));
  helper.format('l_dist_val=', l_dist_val);
end;
/

--exec equal_test(10, 5, false);
--exec equal_test(10, 5, true);

create or replace procedure equal_test_run(p_run number, p_random boolean :=false) as
begin
 for i in 1..p_run loop
  equal_test('EQUAL_TEST', power(10, i), power(10, i), p_random);
 end loop;
 
 for i in 1..p_run loop
  equal_test('EQUAL_TEST_DIST', power(10, p_run), power(10, i), p_random);
 end loop;
end;
/ 

--exec equal_test_run(3, false);
--exec equal_test_run(3, true);

------------------------------- Test Control 1 ---------------------------------
create or replace procedure all_test_run_1(p_run number, p_random boolean :=false) as
begin
 set_test_run(p_run);
 equal_test_run(p_run, false);
 equal_test_run(p_run, true);
end;
/

On Oracle

Wednesday, January 13, 2016

Performance of Oracle Object Collection Comparisons - Part1

Map and Order Method

Test Code