Tuesday, November 1, 2016

Blog List

"library cache: mutex X" and Application Context

Heavy Event: "library cache: mutex X" is observed when Application Context is frequently changed. The application is using Oracle Virtual Private Database to regulate data access with driving application context, which determines which policy group is in effect for each use case.

Run the appended Test Code by launching 4 Jobs:
  exec ctx_set_jobs(4);

Monitor Job sessions:

select sid, program, event, p1text, p1, p2text, p2, p3text, p3
  from v$session where program like '%(J%';

  SID PROGRAM              EVENT                     P1TEXT          P1 P2TEXT             P2 P3TEXT                P3
----- -------------------- ------------------------- ------ ----------- ------ -------------- ------ -----------------
   38 oracle@testdb (J003) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414788
  890 oracle@testdb (J000) library cache: mutex X    idn     1317011825 value    163208757248 where   9041305591414874
  924 oracle@testdb (J001) library cache: mutex X    idn     1317011825 value   4556960301056 where   9041305591414874
 1061 oracle@testdb (J002) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414879

Pick idn (P1): 1317011825, and query v$db_object_cache:

select name, namespace, type, hash_value, locks, pins, locked_total, pinned_total
from v$db_object_cache where hash_value in (1317011825);

NAME       NAMESPACE       TYPE             HASH_VALUE       LOCKS        PINS LOCKED_TOTAL PINNED_TOTAL
---------- --------------- --------------- ----------- ----------- ----------- ------------ ------------
TEST_CTX   APP CONTEXT     APP CONTEXT      1317011825           4           0            4    257802287

It shows that "library cache: mutex X" is on application context: TEST_CTX, and PINNED_TOTAL is probably increased for each access.
Although TEST_CTX is a local context and its values is stored in the User Global Area (UGA), the content of "library cache: mutex X" is globally on its definition.

select namespace, package, type from dba_context where namespace = 'TEST_CTX';

NAMESPACE  PACKAGE       TYPE
---------- ------------  ----------------
TEST_CTX   TEST_CTX_PKG  ACCESSED LOCALLY

After test, clean-up all jobs by:
  exec clean_jobs;

Test Code


create or replace context test_ctx using test_ctx_pkg;

create or replace package test_ctx_pkg is 
  procedure set_val (val number);
 end;
/

create or replace package body test_ctx_pkg is
  procedure set_val (val number) as
  begin
    dbms_session.set_context('test_ctx', 'attr', val);
  end;
end;
/

create or replace procedure ctx_set(p_cnt number, val number) as
begin
 for i in 1..p_cnt loop
  test_ctx_pkg.set_val(val);    -- 'library cache: mutex X' on TEST_CTX
 end loop;
end;
/

create or replace procedure ctx_set_jobs(p_job_cnt number) as
  l_job_id pls_integer;
begin
  for i in 1.. p_job_cnt loop
    dbms_job.submit(l_job_id, 'begin while true loop ctx_set(100000, '||i||'); end loop; end;');
  end loop;
  commit;
end;    
/

create or replace procedure clean_jobs as
begin
  for c in (select job from dba_jobs) loop
    begin
       dbms_job.remove (c.job);
    exception when others then null;
    end;
    commit;
  end loop;

  for c in (select d.job, d.sid, (select serial# from v$session where sid = d.sid) ser 
              from dba_jobs_running d) loop
    begin
      execute immediate
             'alter system kill session '''|| c.sid|| ',' || c.ser|| ''' immediate';
      dbms_job.remove (c.job);
    exception when others then null;
    end;
    commit;
  end loop;
  
  -- select * from dba_jobs;
  -- select * from dba_jobs_running;
end;
/


/**
exec ctx_set_jobs(4);

exec clean_jobs;

column program format a20
column event format a25
column p1text format a6
column p1 format 9999999999
column p2text format a6
column p2 format 9999999999999
column p3text format a6
column p3 format 9999999999999999

select sid, program, event, p1text, p1, p2text, p2, p3text, p3
  from v$session where program like '%(J%';

  SID PROGRAM              EVENT                     P1TEXT          P1 P2TEXT             P2 P3TEXT                P3
----- -------------------- ------------------------- ------ ----------- ------ -------------- ------ -----------------
   38 oracle@testdb (J003) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414788
  890 oracle@testdb (J000) library cache: mutex X    idn     1317011825 value    163208757248 where   9041305591414874
  924 oracle@testdb (J001) library cache: mutex X    idn     1317011825 value   4556960301056 where   9041305591414874
 1061 oracle@testdb (J002) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414879

  
column name format a10
column namespace format a15
column type format a15
set numformat 9999999999
  
select name, namespace, type, hash_value, locks, pins, locked_total, pinned_total
from v$db_object_cache where hash_value in (1317011825);

NAME       NAMESPACE       TYPE             HASH_VALUE       LOCKS        PINS LOCKED_TOTAL PINNED_TOTAL
---------- --------------- --------------- ----------- ----------- ----------- ------------ ------------
TEST_CTX   APP CONTEXT     APP CONTEXT      1317011825           4           0            4    257802287

**/

Monday, September 12, 2016

PL/SQL Multidimensional Collection Memory Usage and Performance

PL/SQL multidimensional collections are modeled by creating a collection whose elements are also collections.

We found that the memory usage and performance of Multidimensional Collections are dependent on the total number of branch nodes, which are determined by the data characteristics and the subscript indices ordering.

This Blog will try to demonstrate the findings with appended Test Code.

1. Know Your Data


At first, run two queries to understand the test data (see the Test Code):

SQL> select round(num_rows*avg_row_len/1024/1024) mb from dba_tables where table_name = 'MEM_TAB';

46 MB

SQL> select * 
  from (
  select count(distinct id) id, count(distinct name) name
        ,count(distinct num_1) num_1, count(distinct num_2) num_2
        ,count(distinct txt_1) txt_1, count(distinct txt_2) txt_2
        ,count(distinct num_1||'.'||num_2) num_1_2, count(distinct txt_1||'.'||txt_2) txt_1_2
    from mem_tab)
unpivot (distinct_count for count_name in
        (id as 'count(distinct id)', name as 'count(distinct name)'
        ,num_1 as 'count(distinct num_1)', num_2 as 'count(distinct num_2)'
        ,txt_1 as 'count(distinct txt_1)', txt_2 as 'count(distinct txt_2)' 
        ,num_1_2 as 'count(distinct num_1.num_2)'
        ,txt_1_2 as 'count(distinct txt_1.txt_2)'
        ));

COUNT_NAME                  DISTINCT_COUNT
--------------------------- --------------
count(distinct id)               1,000,000
count(distinct name)             1,000,000
count(distinct num_1)              500,000
count(distinct num_2)               50,000
count(distinct txt_1)              500,000
count(distinct txt_2)               50,000
count(distinct num_1.num_2)      1,000,000
count(distinct txt_1.txt_2)      1,000,000

Test table mem_tab is composed of 6 columns, contains 1,000,000 rows, and theoretical size is about 46 MB.
  • It has 2 unique columns (id and name), and two composite unique columns ((num_1.num_2), (txt_1.txt_2)).
  • num_1 and txt_1 have 500,000 distinct values each, branching factor = 2, hence 500,000 branch nodes when storing as array(num_1)(num_2);
  • num_2 and txt_2 have 50,000 distinct values each, branching factor = 20, hence 50,000 branch nodes when storing as array(num_2)(num_1).

2. Create Test


Run command:

SQL> exec test_collection_dimm_mem.run(1000000);

0  Dimension 1 array(id)  , pga_used(MB) =   411, Elapsed(Second)= 5.44
1  Dimension 1 array(name), pga_used(MB) =   459, Elapsed(Second)= 6.21
2  Dimension 2 array(num_1)(num_2), pga_used(MB) =  2227, Elapsed(Second)= 8.54
3  Dimension 2 array(num_2)(num_1), pga_used(MB) =   536, Elapsed(Second)= 5.44
4  Dimension 2 array(txt_1)(txt_2), pga_used(MB) =  2739, Elapsed(Second)= 7.79
5  Dimension 2 array(txt_2)(txt_1), pga_used(MB) =   628, Elapsed(Second)= 5.96
6  Dimension 2 array(num_1)(txt_2), pga_used(MB) =  2719, Elapsed(Second)= 6.53
7  Dimension 2 array(num_2)(txt_1), pga_used(MB) =   626, Elapsed(Second)= 5.62
8  Dimension 2 array(txt_1)(num_2), pga_used(MB) =  2251, Elapsed(Second)= 9.12
9  Dimension 2 array(txt_2)(num_1), pga_used(MB) =   539, Elapsed(Second)= 6.07
   
10 Dimension 1 table(id), pga_used(MB) =   411, Elapsed(Second)= 6.25
11 Dimension 2 table(num_1)(num_2), pga_used(MB) =   889, Elapsed(Second)= 9.78
12 Dimension 2 table(num_2)(num_1), pga_used(MB) =   443, Elapsed(Second)= 7.86  

The above output shows:
  1. One dimensional associative array has a factor 10 more memory usage compared to disk table size (46MB).
  2. Comparing RUN 4 and RUN 5, two dimensional can have a factor 60 more memory usage (2739/46) when organized with 500,000 branch nodes in array(txt_1)(txt_2); but factor 14 (628/46) when organized with 50,000 branch nodes in array(txt_2)(txt_1) by simply exchanging the subscripts;
    More than a factor 4 of difference when exchanging the order of subscripts.
    Each branch node takes about 5KB to 13KB.
    (imagine the cases of organizations with 50% vs. 5% employees being managers)
  3. One dimensional nested table and associative array are comparable.
  4. Two dimensional nested tables have a factor of 19 (888/46), and 10 (443/46).
  5. The performance is proportional to the memory usage.

3. Lookup Test


Once the data is stored in array/table, they can be used for in-memory search.
For example, in Test Code: test_collection_dimm_mem.run, reactivate the lookup snippet code:

    ---- lookup example ----
    l_start_time := dbms_utility.get_time;
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_node_rec := l_array_d2nn(c.num_1)(c.num_2);
    end loop;

Test output below shows that the performance difference is less than 30% among associative arrays.
But nested table is a factor 3 to 5 faster.

0  Dimension 1 array(id)  , pga_used(MB) =   411, Elapsed(Second)= 2.27
1  Dimension 1 array(name), pga_used(MB) =   459, Elapsed(Second)= 2.88
2  Dimension 2 array(num_1)(num_2), pga_used(MB) =  2227, Elapsed(Second)= 2.85
3  Dimension 2 array(num_2)(num_1), pga_used(MB) =   536, Elapsed(Second)= 2.51
4  Dimension 2 array(txt_1)(txt_2), pga_used(MB) =  2739, Elapsed(Second)= 3.3
5  Dimension 2 array(txt_2)(txt_1), pga_used(MB) =   628, Elapsed(Second)= 2.95
6  Dimension 2 array(num_1)(txt_2), pga_used(MB) =  2719, Elapsed(Second)= 2.77
7  Dimension 2 array(num_2)(txt_1), pga_used(MB) =   626, Elapsed(Second)= 2.58
8  Dimension 2 array(txt_1)(num_2), pga_used(MB) =  2251, Elapsed(Second)= 3.4
9  Dimension 2 array(txt_2)(num_1), pga_used(MB) =   539, Elapsed(Second)= 2.93
   
10 Dimension 1 table(id), pga_used(MB) =   411, Elapsed(Second)= .45
11 Dimension 2 table(num_1)(num_2), pga_used(MB) =   889, Elapsed(Second)= 1.09
12 Dimension 2 table(num_2)(num_1), pga_used(MB) =   443, Elapsed(Second)= .8


4. Heapdump Dump


Make heapdump for 3 Cases:
  1. Dimension 1 array(id) in RUN 0
  2. Dimension 2 array(num_1)(num_2) in RUN 2
  3. Dimension 2 array(num_2)(num_1) in RUN 3
by:

alter session set events 'immediate trace name heapdump level 16';

respective "top uga heap" displays the main memory usage:

--- Dimension 1 array(id) ---
 HEAP DUMP heap name="top uga heap"  desc=0x7f4b2d56ada0
 Total heap size    =427532128
 
--- Dimension 2 array(num_1)(num_2) ---
 HEAP DUMP heap name="top uga heap"  desc=0x7f94fddf3da0
 Total heap size    =2331900936

--- Dimension 2 array(num_2)(num_1) ---
 HEAP DUMP heap name="top uga heap"  desc=0x7fb10cf26da0
 Total heap size    =558883712  


5. PGA Dump


Further make pga_detail_dump by:

alter session set events 'immediate trace name pga_detail_dump level 27';

which reveals corresponding details by categories:
(note 27 is Oracle process number (PID), only top categories are listed)

--- Dimension 1 array(id) ---
  220672920 bytes,13515 chunks: "pmuccst: adt/record       "  PL/SQL
            koh-kghu sessi  ds=0x7f4b27f791b8  dsprt=0x7f4b28eed740
  182716608 bytes,11196 chunks: "pl/sql vc2                "  PL/SQL
            koh-kghu sessi  ds=0x7f4b27f791b8  dsprt=0x7f4b28eed740
   18667040 bytes,1144 chunks: "pmucalm coll              "  PL/SQL
            koh-kghu sessi  ds=0x7f4b27f791b8  dsprt=0x7f4b28eed740
     973992 bytes,  66 chunks: "static frame of inst      "  PL/SQL
            koh-kghu sessi  ds=0x7f4b282a8580  dsprt=0x7f4b28eed740
  
--- Dimension 2 array(num_1)(num_2) ---        
  1633408272 bytes,100038 chunks: "pmucalm coll              "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
  282131512 bytes,17279 chunks: "pmuccst: adt/record       "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
  187843616 bytes,11510 chunks: "pl/sql vc2                "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
  117545272 bytes,7199 chunks: "pmucpcon: ipm             "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
   57776680 bytes,3539 chunks: "pmucpcon: tds             "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
   41783352 bytes,2559 chunks: "pmucpcon: cds             "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740          
  
--- Dimension 2 array(num_2)(num_1) ---
  243923992 bytes,14939 chunks: "pmuccst: adt/record       "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
  160771776 bytes,9852 chunks: "pl/sql vc2                "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
  127166584 bytes,7789 chunks: "pmucalm coll              "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
   10956088 bytes, 671 chunks: "pmucpcon: ipm             "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
    5747456 bytes, 352 chunks: "pmucpcon: cds             "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
    4416776 bytes, 271 chunks: "pmucpcon: tds             "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740   

In the above 3 Cases, "pmuccst: adt/record" and "pl/sql vc2" are almost similar, but "pmucalm coll" makes the difference.
  1. "pl/sql vc2" are all involved varchar2 strings.
  2. "pmuccst: adt/record" (ADT: Abstract Data Type) stores all the PL/SQL records, in this test, it is 1,000,000.
  3. "pmucalm coll" looks like all the allocated collections, which represent the branch nodes. array(num_1)(num_2) takes 1,633,408,272 Bytes, whereas array(num_2)(num_1) 127,166,584 since the first one has 500,000 branch nodes, whereas second 50,000.
In Oracle applications, PL/SQL collections are often the cause of ORA-04030 when storing large number of elements (same type). Determining the categories helps pinpoint the main memory consumption.
  • "pmuccst: adt/record" stands for leaf nodes, that is the number of elements.
  • "pmucalm coll" represents branch nodes, that is the number of collections.

6. Populate process_memory_detail


Dump process_memory_detail into process_memory_detail_v by setting event PGA_DETAIL_GET, then query the result with:

column run_name format A40
column "NAME(MB, ALLOC_COUNT) List" format A200
column name format A30

select run, run_name, pid, round(sum(bytes)/1024/1024) mb, sum(allocation_count) allocation_count
from process_memory_detail_v 
group by run, run_name, pid 
order by run;

 RUN RUN_NAME                          PID      MB ALLOCATION_COUNT
---- -------------------------------- ---- ------- ----------------
   0 Dimension 1 array(id)              27     409           29,676
   1 Dimension 1 array(name)            27     458           32,824
   2 Dimension 2 array(num_1)(num_2)    27   2,220          145,969
   3 Dimension 2 array(num_2)(num_1)    27     534           37,721
   4 Dimension 2 array(txt_1)(txt_2)    27   2,730          178,767
   5 Dimension 2 array(txt_2)(txt_1)    27     626           43,632
   6 Dimension 2 array(num_1)(txt_2)    27   2,711          177,517
   7 Dimension 2 array(num_2)(txt_1)    27     624           43,511
   8 Dimension 2 array(txt_1)(num_2)    27   2,244          147,554
   9 Dimension 2 array(txt_2)(num_1)    27     537           37,895
  10 Dimension 1 table(id)              27     411           29,745
  11 Dimension 2 table(num_1)(num_2)    27     886           60,281
  12 Dimension 2 table(num_2)(num_1)    27     443           31,829
-------------------------------------------------------------------  

select run, run_name, pid, round(sum(bytes)/1024/1024) mb,
       (select listagg(rpad(name||'('||round(bytes/1024/1024)||', '||allocation_count||')', 40, chr(32)), '; ') 
               within group (order by bytes desc) agg
          from (select * from process_memory_detail_v c 
                 where c.run = v.run and c.pid = v.pid and bytes > 0
                 order by bytes desc) c 
         where rownum <= 3
         ) "NAME(MB, ALLOC_COUNT) List"
from process_memory_detail_v v
group by run, run_name, pid
order by run, run_name, pid;

RUN RUN_NAME                         PID     MB NAME(MB, ALLOC_COUNT) List
--- ------------------------------- ---- ------ ---------------------------------------------------------------------------------------------------
  0 Dimension 1 array(id)             27    409 pmuccst: adt/record(225, 14456) ; pl/sql vc2(161, 10311)                  ; pmucalm coll(17, 1084)
  1 Dimension 1 array(name)           27    458 pl/sql vc2(196, 12561)          ; pmuccst: adt/record(190, 12220)         ; pmucalm coll(65, 4196)
  2 Dimension 2 array(num_1)(num_2)   27  2,220 pmucalm coll(1560, 100167)      ; pmuccst: adt/record(271, 17379)         ; pl/sql vc2(179, 11488)
  3 Dimension 2 array(num_2)(num_1)   27    534 pmuccst: adt/record(231, 14827) ; pl/sql vc2(153, 9836)                   ; pmucalm coll(123, 7902)
  4 Dimension 2 array(txt_1)(txt_2)   27  2,730 pmucalm coll(2699, 173350)      ; pmuccst: adt/record(14, 882)            ; pl/sql vc2(10, 675)
  5 Dimension 2 array(txt_2)(txt_1)   27    626 pmucalm coll(229, 14676)        ; pmuccst: adt/record(207, 13302)         ; pl/sql vc2(176, 11272)
  6 Dimension 2 array(num_1)(txt_2)   27  2,711 pmucalm coll(2596, 166684)      ; pmuccst: adt/record(62, 3975)           ; pl/sql vc2(47, 2993)
  7 Dimension 2 array(num_2)(txt_1)   27    624 pmucalm coll(222, 14238)        ; pmuccst: adt/record(221, 14194)         ; pl/sql vc2(167, 10739)
  8 Dimension 2 array(txt_1)(num_2)   27  2,244 pmucalm coll(1570, 100838)      ; pmuccst: adt/record(273, 17546)         ; pl/sql vc2(185, 11904)
  9 Dimension 2 array(txt_2)(num_1)   27    537 pmuccst: adt/record(232, 14931) ; pl/sql vc2(155, 9946)                   ; pmucalm coll(123, 7870)
 10 Dimension 1 table(id)             27    411 pmuccst: adt/record(222, 14276) ; pl/sql vc2(163, 10488)                  ; pmucalm coll(17, 1089)
 11 Dimension 2 table(num_1)(num_2)   27    886 pmucalm coll(286, 18381)        ; pmuccst: adt/record(248, 15936)         ; pl/sql vc2(144, 9229)
 12 Dimension 2 table(num_2)(num_1)   27    443 pmuccst: adt/record(229, 14680) ; pl/sql vc2(155, 9951)                   ; pmucalm coll(30, 1905)
----------------------------------------------------------------------------------------------------------

select run, run_name, pid, category, name, heap_name, round(bytes/1024/1024) mb
      ,allocation_count, round(bytes/allocation_count) bytes_per_cnt
from process_memory_detail_v 
where name = 'pmucalm coll'
order by pid, category, name, heap_name, run, timestamp;

RUN RUN_NAME                         PID  CATEGORY  NAME           HEAP_NAME        MB      ALLOCATION_COUNT   BYTES_PER_CNT
--- ------------------------------- ---- ---------- ------------- ---------------- ------  -----------------  --------------
  0 Dimension 1 array(id)             27  PL/SQL    pmucalm coll   koh-kghu sessi      17              1,084          16,317
  1 Dimension 1 array(name)           27  PL/SQL    pmucalm coll   koh-kghu sessi      65              4,196          16,325
  2 Dimension 2 array(num_1)(num_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   1,560            100,167          16,328
  3 Dimension 2 array(num_2)(num_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     123              7,902          16,326
  4 Dimension 2 array(txt_1)(txt_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   2,699            173,350          16,328
  5 Dimension 2 array(txt_2)(txt_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     229             14,676          16,327
  6 Dimension 2 array(num_1)(txt_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   2,596            166,684          16,328
  7 Dimension 2 array(num_2)(txt_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     222             14,238          16,327
  8 Dimension 2 array(txt_1)(num_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   1,570            100,838          16,328
  9 Dimension 2 array(txt_2)(num_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     123              7,870          16,326
 10 Dimension 1 table(id)             27  PL/SQL    pmucalm coll   koh-kghu sessi      17              1,089          16,317
 11 Dimension 2 table(num_1)(num_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi     286             18,381          16,327
 12 Dimension 2 table(num_2)(num_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi      30              1,905          16,322

  • The first query shows the memory usage per run.
  • The second query lists the memory allocations of top 3 Heaps for each run.
  • The third query reveals Heap "pmucalm coll" statistics of memory and allocation_count.
    The top 4 RUNs (2, 4, 6, 8) have 500,000 branch nodes, and hence more memory and allocation_count.
    Computing the average bytes per allocation, BYTES_PER_CNT shows almost a constant value of 16K.
    Probably generous reserving 16K (2x8K-Blocks) for each "pmucalm coll" conforms to the design strategy of preventing expensive branch node splitting.

7. Test Code



set numformat 999,999,999

drop table mem_tab;

create table mem_tab(id number, name varchar2(10) 
                    ,num_1 number, num_2 number 
                    ,txt_1 varchar2(10), txt_2 varchar2(10));

insert into mem_tab 
select level id, rpad(level, 10, 'X') name
   ,level num_1, ceil(level/20) num_2 
   ,rpad(level, 10, 'X') txt_1, rpad(ceil(level/20), 10, 'X') txt_2 
  from dual connect by level <= 500000
union all
select (level + 500000) id, rpad((level + 500000), 10, 'X') name
   ,level num_1, -ceil(level/20) num_2 
   ,rpad(level, 10, 'X') txt_1, rpad(-ceil(level/20), 10, 'X') txt_2 
  from dual connect by level <= 500000;

commit;

exec dbms_stats.gather_table_stats(null, 'MEM_TAB');

select round(num_rows*avg_row_len/1024/1024) mb from dba_tables where table_name = 'MEM_TAB';
-- 46 MB

select * 
  from (
  select count(distinct id) id, count(distinct name) name
        ,count(distinct num_1) num_1, count(distinct num_2) num_2
        ,count(distinct txt_1) txt_1, count(distinct txt_2) txt_2
        ,count(distinct num_1||'.'||num_2) num_1_2, count(distinct txt_1||'.'||txt_2) txt_1_2
    from mem_tab)
unpivot (distinct_count for count_name in
        (id as 'count(distinct id)', name as 'count(distinct name)'
        ,num_1 as 'count(distinct num_1)', num_2 as 'count(distinct num_2)'
        ,txt_1 as 'count(distinct txt_1)', txt_2 as 'count(distinct txt_2)' 
        ,num_1_2 as 'count(distinct num_1.num_2)'
        ,txt_1_2 as 'count(distinct txt_1.txt_2)'
        ));

create or replace package test_collection_dimm_mem as
  procedure run (p_cnt number := 1000000);
end;
/

drop table process_memory_detail_v;

create table process_memory_detail_v as 
  select 123 run, rpad('A', 40, 'X') run_name, timestamp'1998-02-17 11:22:00' timestamp
        ,234 session_id, 345 session_serial#, v.* 
  from v$process_memory_detail v where 1=2;

create or replace package body test_collection_dimm_mem as
  type t_rec is record (
    id            number
   ,name          varchar2(10)
   ,num_1         number
   ,num_2         number
   ,txt_1         varchar2(10)
   ,txt_2         varchar2(10)
  );
  type t_array_d1n  is table of t_rec      index by pls_integer;    -- array(number)
  type t_array_d1t  is table of t_rec      index by varchar2(30);    -- array(text)
  
  type t_array_d2nn is table of t_array_d1n index by pls_integer;    -- array(number)(number)
  type t_array_d2tt is table of t_array_d1t index by varchar2(30);   -- array(text)(text)
  
  type t_array_d2nt is table of t_array_d1t index by pls_integer;    -- array(number)(text)
  type t_array_d2tn is table of t_array_d1n index by varchar2(30);   -- array(text)(number)
  
  type t_nesttab_d1 is table of t_rec;
  type t_nesttab_d2 is table of t_nesttab_d1;
     
  l_node_rec       t_rec;
  l_array_d1n      t_array_d1n;                        -- array(number)
  l_array_d1t      t_array_d1t;                        -- array(text)
  l_array_d2nn     t_array_d2nn;                        -- array(number)(number)
  l_array_d2tt     t_array_d2tt;                        -- array(text)(text)
  l_array_d2nt     t_array_d2nt;                        -- array(number)(text)
  l_array_d2tn     t_array_d2tn;                        -- array(text)(number)
  l_nesttab_d1     t_nesttab_d1 := new t_nesttab_d1(); -- nested Table (number)
  l_nesttab_d2     t_nesttab_d2 := new t_nesttab_d2(); -- nested Table (number)(number)
  l_node_map_sep   varchar2(1) := '.';
  l_start_time     number;
  l_mb             varchar2(10);
  l_last_num       number;
  l_pid            number;
  l_sid            number;
  l_serial#        number;
  l_run            number := 0;
  l_pga_status     varchar2(10) := 'NOT';
  
  procedure reset as
  begin
    l_array_d1n.delete;
    l_array_d1t.delete;
    l_array_d2nn.delete;
    l_array_d2tt.delete;
    l_array_d2nt.delete;
    l_array_d2tn.delete;
    l_nesttab_d1.delete;
    l_nesttab_d2.delete;
    dbms_session.free_unused_user_memory;
    l_start_time := dbms_utility.get_time;
  end;

  procedure prt_and_reset_mem(p_name varchar2) as
  begin
    select lpad(round(pga_used_mem/1024/1024), 5, ' '), pid, s.sid, s.serial#
      into l_mb, l_pid, l_sid, l_serial#
      from v$process p, v$session s
     where p.addr = s.paddr and s.sid = sys.dbms_support.mysid and rownum = 1;
    dbms_output.put_line(rpad(l_run, 3, ' ')||p_name||', pga_used(MB) = '||l_mb||
             ', Elapsed(Second)= '||round((dbms_utility.get_time - l_start_time)/100, 2));
             
    execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_GET level ]'||l_pid||q'[']'; 
    -- wait status = COMPLETE when status = SCANNING
    while (true) loop
     select status into l_pga_status from v$process_memory_detail_prog where pid = l_pid;
     exit when l_pga_status = 'COMPLETE';
     dbms_lock.sleep(0.1);
    end loop;
    delete from process_memory_detail_v 
     where pid = l_pid and session_id = l_sid and session_serial# = l_serial# and run = l_run;
    insert into process_memory_detail_v select l_run, p_name, systimestamp, l_sid, l_serial#, v.* 
      from v$process_memory_detail v where pid = l_pid;
    -- outcomment to keep last PGA_DETAIL
    -- execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_CANCEL level ]'||l_pid||q'[']';  
    commit; 
    
    l_run := l_run + 1;
    reset;
  end;
  
  function rec(id number, name varchar2, num_1 number, num_2 number, txt_1 varchar2, txt_2 varchar2) 
    return t_rec as
  begin
    l_node_rec.id        := id;
    l_node_rec.name      := name;
    l_node_rec.num_1     := num_1;
    l_node_rec.num_2     := num_2;
    l_node_rec.txt_1     := txt_1;
    l_node_rec.txt_2     := txt_2;
    return l_node_rec;
  end;
    
  procedure run (p_cnt number := 1000000) as
  begin
    reset;
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d1n(c.id) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 1 array(id)  ');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d1t(c.name) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 1 array(name)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nn(c.num_1)(c.num_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;

    ---- lookup example ----
    --l_start_time := dbms_utility.get_time;
    --for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
    --  l_node_rec := l_array_d2nn(c.num_1)(c.num_2);
    --end loop;

    prt_and_reset_mem('Dimension 2 array(num_1)(num_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nn(c.num_2)(c.num_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(num_2)(num_1)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tt(c.txt_1)(c.txt_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_1)(txt_2)');   
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tt(c.txt_2)(c.txt_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_2)(txt_1)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nt(c.num_1)(c.txt_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(num_1)(txt_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nt(c.num_2)(c.txt_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(num_2)(txt_1)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tn(c.txt_1)(c.num_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_1)(num_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tn(c.txt_2)(c.num_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_2)(num_1)');
    
    --- nested table. Note: specify p_cnt = 1000000 to select all rows for test ---
    dbms_output.put_line('');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab 
               where rownum <= p_cnt order by id) loop
     l_nesttab_d1.extend;
     l_nesttab_d1(l_nesttab_d1.last) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 1 table(id)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab 
               where rownum <= p_cnt order by num_1, num_2) loop
     if (c.num_1 != l_last_num) then
      l_nesttab_d2.extend;
      l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;
      l_nesttab_d1 := new t_nesttab_d1();
     end if;
     l_nesttab_d1.extend;
     l_nesttab_d1(l_nesttab_d1.last) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
     l_last_num := c.num_1;
    end loop;
    l_nesttab_d2.extend;
    l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;

    ---- lookup example ----
    --l_start_time := dbms_utility.get_time;
    --for i in 1..l_nesttab_d2.count loop
    -- l_nesttab_d1 := l_nesttab_d2(i);
    -- for j in 1..l_nesttab_d1.count loop
    --   l_node_rec := l_nesttab_d1(j);
    -- end loop;
    --end loop;

    prt_and_reset_mem('Dimension 2 table(num_1)(num_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab 
               where rownum <= p_cnt order by num_2, num_1) loop
     if (c.num_2 != l_last_num) then
      l_nesttab_d2.extend;
      l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;
      l_nesttab_d1 := new t_nesttab_d1();
     end if;
     l_nesttab_d1.extend;
     l_nesttab_d1(l_nesttab_d1.last) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
     l_last_num := c.num_2;
    end loop;
    l_nesttab_d2.extend;
    l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;
    prt_and_reset_mem('Dimension 2 table(num_2)(num_1)');    
  end;     
end;
/

-- set serveroutput on
-- exec test_collection_dimm_mem.run(1000000);

Thursday, September 8, 2016

dbms_session.get_package_memory_utilization and limitations

Oracle 11g Release 2 extends dbms_session Package by introducing a new Procedure:
    get_package_memory_utilization
and 11.2.0.4.0 further enhanced with a second overloaded procedure to augment the measure from 2**31-1 up to 10**38.

It describes memory usage of PL/SQL package declare variables in either specification or body. but not memory usage of locally declared variables within functions, procedures, or anonymous blocks.

Oracle Documentation said:
    These procedures describe static package memory usage.
    The output collections describe memory usage in each instantiated package.

Probably "static package memory usage" stands for memory usage of package declared variables.

The second restriction is that it is bound to caller's session, and does not disclose heap allocation details.

This Blog till try to demonstrate such limitations.

See appended Test Code 1, 2 and its used dump_mem_usage in the Reference 3.

1. Test


SQL > exec pkg_mem_test.run_1(1000*1000);

-------- 1. PKG_MEM_TEST.run_1 START --------
v$process: (SID=854)Used/Alloc/Freeable/Max=6/8/1/11
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/7,PL/SQL/3/3/3,Other/3//3,Freeable/1/0/,
[Owner; Unit; Type; Used; Free; sum_abs_mb(1)]
S; PKG_MEM_TEST; 9  (package); 0; 0; 0
S; PKG_MEM_TEST; 11 (package body); 0; 0; 0

-------- 2. PKG_MEM_TEST.run_1 spec_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=1282/1283/0/1283
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/1215/1211/1215,Other/68//68,
[Owner; Unit; Type; Used; Free; sum_abs_mb(1207)]
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206
S; PKG_MEM_TEST; 11 (package body); 0; 0; 0

-------- 3. PKG_MEM_TEST.run_1 body_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=2496/2497/0/2497
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/2426/2420/2426,Other/71//71,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 4. PKG_MEM_TEST.run_1 local_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=3710/3712/0/3712
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/3638/3629/3638,Other/73//73,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 5. PROC_MEM_TEST START --------
v$process: (SID=854)Used/Alloc/Freeable/Max=3710/3712/0/3712
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/3638/3629/3638,Other/73//73,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 6. PROC_MEM_TEST local_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=4924/4926/0/4926
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/4850/4838/4850,Other/76//76,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 7. PKG_MEM_TEST.run_2 START --------
v$process: (SID=854)Used/Alloc/Freeable/Max=4924/4926/0/4926
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/4850/4838/4850,Other/76//76,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 8. PKG_MEM_TEST.run_2 local_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=6138/6141/0/6141
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/6062/6047/6062,Other/78//78,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 9. PROC_MEM_TEST END --------
v$process: (SID=854)Used/Alloc/Freeable/Max=4924/6141/0/6141
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/4850/4838/6062,Other/1291//1291,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 10. PKG_MEM_TEST.run_1 END --------
v$process: (SID=854)Used/Alloc/Freeable/Max=3710/6141/0/6141
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/3638/3629/6062,Other/2503//2503,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

In Step 8, the total used memory is;
     6138 MB
but only 2412 from
    PKG_MEM_TEST; 11 (package body); 1177
  PKG_MEM_TEST; 9  (package); 1177
are listed.

The 3 local allocations in Steps:
  4. PKG_MEM_TEST.run_1 local_rec_tab
  6. PROC_MEM_TEST local_rec_tab
  8. PKG_MEM_TEST.run_2 local_rec_tab
are not reported.

2. Populate process_memory_detail


dbms_session.get_package_memory_utilization is restricted to report memory usage of caller's session, aggregated by each package (spec and body) name, which is a point of view in applications, and does not point out the details of allocated heap details.

V$PROCESS_MEMORY_DETAIL (Oracle 10.2 introduced) lists PGA memory usage by category, heap_name, and component name for each Oracle process, and summarized in V$PROCESS_MEMORY by category. They expose memory usage in the dimension of heap components, a point of view in program code.

Test Code 2 provides a V$PROCESS_MEMORY_DETAIL sampling tool to collect and monitor memory changes.

Run:

exec pga_sampling(p_sid => 234);

Query memory usage per sampling and display memory deviation by:

select v.*, mb - lag(mb) over(order by run) mb_delta 
  from(
    select run, timestamp, session_id, session_serial#, pid
          ,round(sum(bytes)/1024/1024) mb
          ,sum(allocation_count) allocation_count
    from process_memory_detail_v 
    group by run, timestamp, session_id, session_serial#, pid) v
order by run;     

List top 3 heap memory usages for each sampling by:

select run, timestamp, session_id, pid, round(sum(bytes)/1024/1024) mb,
       (select listagg(rpad(name||'('||round(bytes/1024/1024)||', '||allocation_count||')', 40, chr(32)), '; ') 
               within group (order by bytes desc) agg
          from (select * from process_memory_detail_v c 
                 where c.run = v.run and c.pid = v.pid and bytes > 0
                 order by bytes desc) c 
         where rownum <= 3
         ) "NAME(MB, ALLOC_COUNT) List"
from process_memory_detail_v v
group by run, timestamp, session_id, pid
order by run, timestamp, session_id, pid;

Join above query with v$active_session_history (or dba_hist_active_sess_history), one can approximately locate the memory usage related to PL/SQL program unit. Starting from this PL/SQL program unit, one can use 12c UTL_CALL_STACK (or BMS_UTILITY.FORMAT_CALL_STACK) to backtrace all call_stack involved units.

with sq as (
  select run, timestamp, v.session_id, session_serial#, pid, round(sum(bytes)/1024/1024) mb
        ,(select listagg(rpad(name||'('||round(bytes/1024/1024)||', '||allocation_count||')', 40, chr(32)), '; ') 
                 within group (order by bytes desc) agg
            from (select * from process_memory_detail_v c 
                   where c.run = v.run and c.pid = v.pid and bytes > 0
                   order by bytes desc) c 
           where rownum <= 3
           ) "NAME(MB, ALLOC_COUNT) List"
  from process_memory_detail_v v
  group by run, timestamp, v.session_id, session_serial#, pid)
select p.*
      ,(select owner||'.'||object_name||
               case when procedure_name is not null then '.' || procedure_name end
         from dba_procedures
        where object_id = s.plsql_entry_object_id and subprogram_id = s.plsql_entry_subprogram_id)
       plsql_entry
      ,s.sample_time, round(s.pga_allocated/1024/1024) pga_allocated_mb
  from sq p, v$active_session_history s
where p.session_id = s.session_id
  and p.session_serial# = s.session_serial#
  and p.timestamp between s.sample_time and s.sample_time + interval'1' second
 order by run, timestamp, p.session_id, p.session_serial#, pid, s.sample_time;


3. PGA memory internals


Look output of the first query in above Section, pick one RUN with some peak memory usage, for example, 4, Run query:

select run, category, name, heap_name, depth, path
      ,round(sum(bytes/1024)) kb, sum(allocation_count) alloc_count
      ,heap_descriptor, parent_heap_descriptor, cycle
from (
  select v.*, (level-1) depth
        ,sys_connect_by_path('('||category||' , '||name||' , '||heap_name||')', ' -> ') path
        ,connect_by_iscycle as cycle
  from process_memory_detail_v v
  where lower(name) like '%recursive addr reg file%'
  start with parent_heap_descriptor = '00' and run = 4
  connect by nocycle prior heap_descriptor = parent_heap_descriptor and prior run = run
) 
--where lower(name) like '%recursive addr reg file%'
group by run, category, name, heap_name, heap_descriptor, parent_heap_descriptor, depth, path, cycle
--having sum(bytes/1024) > 1024
order by run, category, name, heap_name, depth, kb;

  • It depicts the PGA heap tree structure by connecting heap_descriptor with parent_heap_descriptor.
  • We can observe that only "Other" Category has depth 0 (root) nodes, and all other Categories are subtrees to "Other".
    (One exception is when CATEGORY = 'PL/SQL' and NAME='miscellaneous', both heap_descriptor and parent_heap_descriptor equal to '00', causing cycle)
  • One time we noticed certain high PGA memory consumptions. By running above query, it turns out that the main contribution is due to "recursive addr reg file". Further searching Oracle MOS, it is documented as something related to PLSQL anonymous blocks.
    (MOS Bug 9478199 - Memory corruption / ORA-600 from PLSQL anonymous blocks (Doc ID 9478199.8)).
  • dbms_session.get_package_memory_utilization is hard to inject into existing code, let alone Oracle background processes. However, populating process_memory_detail opens a tiny door to peer Oracle internals, even for background processes, for example, PMON, DBWx, CJQ0, MMON.

4. Test Code 1


create or replace package pkg_mem_test is
  type t_rec        is record (id number, text varchar2(1000));
  type t_rec_tab    is table of t_rec index by pls_integer;
  spec_rec_tab      t_rec_tab;
  
  procedure run_1(p_cnt number);
  procedure run_2(p_cnt number);
end;
/

create or replace procedure proc_mem_test(p_cnt number) as
  type t_rec      is record (id number, text varchar2(1000));
  type t_rec_tab  is table of t_rec index by pls_integer;
  local_rec_tab   t_rec_tab;
begin
  dump_mem_usage.run('PROC_MEM_TEST START');

  select level id, rpad('ABC', 1000, 'X') text 
  bulk collect into local_rec_tab
  from dual connect by level <= p_cnt;
  
  dump_mem_usage.run('PROC_MEM_TEST local_rec_tab ALLOCATED');
  
  pkg_mem_test.run_2(p_cnt);
  
  dump_mem_usage.run('PROC_MEM_TEST END');
end;
/

create or replace package body pkg_mem_test is
  body_rec_tab      t_rec_tab;
  
  procedure run_1(p_cnt number) as
    local_rec_tab   t_rec_tab;
  begin
    dump_mem_usage.run('PKG_MEM_TEST.run_1 START');
  
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into spec_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_1 spec_rec_tab ALLOCATED');
    
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into body_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_1 body_rec_tab ALLOCATED');
    
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into local_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_1 local_rec_tab ALLOCATED');
    
    proc_mem_test(p_cnt);
    
    dump_mem_usage.run('PKG_MEM_TEST.run_1 END');
  end;
  
  procedure run_2(p_cnt number) as
    local_rec_tab    t_rec_tab;
  begin
    dump_mem_usage.run('PKG_MEM_TEST.run_2 START');
  
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into local_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_2 local_rec_tab ALLOCATED');  
    end;
end;
/

-- exec pkg_mem_test.run_1(1000*1000);

5. Test Code 2


drop table process_memory_detail_v;

create table process_memory_detail_v as 
  select 123 run, rpad('A', 40, 'X') run_name, timestamp'1998-02-17 11:22:00' timestamp
        ,234 session_id, 345 session_serial#, v.* 
  from v$process_memory_detail v where 1=2;
  
-- sampling for p_dur seconds in p_interval seconds interval.
create or replace procedure pga_sampling(p_sid number, p_dur number := 120, p_interval number := 1) as
  l_start_time     number := dbms_utility.get_time;
  l_sample_time    number;
  l_sleep_time     number;
  l_pid            number;
  l_sid            number;
  l_serial#        number;
  l_run            number := 0;
  l_pga_status     varchar2(10) := 'NOT';
  l_run_name       varchar2(40) := 'PGA Sampling('||p_sid||', '||p_dur||', '||p_interval||')';
begin
  select pid, s.sid, s.serial#
    into l_pid, l_sid, l_serial#
    from v$process p, v$session s
   where p.addr = s.paddr and s.sid = p_sid and rownum = 1;  
               
  l_sample_time := dbms_utility.get_time;
  while ((l_sample_time - l_start_time)/100 < p_dur) loop 
    execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_GET level ]'||l_pid||q'[']'; 
    -- wait status = COMPLETE when status = SCANNING, or ENABLED but elapsed time is over duration
    while (true) loop
     select status into l_pga_status from v$process_memory_detail_prog where pid = l_pid;
     exit when l_pga_status = 'COMPLETE' or ((dbms_utility.get_time - l_start_time)/100 > p_dur);
     dbms_lock.sleep(0.1);
    end loop;
    delete from process_memory_detail_v 
     where pid = l_pid and session_id = l_sid and session_serial# = l_serial# and run = l_run;
    insert into process_memory_detail_v select l_run, l_run_name, systimestamp, l_sid, l_serial#, v.* 
      from v$process_memory_detail v where pid = l_pid;
    -- outcomment to keep last PGA_DETAIL
    -- execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_CANCEL level ]'||l_pid||q'[']';  
    commit; 
    l_run := l_run + 1;    
    l_sleep_time := p_interval - (dbms_utility.get_time - l_sample_time)/100;
    if l_sleep_time > 0 then
     dbms_lock.sleep(l_sleep_time);
    end if;
    l_sample_time := dbms_utility.get_time;
  end loop;
end;
/
    
-- exec pga_sampling(p_sid => 234);

References

  1. Oracle MOS Notes: How To Find Where The Memory Is Growing For A Process (Doc ID 822527.1)
  2. Oracle Memory Troubleshooting, Part 4: Drilling down into PGA memory usage with V$PROCESS_MEMORY_DETAIL 
  3. dbms_session package_memory_utilization
 
 

Monday, August 1, 2016

ORA-04025 library object lock on AQ queue and SQL cursor

AQ dequeue throws:
     ORA-04025: maximum allowed library object lock allocated for S.KSUN_QUEUE
in an Oracle 12c batch application when number of concurrent jobs greater than 65, where KSUN_QUEUE is the queue name, and DB is set up with following relevant parameters:
  _session_cached_instantiations       integer     1001
  cursor_sharing                       string      exact
  cursor_space_for_time                boolean     FALSE
  open_cursors                         integer     800
  session_cached_cursors               integer     600


Adhere to Oracle Documentation on ORA-04025:
  Cause:  Too many active locks for the object has been allocated.
               This error can be a result of too many cursors kept open per session.
  Action: Close cursors, reduce session_cached_cursors value.

Halving session_cached_cursors from 600 to 300 does not make the error disappear.

This Blog will try to demonstrate:
(a). there probably exists a hard limit of 65535 locks per library object:
             LIBRARY OBJECT LOCK: MAXIMUM 65535
       internally it could be implemented by a 16-bits u16.
(b). locks per Oracle session is imposed by
             _session_cached_instantiations
(c). maximum number of concurrent sessions to avoid ORA-04025 is
             floor(65535/_session_cached_instantiations)
      It is 65 for above settings.

Tests done on Solaris and AIX with Oracle 12.1.0.2.0.
See Blog appended Test Code.
Note: to make the test reproducible, before each test, restart DB to remove any un-released locks.

1. Library object lock on AQ queue


1.1 Single session dequeue/enqueue test:


SQL> exec test_deq(1200, 0, 0, true);
 deq_cnt: 1, lock_cnt: 1
 deq_cnt: 2, lock_cnt: 2
 deq_cnt: 3, lock_cnt: 3
 ...
 deq_cnt: 1000, lock_cnt: 1000
 deq_cnt: 1001, lock_cnt: 1001
 deq_cnt: 1002, lock_cnt: 1002
 deq_cnt: 1003, lock_cnt: 1002
 ...
 deq_cnt: 1199, lock_cnt: 1002
 deq_cnt: 1200, lock_cnt: 1002

It shows that number of locks on the queue object is increased for each dequeue call till 1002,
and then maintained at 1002.

Open a new session, make a similar enqueue test.

  SQL> exec test_enq(1200, 0, true);

The lock behavior is similar to dequeue. Therefore all the discussion on dequeue applies to enqueue as well.

1.2   66 Batch sessions dequeue test


In this test, we will start 66 dequeue sessions to provoke ORA-04025. All 3 monitoring queries can be run during whole tests to watch the progress of locks. Once total number of locks on KSUN_QUEUE is approaching to 65535, some sessions raise ORA_04025.

(A). Launch 66 concurrent Jobs by:


  SQL> exec test_deq_jobs(66, 2000);  

After about 18 minutes (1003 times of one-second dequeue wait), we will see some trace files like:

 ORA-12012: error on auto execute of job 86367
 ORA-04025: maximum allowed library object lock allocated for S.KSUN_QUEUE
 ORA-06512: at "S.TEST_DEQ", line 40
 ORA-06512: at line 1

(B). Watch total number of locks on KSUN_QUEUE:


SQL> select name, locks, pins, locked_total, pinned_total
       from v$db_object_cache l
      where hash_value = 3868150627 or name = 'KSUN_QUEUE';

 NAME         LOCKS  PINS LOCKED_TOTAL PINNED_TOTAL
 ----------- ------ ----- ------------ ------------
 KSUN_QUEUE   65106     0       321935       428400

SQL> select kglnaobj||'('||kglobtyd||')' name, object_handle, type, mode_held, mode_requested, count(*) cnt
       from v$libcache_locks l, x$kglob o
      where o.kglnaobj = 'KSUN_QUEUE'
        and l.object_handle = o.kglhdadr
      group by object_handle, kglnaobj, kglobtyd, type, mode_held, mode_requested;

 NAME               OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED    CNT
 ------------------ ---------------- ---- ---------- -------------- ------
 KSUN_QUEUE(QUEUE)  0000000167A4B2E0 LOCK          1              0  65224

(C). Watch number of locks per session on KSUN_QUEUE:


SQL> with sq as
          (select /*+ materialize */ sid, program, saddr, event
              ,to_char(p1, 'xxxxxxxxxxxxxxx') p1, p1text
              ,to_char(p2, 'xxxxxxxxxxxxxxx') p2, p2text
              ,to_char(p3, 'xxxxxxxxxxxxxxx') p3, p3text
           from v$session
          where program like '%(J%' or module like '%deq%')
     select s.sid, holding_user_session, object_handle, l.type, mode_held, mode_requested,
            count(*) lock_cnt     -- it goes up to maximum 1003, and then back 1002
       from v$libcache_locks l, x$kglob o, sq s
      where o.kglnaobj = 'KSUN_QUEUE'
        and l.object_handle = o.kglhdadr
        and l.holding_user_session = s.saddr
     group by s.sid, holding_user_session, object_handle, l.type, mode_held, mode_requested
     order by lock_cnt desc, s.sid, holding_user_session, l.type, mode_held, mode_requested;

   SID HOLDING_USER_SES OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED   LOCK_CNT
 ----- ---------------- ---------------- ---- ---------- -------------- ----------
     6 000000018FA49D60 0000000167A4B2E0 LOCK          1              0       1003
    70 000000018FA93AA0 0000000167A4B2E0 LOCK          1              0       1003
 ...
  1058 000000018FE84A60 0000000167A4B2E0 LOCK          1              0       1002
  1059 000000018D19A168 0000000167A4B2E0 LOCK          1              0       1002
   173 000000018ED35840 0000000167A4B2E0 LOCK          1              0        739
  1025 000000018D177398 0000000167A4B2E0 LOCK          1              0        739        

66 rows selected.        

Examining maximum number of locks per session, they oscillate between 1002 and 1003.

There exist sessions whose LOCK_CNT is not able to reach 1002 because
    66 x 1002 > 65535
hence ORA-04025.

(D). Match Oracle parameters with value like '100%':


To figure out what is the limit of 1003, run:

SQL> select name, value from v$parameter where value like '100%';

 NAME                             VALUE
 -------------------------------- -----
 _session_cached_instantiations   1001

That is _session_cached_instantiations.

(E). Finally clean-up all jobs

  SQL> exec clean_jobs;

1.3   65 Batch sessions dequeue test


Repeat the same test by launching 65 concurrent Jobs. No ORA-04025 occurs.

  SQL> exec test_deq_jobs(65, 2000);

Wait about 18 minutes, run above 3 monitoring queries.
Here the output:

SQL> query_1

 NAME         LOCKS  PINS LOCKED_TOTAL PINNED_TOTAL
 ----------- ------ ----- ------------ ------------
 KSUN_QUEUE   65130     0      2493796      3324170

SQL> query_2

 NAME               OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED    CNT
 ------------------ ---------------- ---- ---------- -------------- ------
 KSUN_QUEUE(QUEUE)  0000000167A4B2E0 LOCK          1              0  65130
 
SQL> query_3
 
   SID HOLDING_USER_SES OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED   LOCK_CNT
 ----- ---------------- ---------------- ---- ---------- -------------- ----------
     6 000000018FA49D60 0000000167A4B2E0 LOCK          1              0       1003
    40 000000018FA6CB30 0000000167A4B2E0 LOCK          1              0       1003
 ...   
  1027 000000018D1752C8 0000000167A4B2E0 LOCK          1              0       1002
  1058 000000018FE84A60 0000000167A4B2E0 LOCK          1              0       1002

65 rows selected.        

All sessions can own a minimum LOCK_CNT of 1002 because
    65 x 1002 < 65535
hence no ORA-04025.

To terminate the test, clean-up all jobs

  SQL> exec clean_jobs;

1.4 _session_cached_instantiations


Lowering _session_cached_instantiations from 1001 to 401, restart DB:
   alter system set "_session_cached_instantiations"=401 scope=spfile;
repeat the test, no more error even for 100 Jobs; but error reappears with 200 Jobs.

Since not clear what is the limit of "_session_cached_instantiations", it is worth of gambling its extreme behavior with two additional tests, one with 65535, another with 65536.

alter system set "_session_cached_instantiations"=65535 scope=spfile;

restart DB

SQL> exec test_deq(65536, 0, 0);

 deq_cnt: 65536, lock_cnt: 65535
 ORA-04025: maximum allowed library object lock allocated for S.KSUN_QUEUE

Then make some unrealistic adventure:

alter system set "_session_cached_instantiations"=65536 scope=spfile;

restart DB

SQL> exec test_deq(65536, 0, 0);

 deq_cnt: 65536, lock_cnt: 65535
 ORA-04025: maximum allowed library object lock allocated for K.KSUN_QUEUE


Both identical outputs show that sessions raise ORA-04025 at limit 65535, otherwise they violate the condition:
   floor(65535/_session_cached_instantiations) >= 1

1.5   12c vs. 11g dequeue behavior


Running the same tests in 11.2.0.4.0, number of locks per session on KSUN_QUEUE is constantly kept as 1. Hence, 12c dequeue behavior is perturbed, in which each dequeue call opens a new lock on the queue, but never release it till ceiling _session_cached_instantiations. One can wonder if that can be classified as a library object lock leak.

Oracle Database Advanced Queuing 12c seems introduced some new AQ background architecture, for example, AQPC (GV$AQ_BACKGROUND_COORDINATOR).

1.6 MOS Note


Resorting to MOS, there is one hit of 65535 hinting to ORA-4025.

MOS Bug 14542720 - ORA-4025 with CURSOR_SHARING / Dynamic Sampling (Doc ID 14542720.8) said:
  Look for the ActiveLocks value of some cursor to show very high values  (when it reaches 65535 the ora-4025 is raised)

2. SQL cursor and ORA-04025


As a general understanding of ORA-04025, it is necessary to examine its original designation for SQL Cursors. Pick the JAVA test code provided by Oracle MOS, we will make 3 tests.

Note: the same Java code is tested on both 11.2.0.4.0 and 12.1.0.2.0, the behaviors are identical.

At first, set open_cursors:
  alter system set open_cursors=50000 scope=both;

2.1 Single session


Start one single session, monitor by:

SQL> select sid, sql_id, sql_text, count(*) cnt
       from v$open_cursor
      where cursor_type in ('OPEN')
      group by sid, sql_id, sql_text
      order by cnt desc;

when opened cursors reaching 50000, it hits:
 ORA-01000: maximum open cursors exceeded
    
$1 > java OracleJdbcExample4025
 Count=49998, Date : 2016-07-21 10:50:17
 Count=49999, Date : 2016-07-21 10:50:17
 Exception in thread "main" java.sql.SQLException: ORA-01000: maximum open cursors exceeded


2.2 Two concurrent sessions


Start two sessions, both throw ORA-04025:

$1 > java OracleJdbcExample4025

 Count=25115, Date : 2016-07-21 11:46:45.0
 Count=25116, Date : 2016-07-21 11:46:45.0
 Exception in thread "main" java.sql.SQLException: 
  ORA-04025: maximum allowed library object lock allocated for select sysdate as current_day from dual

$2 > java OracleJdbcExample4025

 Count=40418, Date : 2016-07-21 11:46:45.0
 Count=40419, Date : 2016-07-21 11:46:45.0
 Exception in thread "main" java.sql.SQLException: 
  ORA-04025: maximum allowed library object lock allocated for select sysdate as current_day from dual

Summing two outputs agrees the equality:
   25116 + 40419 = 65535
which confirms "LIBRARY OBJECT LOCK: MAXIMUM 65535".

2.3 Boundary value


According to Oracle Documentation, open_cursors has a legal range of values:
    0 to 65535
what happens when setting open_cursor=65535 ?
Both ORA-01000 and ORA-04025 are satisfied by boundary value 65535, which one is raised at first ? (second will never be presented).

Here the test output:

alter system set open_cursors=65535 scope=both;

$1 > java OracleJdbcExample4025

  Count=65533, Date : 2016-07-22 08:01:49.0
  Count=65534, Date : 2016-07-22 08:01:49.0
  Exception in thread "main" java.sql.SQLException: ORA-01000: maximum open cursors exceeded


So we can guess that ORA-04025 is ordered after ORA-01000 in the code path. That is also why to provoke ORA-04025 for SQL cursor, minimum two sessions are required.

Summary


Recapping all the tests, we can draw a few possible observations, but I would be careful to have any earlier conclusions before Oracle says.

1. SQL cursor is a library object, but not each library object is a SQL cursor, for example, AQ queue.

2. OPEN_CURSORS specifies the maximum number of open cursors per session can have at each instant. HARD Limit.

3. SESSION_CACHED_CURSORS specifies the number of cursors to cache per session, an optimization advice. SOFT Limit.

4. OPEN_CURSORS, SESSION_CACHED_CURSORS are parameters applied to SQL cursor.

5. "LIBRARY OBJECT LOCK: MAXIMUM 65535" specifies limit per library object. HARD Limit.

6. 12c _SESSION_CACHED_INSTANTIATIONS specifies number of library object lock on AQ queue per session.
    HARD Limit, but not raising error.

7. "LIBRARY OBJECT LOCK: MAXIMUM 65535" and _SESSION_CACHED_INSTANTIATIONS together defines
     maximum number of concurrent sessions to avoid ORA-04025:
            floor(65535/_session_cached_instantiations)

References


1. open_cursors, session_cached_cursors: cursordump

2. Oracle 12c single session "library cache lock (cycle)" deadlock

3. Foreign Keys and Library Cache Locks: Analysis

Test Code



------------------------------------- AQ dequeue ---------------------------------
--alter system set "_session_cached_instantiations"=1001 scope=spfile;
--alter system set open_cursors=800 scope=both;
--alter system set session_cached_cursors=600 scope=spfile;

drop type Q_OBJ force;

create or replace type Q_OBJ is object (id number, name varchar2(100));
/       

begin sys.dbms_aqadm.drop_queue_table(queue_table => 'Q_TAB', force=> TRUE); end;
/

begin
  sys.dbms_aqadm.create_queue_table
  (queue_table        => 'Q_TAB'
  ,queue_payload_type => 'Q_OBJ'
  ,compatible         => '10.0.0'  
  ,sort_list          => 'PRIORITY,ENQ_TIME'
  ,multiple_consumers =>  false
  ,message_grouping   =>  0
  ,comment            =>  'queue table'
  ,secure             =>  false);
end;
/

begin
  sys.dbms_aqadm.stop_queue (queue_name => 'KSUN_QUEUE');
  sys.dbms_aqadm.drop_queue (queue_name => 'KSUN_QUEUE');
end;
/

begin
  sys.dbms_aqadm.create_queue
  (queue_name     => 'KSUN_QUEUE'
  ,queue_table    => 'Q_TAB'
  ,queue_type     => sys.dbms_aqadm.normal_queue
  ,max_retries    => 100
  ,retry_delay    => 2
  ,retention_time => 604800
  ,comment        => 'ksun queue');
end;
/

begin sys.dbms_aqadm.start_queue(queue_name => 'KSUN_QUEUE', enqueue => true, dequeue => true); end;
/
  
create or replace procedure test_enq(p_cnt number := 1, p_sleep number := 0, p_prt boolean :=false) as
  l_enqueue_options dbms_aq.enqueue_options_t;
  l_msg_properties  dbms_aq.message_properties_t;
  l_msg_handle      raw(16);
  l_msg             q_obj;
  l_lock_cnt        number;
  
  procedure prt(i_cnt number, i_prt boolean) as
   begin
    if i_prt then
     select locks into l_lock_cnt from v$db_object_cache where name = 'KSUN_QUEUE';
     dbms_output.put_line ('deq_cnt: '||i_cnt||', lock_cnt: '||l_lock_cnt);
   end if;
   end;
begin 
 dbms_application_info.set_module(module_name => 'enq session', action_name => 'enqueue');
  for i in 1..p_cnt loop
    l_msg := q_obj(i, rpad(i, 10, 'X'));
            
    dbms_aq.enqueue(queue_name         => 'KSUN_QUEUE',
                    enqueue_options    => l_enqueue_options,
                    message_properties => l_msg_properties,
                    payload            => l_msg,
                    msgid              => l_msg_handle);
    prt(i, p_prt);
    
    commit;
    
    if p_sleep > 0 then
      dbms_lock.sleep(p_sleep);
    end if;
  end loop;
end;
/

create or replace procedure test_deq(p_cnt number := 1, p_sleep number := 0, p_wait number := 1, 
                                     p_prt boolean :=false) as
 l_dequeue_options     dbms_aq.dequeue_options_t;
 l_msg_properties      dbms_aq.message_properties_t;
 l_msg_handle          raw(16);
 l_msg                 q_obj;
 l_lock_cnt            number;
 no_messages_excp      exception;
 pragma exception_init (no_messages_excp, -25228);
  
  procedure prt(i_cnt number, i_prt boolean) as
   begin
    if i_prt then
     select locks into l_lock_cnt from v$db_object_cache where name = 'KSUN_QUEUE';
     dbms_output.put_line ('deq_cnt: '||i_cnt||', lock_cnt: '||l_lock_cnt);
   end if;
   end;
begin
 dbms_application_info.set_module(module_name => 'deq session', action_name => 'dequeue');
 l_dequeue_options.wait := p_wait;
  for i in 1..p_cnt loop
  begin 
   if p_sleep > 0 then
    dbms_lock.sleep(p_sleep);
   end if;
   
    dbms_aq.dequeue(queue_name            => 'KSUN_QUEUE',
                    dequeue_options       => l_dequeue_options,
                    message_properties    => l_msg_properties,
                    payload               => l_msg,
                    msgid                 => l_msg_handle);
          
    dbms_output.put_line ('MSG id: '||l_msg.id||', name: '||l_msg.name);
    
    prt(i, p_prt);
    
    commit;
      exception when no_messages_excp then prt(i, p_prt);
         when others then prt(i, true); 
                  raise;  
   end;
 end loop;
end;
/    

create or replace procedure test_deq_jobs (p_jobs number := 66, p_cnt number := 2000) as
 l_job_id pls_integer;
begin
  for i in 1..p_jobs loop
  dbms_job.submit(l_job_id, 'begin while true loop test_deq('||p_cnt||'); end loop; end;');
  commit;
  dbms_output.put_line(l_job_id);
 end loop;
end;
/        

create or replace procedure clean_jobs as
  begin
   for c in (select job from dba_jobs) loop
      begin
         dbms_job.remove (c.job);
      exception when others then null;
      end;
      commit;
   end loop;

   for c in (select d.job, d.sid, (select serial# from v$session where sid = d.sid) ser 
               from dba_jobs_running d) loop
      begin
         execute immediate
               'alter system kill session '''|| c.sid|| ',' || c.ser|| ''' immediate';
         dbms_job.remove (c.job);
      exception when others then null;
      end;
      commit;
   end loop;
   
   -- select * from dba_jobs;
   -- select * from dba_jobs_running;
end;
/

------------------------------------- SQL curosr and ORA-04025 Test ---------------------------------

import java.sql.Connection; 
import java.sql.DriverManager; 
import java.sql.PreparedStatement; 
import java.sql.ResultSet; 
import java.sql.SQLException;

public class OracleJdbcExample4025 { 

public static void main(String args[]) throws SQLException { 
 System.out.println("--------SQL curosr and ORA-04025 Test ------");
 try {
  Class.forName("oracle.jdbc.driver.OracleDriver");
 } catch (ClassNotFoundException e) {
  System.out.println("Where is your Oracle JDBC Driver?");
  e.printStackTrace();
  return;
 }
 System.out.println("Oracle JDBC Driver Registered!");
 Connection connection = null;
 try {
  connection = DriverManager.getConnection(
    "jdbc:oracle:thin:@testdb:1522:testdb", "s", "s");  // adapt it
 } catch (SQLException e) {
  System.out.println("Connection Failed! Check output console");
  e.printStackTrace();
  return;
 }    
 String sql ="select sysdate as current_day from dual"; 
 int cnt = 0;
 while (true) { 
  //creating PreparedStatement object to execute query 
  PreparedStatement preStatement = connection.prepareStatement(sql); //<<< this opening cursor 
  ResultSet result = preStatement.executeQuery(); 
  while(result.next()){ 
   System.out.println("Count=" + (++cnt) + ", Date : " + result.getString("current_day")); 
  } 
 // preStatement.close();  //<<< if this is uncommented cursor is closed, otherwise not. 
 } 
 // System.out.println("done"); 
 } 
} 

Monday, June 13, 2016

SQL Parsing in serializable transaction throws ORA-08177: can't serialize access for this transaction

Setting serializable isolation level in Oracle 12c (12.1.0.2.0), SQL Parsing can throw:
  ORA-08177: can't serialize access for this transaction

run test (see Blog appended TestCode):
 
SQL (742) > exec ora_08177_test(10, 'ksun_ora_08177_collect_listagg');

Run=1
ERROR at line 1:
ORA-08177: can't serialize access for this transaction
ORA-06512: at "ORA_08177_TEST", line 20
ORA-06512: at line 1

Note: Each time, open a new session, run the test. After first run, subsequent runs in the same session do not hit the error.

According to Oracle documentation: Database Concepts - Data Concurrency and Consistency

The database generates an error when a serializable transaction tries to update or delete data changed by a different transaction that committed after the serializable transaction began:
 ORA-08177: Cannot serialize access for this transaction


Here there exists only one single session, so it does not satisfy the requirement of above documentation.

Reasoning


Refer to the recursive session discussed in Blog: Oracle 12c single session "library cache lock (cycle)" deadlock .

run query:
 
select distinct s.sid, holding_user_session, holding_session
  from v$libcache_locks c, v$session s
 where lnnvl(holding_user_session = holding_session)
   and c.holding_user_session = s.saddr 
   and s.sid = 742;
   
SID HOLDING_USER_SES HOLDING_SESSION 
--- ---------------- ----------------
742 070001010ECF8680 070001010ECFC880    

(742 is the SID of test session)

crosscheck the dump generated by 8177 trace event on AIX (we set this event for the test):

----- Call Stack Trace -----
ktrexc()+3536        call     ksesecl0()           FFFFFFFFFFC9FB0 ? 122F5F490 ?
rpidrv()+3572        call     rpiswu2()            70001010ECFC880 ? 1221BF1E0 ?
kksLoadChild()+3544  call     rpiswu2()            70001010ECF8680 ? 000000090 ?
...
========== FRAME [19] (ktrexc()+3536 -> ksesecl0()) ==========
defined by frame pointers 0xffffffffffca080  and 0xffffffffffc9ee0
CALL TYPE: call   ERROR SIGNALED: yes   COMPONENT: (null)
...
PROCESS STATE
-------------
Process global information:
     process: 0x70001010e8c4078, call: 0x7000100eb0e7bb0, xact: 0x7000101037e9880, 
     curses: 0x70001010ecfc880, usrses: 0x70001010ecf8680
...
KSUSETRN = 0X07000101037E9880
...
(trans) flg = 0x00000004, flg2 = 0x00400000, flg3 = 0x00000040, 
  prx = 0x7000101037e9880, ros = 2147483647, crtses=0x70001010ecf8680

we can establish two equations:

holding_user_session = 070001010ECF8680 = usrses: 0x70001010ecf8680
holding_session      = 070001010ECFC880 = curses: 0x70001010ecfc880

which tellingly indicate the co-existence of two different sessions, usrses(user session) and curses (cursor session), and existence of transaction (0X07000101037E9880) created by user session (crtses=0x70001010ecf8680).

Looking Call Stack Trace, user session calls:
   kksLoadChild()
and then switches to cursor session, which triggers rpi (recursive procedure interface):
 rpidrv()
and finally ERROR SIGNALED at:
  ktrexc()
where ktr means kernel transaction read consistency.

Since user session already started a transaction, any attempt to modify the data by the cursor session resulted in:
 ORA-08177: can't serialize access for this transaction

Run other two tests:

SQL (742) > exec ora_08177_test(10, 'ksun_ora_08177_collect');
SQL (742) > exec ora_08177_test(10, 'ksun_ora_08177_listagg');

there are no errors.

Hence, the error is probably related to certain constellation of collect and listagg.

All tests are performed on AIX, Solaris and Linux with Oracle 12c (12.1.0.2.0).
It is not reproducible in Oracle 11.2.0.4.0.

By the way, the first run of query:

SQL (742)> select collect (y) from ksun_ora_08177;
SYSTPNSRcj9+JXnjgVAAmVdl5Ow==(1, 2, 3, 0, 1, 2, 3, 0, 1)

creates a new TYPE:

TYPE     "SYSTPNSRcj9+JXnjgVAAmVdl5Ow==" AS TABLE OF NUMBER

which is due to "COLLECT" function.

It can be verified by:

select s.*, o.last_ddl_time, o.created, o.*
  from dba_source s, dba_objects o
 where s.name = o. object_name
   and o.object_type = 'TYPE'
   and name = 'SYSTPNSRcj9+JXnjgVAAmVdl5Ow==';

and also documented in Database SQL Language Reference:

COLLECT is an aggregate function that takes as its argument a column of any type
and creates a nested table of the input type out of the rows selected.


Another example of TYPE creation was observed in Blog: Oracle 12c single session "library cache lock (cycle)" deadlock.

TestCode


--------------------
set numformat 99
column y_coll format a10

alter system set events '8177 trace name errorstack level 10';

drop table ksun_ora_08177;

create table ksun_ora_08177 as select mod(level, 2) x, mod(level, 4) y from dual connect by level <= 9; 

select * from ksun_ora_08177;

create or replace type ksun_tab force is table of number;
/

--------------------
create or replace force view ksun_ora_08177_collect_listagg(x, cnt, y_coll) as
with sq as (
  select x, count(*) cnt, cast (collect (distinct y) as ksun_tab) y
  from ksun_ora_08177
  group by x)
select x, cnt,
       (select listagg (column_value, ',') within group (order by column_value)
          from table (y)) y_coll
from sq 
where x is not null;

--------------------

create or replace force view ksun_ora_08177_collect(x, cnt, y_coll) as
select x, count(*) cnt, cast (collect (distinct y) as ksun_tab) y_coll
from ksun_ora_08177
where x is not null
group by x;

--------------------
create or replace force view ksun_ora_08177_listagg(x, cnt, y_coll) as
select x, count(*) cnt, listagg (y, ',') within group (order by y) y_coll
from ksun_ora_08177
where x is not null
group by x;

--------------------
create or replace procedure ora_08177_test(p_cnt number, p_view varchar2) as
 l_cur       integer;
 l_qry       varchar2(300) := 'select * from '||p_view;
 l_idx           pls_integer;
 l_col_desc_tab  dbms_sql.desc_tab;
begin
 l_qry := 'select /* '||p_view ||' */ * from '||p_view;
 for i in 1..p_cnt loop
  dbms_output.put_line('Run='||i);
  execute immediate 'alter system flush shared_pool';

  begin
    commit;
    set transaction isolation level serializable;
    l_cur := dbms_sql.open_cursor;
    dbms_sql.parse(l_cur, l_qry, dbms_sql.native);
    dbms_sql.describe_columns (l_cur, l_idx, l_col_desc_tab);
    dbms_sql.close_cursor(l_cur);
    exception when others then
      dbms_sql.close_cursor(l_cur);
      raise;
  end;
 end loop;
end;
/

-- Note: Each time, open a new session, run the test. After first run, subsequent runs in the same session do not hit the error.
-- exec ora_08177_test(10, 'ksun_ora_08177_collect_listagg');
-- exec ora_08177_test(10, 'ksun_ora_08177_collect');
-- exec ora_08177_test(10, 'ksun_ora_08177_listagg');