Tuesday, November 1, 2016

"library cache: mutex X" and Application Context

Heavy Event: "library cache: mutex X" is observed when Application Context is frequently changed. The application is using Oracle Virtual Private Database to regulate data access with driving application context, which determines which policy group is in effect for each use case.

In this Blog, Application Context is used as a concrete case to discuss Oracle "library cache: mutex X".

Note: All tests are done in Oracle 12.1.0.2 on AIX, Solaris, Linux with 6 physical processors.


1. Test


Run the appended Test Code by launching 4 Jobs:
  exec ctx_set_jobs(4);

Monitor Job sessions:

select sid, program, event, p1text, p1, p2text, p2, p3text, p3
  from v$session where program like '%(J%';

  SID PROGRAM              EVENT                     P1TEXT          P1 P2TEXT             P2 P3TEXT                P3
----- -------------------- ------------------------- ------ ----------- ------ -------------- ------ -----------------
   38 oracle@testdb (J003) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414788
  890 oracle@testdb (J000) library cache: mutex X    idn     1317011825 value    163208757248 where   9041305591414874
  924 oracle@testdb (J001) library cache: mutex X    idn     1317011825 value   4556960301056 where   9041305591414874
 1061 oracle@testdb (J002) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414879

Pick idn (P1): 1317011825, and query v$db_object_cache:

select name, namespace, type, hash_value, locks, pins, locked_total, pinned_total
from v$db_object_cache where hash_value in (1317011825);

NAME       NAMESPACE       TYPE             HASH_VALUE       LOCKS        PINS LOCKED_TOTAL PINNED_TOTAL
---------- --------------- --------------- ----------- ----------- ----------- ------------ ------------
TEST_CTX   APP CONTEXT     APP CONTEXT      1317011825           4           0            4    257802287

It shows that "library cache: mutex X" is on application context: TEST_CTX, and PINNED_TOTAL is probably increased for each access.
Although TEST_CTX is a local context and its values is stored in the User Global Area (UGA), the content of "library cache: mutex X" is globally on its definition.

select namespace, package, type from dba_context where namespace = 'TEST_CTX';

NAMESPACE  PACKAGE       TYPE
---------- ------------  ----------------
TEST_CTX   TEST_CTX_PKG  ACCESSED LOCALLY

After test, clean-up all jobs by:
  exec clean_jobs;


2. Mutex Contention and Performance


Run the test, monitor Mutex Contention and Performance:

SQL > exec ctx_set_jobs(4);

column owner format a6
column name format a10
column property format a10
column namespace format a12
column type format a12

SQL > select owner, name, property, hash_value, locks, pins, locked_total, pinned_total, executions, sharable_mem, namespace, type, full_hash_value
      from  v$db_object_cache v
      where (name in ('TEST_CTX') or hash_value in (1317011825) or property like '%HOT%');

OWNER  NAME       PROPERTY   HASH_VALUE  LOCKS  PINS LOCKED_TOTAL PINNED_TOTAL EXECUTIONS SHARABLE_MEM NAMESPACE    TYPE         FULL_HASH_VALUE
------ ---------- ---------- ---------- ------ ----- ------------ ------------ ---------- ------------ ------------ ------------ --------------------------------
SYS    TEST_CTX              1317011825      4     0            4   1191106893 1191106886         4096 APP CONTEXT  APP CONTEXT  3581f5a97dfac7485a3330954e800171

SQL > select * from v$mutex_sleep order by sleeps desc, location;

MUTEX_TYPE        LOCATION                        SLEEPS  WAIT_TIME 
----------------- --------------------------- ---------- ---------- 
Library Cache     kglpndl1  95                    760307   76983943 
Library Cache     kglpin1   4                     288210  172310041 
Library Cache     kglpnal1  90                    193282   38493570 
Library Cache     kglGetHandleReference 123            1          0 

--display mutex REQUESTING/BLOCKING details for each session
SQL > select * from v$mutex_sleep_history order by sleep_timestamp desc, location;

MUTEX_IDENTIFIER SLEEP_TIMESTAMP      MUTEX_TYPE          GETS  SLEEPS REQUESTING_SESSION BLOCKING_SESSION LOCATION                    MUTEX_VALUE       P1  P1RAW           
---------------- -------------------- ------------- ---------- ------- ------------------ ---------------- -------------------------- ------------------ --- ----------------
      1317011825 24-OCT-2017 14:15:40 Library Cache  675726540  449377                  7                0 kglpin1   4                 00                 0  0000000176D1E860
      1317011825 24-OCT-2017 14:15:40 Library Cache  675726542  442641                368                0 kglpndl1  95                00                 0  0000000176D1E860
      1317011825 24-OCT-2017 14:15:40 Library Cache  675711683  444299                901                7 kglpndl1  95                0000000700000000   0  0000000176D1E860
      1317011825 24-OCT-2017 14:15:40 Library Cache  675709618  438207                187                0 kglpin1   4                 00                 0  0000000176D1E860
      1317011825 24-OCT-2017 14:09:06 Library Cache    2806872       1                900                0 kglGetHandleReference 123   00                 0  0000000176D1E860


Pick spid of one Oracle session, for example, 10684, get callstack:

$ > pstack 10684
10684:  ora_j000_testdb 
 fffffd7ffc9d3e3b semsys   (4, e000013, fffffd7fffdf5658, 1, fffffd7fffdf5660)
 0000000001ab9008 sskgpwwait () + f8
 0000000001ab8c95 skgpwwait () + c5
 0000000001c710d5 ksliwat () + 8f5
 0000000001c70410 kslwaitctx () + 90
 0000000001e6ffb0 kgxWait () + 520
 000000000dd1ae6f kgxExclusive () + 1cf
 00000000021cc025 kglGetMutex () + b5
 000000000212400e kglpin () + 2fe
 00000000026aa159 kglpnp () + 269
 00000000026a71ab kgiina () + 1db
 000000000dd118b9 kgintu_named_toplevel_unit () + 39
 0000000007ac16a6 kzctxBInfoGet () + 746
 0000000007ac38ed kzctxChkTyp () + fd
 0000000007ac43f0 kzctxesc () + 510
 0000000002781d9d pevm_icd_call_common () + 29d
 0000000002781930 pfrinstr_ICAL () + 90
 0000000001a435ca pfrrun_no_tool () + 12a
 0000000001a411e0 pfrrun () + 4c0
 0000000001a3fb48 plsql_run () + 288

Where semsys(4, ...) is semtimedop(int semid, struct sembuf *sops, size_t nsops, const struct timespec *timeout) specified in syscall.h.

Run a small dtrace script:

$ > sudo dtrace -n \
'BEGIN {self->start_wts = walltimestamp; self->start_ts = timestamp;}
pid$target::kglpndl:entry /execname == "oracle"/ { self->rc = 1; }
pid$target::kgxExclusive:entry /execname == "oracle" && self->rc == 1/ { self->ts = timestamp; }
pid$target::kgxExclusive:return /self->ts > 0/ {  
 @lquant["ns"] = lquantize(timestamp - self->ts, 0, 10000, 1000); 
 @avgs["AVG_ns"] = avg(timestamp - self->ts);
 @mins["MIN_ns"] = min(timestamp - self->ts);
 @maxs["MAX_ns"] = max(timestamp - self->ts);
 @sums["SUM_ms"] = sum((timestamp - self->ts)/1000000);
 @counts[ustack(10, 0)] = count(); 
 self->rc = 0; self->ts = 0;}
END { printf("Start: %Y, End: %Y, Elapsed_ms: %d\n", self->start_wts, walltimestamp, (timestamp - self->start_ts)/1000000);}
' -p 10684

dtrace: description 'BEGIN ' matched 8 probes
Start: 2017 Oct 24 14:30:02, End: 2017 Oct 24 14:31:08, Elapsed_ms: 66183

  ns
     value  ------------- Distribution ------------- count
       < 0 |                                         0
         0 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@         3352394
      1000 |@@@@@@@@                                 803168
      2000 |                                         11598
      3000 |                                         1484
      4000 |                                         890
      5000 |                                         626
      6000 |                                         460
      7000 |                                         315
      8000 |                                         265
      9000 |                                         147
  >= 10000 |                                         2227

  AVG_ns                                                         1999
  MIN_ns                                                          777
  MAX_ns                                                     20411473
  SUM_ms                                                         4214

      oracle`kgxExclusive+0x105
      oracle`kglpndl+0x1fe
      oracle`kglUnPin+0x101
      a.out`kzctxChkTyp+0x14e
      a.out`kzctxesc+0x510
      a.out`pevm_icd_call_common+0x29d
      a.out`pfrinstr_ICAL+0x90
      oracle`pfrrun_no_tool+0x12a
      oracle`pfrrun+0x4c0
      oracle`plsql_run+0x288
  4173574
The above CallStack shows that kgxExclusive is triggered by kglpin via kglGetMutex.

Solaris "prstat -mL" show about 30% percentage of time the process has spent sleeping (SLP).


3. Hot library cache objects


Learning from Blog: Divide and conquer the true mutex contention
 
    KGLPIN:   KGL: PIN heaps and load data pieces of an object 
    KGLPNDL:  KGL PiN DeLete
    KGLPNAL1: KGL PiN ALlOcate
    
    KGLHBH1 63, KGLHDGN2 106: Invalid Password, Application Context(eg: SYS_CONTEXT)
"library cache: mutex X" can be allevaited by creating multiple copies of hot objects, which are controlled by two hidden parameters:
    _kgl_hot_object_copies: controls the maximum number of copies
    _kgl_debug:             marks hot library cache objects as a candidate for cloning
Configure two hidden parameters:

SQL > alter system set "_kgl_hot_object_copies"= 255 scope=spfile;

      --alter system reset "_kgl_hot_object_copies" scope=spfile;

SQL > alter system set "_kgl_debug"=
      "name='TEST_CTX' schema='SYS'    namespace=21 debug=33554432", 
      "name='PLITBLM'  schema='PUBLIC' namespace=1  debug=33554432"
      scope=spfile;
      
      --alter system reset "_kgl_debug" scope=spfile;

--- added in 25-Apr-2021 ---
  MOS Bug 19373224 - dbms_shared_pool.unmarkhot spins if _kgl_hot_object_copies is 255 (Doc ID 19373224.8) wrote
    Executing dbms_shared_pool.unmarkhot() might have entered a spin under kglget()  if 
     _kgl_hot_object_copies was set (or derived) to a value of 255. 
    Workaround
      Reduce the value of _kgl_hot_object_copies to 254 or below.
PUBLIC SYNONYM (namespace=1) 'PLITBLM' is added here to show multiple library cache objects can be specified in _kgl_debug. PLITBLM is package for PLSQL Index TaBLe Mangement, i.e PLSQL Collections (Associative Arrays, Nested Table, Varrays). All its implementations are through c interface.

library cache object NAMESPACE number, NAMESPACE name and TYPE name can be listed by following queries ("_kgl_debug" and dbms_shared_pool.markhot accept number as NAMESPACE):

SQL > select distinct namespace, object_type from dba_objects v order by 1;

SQL > select distinct namespace, type# from sys.obj$ order by 1;

SQL > select distinct kglhdnsp NAMESPACE_id, kglhdnsd NAMESPACE_name from x$kglob 
      --where kglhdnsd in ('APP CONTEXT')
      order by kglhdnsp;

SQL > select distinct kglobtyp TYPE_id, kglobtyd TYPE_name from x$kglob 
      --where kglobtyd in ('APP CONTEXT')
      order by kglobtyp;
Run the same test:

-- Stop all Jobs
SQL > exec clean_jobs;

--Restart DB to activate Hot library cache objects
SQL> startup force

SQL > select owner, name, property, hash_value, locks, pins, locked_total, pinned_total, executions, sharable_mem, namespace, type, full_hash_value
      from  v$db_object_cache v
      where (name in ('TEST_CTX') or hash_value in (1317011825) or property like '%HOT%');
      
OWNER  NAME       PROPERTY   HASH_VALUE LOCKS PINS LOCKED_TOTAL PINNED_TOTAL EXECUTIONS SHARABLE_MEM NAMESPACE    TYPE     FULL_HASH_VALUE
------ ---------- ---------- ---------- ----- ---- ------------ ------------ ---------- ------------ ------------ -------- -----------------------------
SYS    TEST_CTX   HOT        1317011825     0    0            1            0          0         0 APP CONTEXT  CURSOR   3581f5a97dfac7485a3330954e800171       
 
SQL > exec ctx_set_jobs(4); 
 
SQL > select sid, program, event, p1text, p1, p2text, p2, p3text, p3
      from v$session where program like '%(J%'; 

 SID PROGRAM                  EVENT                     P1TEXT          P1 P2TEXT             P2 P3TEXT                P3
---- ------------------------ ------------------------- ------ ----------- ------ -------------- ------ -----------------
   5 oracle@s5d00003 (J001)   null event                                 0                     0                        0
 186 oracle@s5d00003 (J004)   null event                                 0                     0                        0
 369 oracle@s5d00003 (J005)   null event                                 0                     0                        0
 902 oracle@s5d00003 (J000)   null event                                 0                     0                        0
 
SQL > select owner, name, property, hash_value, locks, pins, locked_total, pinned_total, executions, sharable_mem, namespace, type, full_hash_value
      from  v$db_object_cache v
      where (name in ('TEST_CTX') or hash_value in (1317011825) or property like '%HOT%'); 
 
OWNER  NAME       PROPERTY   HASH_VALUE LOCKS PINS LOCKED_TOTAL PINNED_TOTAL EXECUTIONS SHARABLE_MEM NAMESPACE    TYPE         FULL_HASH_VALUE
------ ---------- ---------- ---------- ----- ---- ------------ ------------ ---------- ------------ ------------ ------------ --------------------------------
SYS    TEST_CTX   HOT        1317011825     0    0            1            0          0         0    APP CONTEXT  CURSOR       3581f5a97dfac7485a3330954e800171
SYS    TEST_CTX   HOTCOPY6   1487681198     1    0            2    151394920  151394917      4096    APP CONTEXT  APP CONTEXT  1047cbfbca3fd100cf5758a258ac36ae
SYS    TEST_CTX   HOTCOPY138 3082567164     1    0            2    151821083  151821080      4096    APP CONTEXT  APP CONTEXT  31ac20c09044dfec01acd307b7bc3dfc
SYS    TEST_CTX   HOTCOPY187 3192676979     1    0            2    151252013  151252010      4096    APP CONTEXT  APP CONTEXT  6635075a7adcc68672a02262be4c6273
SYS    TEST_CTX   HOTCOPY115 4198626891     1    0            2    150529629  150529626      4096    APP CONTEXT  APP CONTEXT  d6724a1c9f480d55f73dc8fcfa41f64b 

SQL > select * from v$mutex_sleep order by sleeps desc, location;

MUTEX_TYPE                       LOCATION                                     SLEEPS  WAIT_TIME
-------------------------------- ---------------------------------------- ---------- ----------
Cursor Pin                       kkslce [KKSCHLPIN2]                               2      20118

SQL > select * from v$mutex_sleep_history order by sleep_timestamp desc, location;

MUTEX_IDENTIFIER SLEEP_TIMESTAMP      MUTEX_TYPE GETS SLEEPS REQUESTING_SESSION BLOCKING_SESSION LOCATION            MUTEX_VALUE      P1 P1RAW
---------------- -------------------- ---------- ---- ------ ------------------ ---------------- ------------------- ---------------- -- -----
      2816823972 24-OCT-2017 15:09:13 Cursor Pin    1      1                183              364 kkslce [KKSCHLPIN2] 0000016C00000000  2 00
      2214650983 24-OCT-2017 15:04:50 Cursor Pin    1      1                  5              902 kkslce [KKSCHLPIN2] 0000038600000000  2 00


Run the same dtrace script:

$ > sudo dtrace -n \
'BEGIN {self->start_wts = walltimestamp; self->start_ts = timestamp;}
pid$target::kglpndl:entry /execname == "oracle"/ { self->rc = 1; }
pid$target::kgxExclusive:entry /execname == "oracle" && self->rc == 1/ { self->ts = timestamp; }
pid$target::kgxExclusive:return /self->ts > 0/ {  
 @lquant["ns"] = lquantize(timestamp - self->ts, 0, 10000, 1000); 
 @avgs["AVG_ns"] = avg(timestamp - self->ts);
 @mins["MIN_ns"] = min(timestamp - self->ts);
 @maxs["MAX_ns"] = max(timestamp - self->ts);
 @sums["SUM_ms"] = sum((timestamp - self->ts)/1000000);
 @counts[ustack(10, 0)] = count(); 
 self->rc = 0; self->ts = 0;}
END { printf("Start: %Y, End: %Y, Elapsed_ms: %d\n", self->start_wts, walltimestamp, (timestamp - self->start_ts)/1000000);}
' -p 11751

Start: 2017 Oct 24 15:21:02, End: 2017 Oct 24 15:22:40, Elapsed_ms: 97999
  ns
     value  ------------- Distribution ------------- count
       < 0 |                                         0
         0 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@   8050589
      1000 |@@                                       330902
      2000 |                                         1106
      3000 |                                         1606
      4000 |                                         1352
      5000 |                                         630
      6000 |                                         322
      7000 |                                         201
      8000 |                                         133
      9000 |                                         94
  >= 10000 |                                         481

  AVG_ns         897
  MIN_ns         813
  MAX_ns      315083
  SUM_ms           0

      oracle`kgxExclusive+0x105
      oracle`kglpndl+0x1fe
      oracle`kglUnPin+0x101
      a.out`kzctxChkTyp+0x14e
      a.out`kzctxesc+0x510
      a.out`pevm_icd_call_common+0x29d
      a.out`pfrinstr_ICAL+0x90
      oracle`pfrrun_no_tool+0x12a
      oracle`pfrrun+0x4c0
      oracle`plsql_run+0x288
  8387416
Solaris "prstat -mL" show almost 100% percentage of time the process has spent in user mode (USR).

Try with official API in dbms_shared_pool, it seems that NAMESPACE: 'APP CONTEXT' not yet supported.

-- Stop all Jobs
SQL > exec clean_jobs;

SQL > alter system reset "_kgl_debug" scope=spfile;

--Restart DB
SQL> startup force

SQL > exec sys.dbms_shared_pool.markhot('SYS', 'TEST_CTX', 21);
      --exec sys.dbms_shared_pool.unmarkhot('SYS', 'TEST_CTX', 21);

 ORA-26680: object type not supported
 ORA-06512: at "SYS.DBMS_SHARED_POOL", line 133
 
-- Using 32 Byte (16 hexadecimal) V$DB_OBJECT_CACHE.FULL_HASH_VALUE
SQL > exec sys.dbms_shared_pool.markhot(hash=>'3581f5a97dfac7485a3330954e800171', NAMESPACE=>21);
      --exec sys.dbms_shared_pool.unmarkhot(hash=>'3581f5a97dfac7485a3330954e800171', NAMESPACE=>21);

 ORA-26680: object type not supported
 ORA-06512: at "SYS.DBMS_SHARED_POOL", line 138
Comparing "_kgl_debug" and markhot, it seems that "_kgl_debug" is persistent after DB restart, but not always stable after DB restart. Several sessions can still contend for the same library cache objects without creating/using HOT objects.

Whereas markhot seems stable after DB restart, but not always persistent after DB restart. Moreover, markhot does not support all NAMESPACEs of library cache objects.

When a SYNONYM is marked HOT, it can encounter core dump with Error:
    ORA-00600: internal error code, arguments: [kgltti-no-dep1]
with CallStack:
 
  kgltti()+1358            -> dbgeEndDDEInvocation() // ERROR SIGNALED: yes COMPONENT: LIBCACHE
  kqlCompileSynonym()+3840 -> kgltti() 
  kqllod_new()+3768        -> kqlCompileSynonym() 
  kqlCallback()+79         -> kqllod_new() 
  kqllod()+710             -> kqlCallback() 
  kglobld()+1058           -> kqllod() 
  kglobpn()+1232           -> kglobld() 
  kglpim()+489             -> kglobpn() 
  kglpin()+1785            -> kglpim() 
  kglgob()+493             -> kglpin() 
  kgiind()+1529            -> kglgob() 
  pfri8_inst_spec()+126    -> kgiind() 
  pfri1_inst_spec()+69     -> pfri8_inst_spec() 
  pfrrun()+1506            -> pfri1_inst_spec() 
  plsql_run()+648          -> pfrrun() 
This error is addressed by Oracle MOS:
    ORA-00600 [kgltti-no-dep1] When Synonym Marked Hot (Doc ID 2153847.1)


4. V$MUTEX_SLEEP_HISTORY


V$MUTEX_SLEEP_HISTORY displays time-series data. Each row in this view is for a specific time, mutex type, location, requesting session and blocking session combination. The data in this view is contained within a circular buffer, with the most recent sleeps shown (Oracle V$MUTEX_SLEEP_HISTORY).

Two fields are documented as:
  GETS    The total number of gets since the mutex was created and up until the time of the wait 
          (and from all sessions past and present)
  SLEEPS  The number of times the requester had to sleep before obtaining the mutex
It seems that GETS is an instance-wide accumulated historized data, whereas SLEEPS is per REQUESTING_SESSION for a specific time, mutex type, location.

We can try to edit a query to reveal Mutex contention details. Column SLEEPS and GETs_Per_MS are the points to monitor.

with mutex_1 as (select /*+ materializse */ m.*
                ,(gets -lag(gets) over(partition by mutex_identifier, mutex_type, location order by sleep_timestamp, requesting_session)) Total_GETs_Delta
                ,(((sleep_timestamp -lag(sleep_timestamp) over(partition by mutex_identifier, mutex_type, location 
                    order by sleep_timestamp, requesting_session)))) Elapsed
               from v$mutex_sleep_history m) 
    ,mutex_2 as (select /*+ materializse */ m.*
                ,(extract(second from Elapsed) + extract(minute from Elapsed) * 60 + extract(hour from Elapsed))*1e3 Elapsed_MS
               from mutex_1 m)
    ,mutex   as (select /*+ materializse */ m.*
                ,round(Total_GETs_Delta/nullif(Elapsed_MS, 0)) GETs_Per_MS
               from mutex_2 m)
    ,oc as (select /*+ materializse */ * from v$db_object_cache where hash_value in (select mutex_identifier from mutex)) 
    ,ses as (select /*+ materializse */ s.*
              ,(select owner|| '.' ||object_name||case when procedure_name is not null then '.' ||procedure_name end
                from dba_procedures
                where object_id = s.plsql_entry_object_id and subprogram_id = s.plsql_entry_subprogram_id) plsql_entry
              ,(select owner||'.'||object_name||case when procedure_name is not null then  '.' || procedure_name end
                from dba_procedures
                where object_id = s.plsql_object_id and subprogram_id = s.plsql_subprogram_id) plsql
              ,(select dbms_lob.substr(sql_text, 50, 1) from dba_hist_sqltext where sql_id = s.sql_id and rownum = 1) sql_text
             from v$active_session_history s where sample_time <= sysdate-10/1440)   -- only last 10 minutes
select m.*, o.*, s.*
from  mutex m, oc o, ses s;
where m.mutex_identifier = o.hash_value(+)
  and m.requesting_session = s.session_id(+)
  and m.sleep_timestamp between (s.sample_time(+) - INTERVAL'1'SECOND) and s.sample_time(+)
order by m.mutex_identifier, m.mutex_type, m.location, m.gets desc, m.sleep_timestamp desc, m.requesting_session;


5. Code Path of "library cache: mutex X"


Run Application Context settings 1000 times and at the same time dtrace its SPID: 1217.

It prints out 29 "kgxExclusive" callstacks (Top 3 callstacks are listed at first). Editing them together, we can build up a small dictionary of "library cache: mutex X" to lookup different occurrences and frequencies of mutex X.

The test (on a fresh started instance) shows that 'TEST_CTX' was pinned 1000 times for 1000 executions.

1000 Application Context settings are implemented by 1000:
    PinAndPrepare:  kglpnp   -> kglpin
    PinAndAllocate: kglpin   -> kglpnal 
    PinAndDelete:   kglUnPin -> kglpndl

SQL > select name, locks, pins, locked_total, pinned_total, executions
      from v$db_object_cache where name in ('TEST_CTX') or hash_value in (1317011825);
      
      NAME             LOCKS        PINS LOCKED_TOTAL PINNED_TOTAL  EXECUTIONS
      ---------- ----------- ----------- ------------ ------------ -----------
      TEST_CTX             1           0            1         1000         999      

SQL > exec ctx_set(1000, 123);

SQL > select name, locks, pins, locked_total, pinned_total, executions
      from v$db_object_cache where name in ('TEST_CTX') or hash_value in (1317011825);

      NAME             LOCKS        PINS LOCKED_TOTAL PINNED_TOTAL  EXECUTIONS
      ---------- ----------- ----------- ------------ ------------ -----------
      TEST_CTX             1           0            1         2000        1999 

$ > sudo dtrace -n \
'pid$target::kgxExclusive:entry /execname == "oracle"/ { self->in = 1; }
pid$target::kgxExclusive:return /self->in > 0/ {
@counts[ustack(10, 0)] = count(); 
  self->in = 0;}
' -p 1217

dtrace: description 'pid$target::kgxExclusive:entry ' matched 4 probes

oracle`kgxExclusive+0x105               oracle`kgxExclusive+0x105          oracle`kgxExclusive+0x105              
a.out`kglpin+0x2fe                      a.out`kglpndl+0x1fe                a.out`kglpnal+0x1ae                    
a.out`kglpnp+0x269                      a.out`kglUnPin+0x101               a.out`kglpin+0x52f                     
a.out`kgiina+0x1db                      a.out`kzctxChkTyp+0x14e            a.out`kglpnp+0x269                     
oracle`kgintu_named_toplevel_unit+0x39  a.out`kzctxesc+0x510               a.out`kgiina+0x1db                     
a.out`kzctxBInfoGet+0x746               a.out`pevm_icd_call_common+0x29d   oracle`kgintu_named_toplevel_unit+0x39 
a.out`kzctxChkTyp+0xfd                  a.out`pfrinstr_ICAL+0x90           a.out`kzctxBInfoGet+0x746              
a.out`kzctxesc+0x510                    oracle`pfrrun_no_tool+0x12a        a.out`kzctxChkTyp+0xfd                 
a.out`pevm_icd_call_common+0x29d        oracle`pfrrun+0x4c0                a.out`kzctxesc+0x510                   
a.out`pfrinstr_ICAL+0x90                oracle`plsql_run+0x288             a.out`pevm_icd_call_common+0x29d       
  1000                                    1000                               1000                                 

oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105              
a.out`kglpin+0x2fe                     a.out`kgldtin+0x85d                 a.out`kglhdgn+0xc2                     
a.out`kglgob+0x1ed                     a.out`kgldti+0x63                   a.out`kglLock+0x679                    
a.out`kgldpo0+0x4d8                    a.out`kksauc+0x3b7                  a.out`kglget+0x11b                     
a.out`kglgbo+0x95                      a.out`kkscscid_auc_eval+0x176       a.out`kkspsc0+0x744                    
a.out`kksauc+0x888                     a.out`kkscsCheckCriteria+0x691      a.out`kksParseCursor+0x74              
a.out`kkscscid_auc_eval+0x176          a.out`kkscsCheckCursor+0x30a        a.out`opiosq0+0x70a                    
a.out`kkscsCheckCriteria+0x691         a.out`kkscsSearchChildList+0x356    a.out`kpooprx+0x102                    
a.out`kkscsCheckCursor+0x30a           a.out`kksfbc+0x916                  a.out`kpoal8+0x308                     
a.out`kkscsSearchChildList+0x356       a.out`kkspsc0+0x9d7                 oracle`opiodr+0x433                    
  1                                      1                                   1                                    
                                                                                                                  
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105           
a.out`kglLock+0x93d                    a.out`kglpin+0x2fe                  a.out`kglpndl+0x1fe                 
a.out`kglget+0x11b                     a.out`kglgob+0x1ed                  a.out`kglUnPin+0x101                
a.out`kglgob+0x13b                     a.out`kkdogty+0x3fd                 a.out`kksauc+0x3d0                  
a.out`kkdogty+0x3fd                    a.out`kkdot2t+0x36                  a.out`kkscscid_auc_eval+0x176       
a.out`kkdot2t+0x36                     a.out`opibnd0+0xbe0                 a.out`kkscsCheckCriteria+0x691      
a.out`psdgbaa+0x791                    a.out`opibnd+0x175                  a.out`kkscsCheckCursor+0x30a        
a.out`pevm_GBVAR+0xdc                  a.out`kpopbnd+0xc7                  a.out`kkscsSearchChildList+0x356    
a.out`pfrinstr_GBVAR+0x37              a.out`kpoal8+0x1431                 a.out`kksfbc+0x916                  
oracle`pfrrun_no_tool+0x12a            oracle`opiodr+0x433                 a.out`kkspsc0+0x9d7                 
  1                                      1                                   1                                 
                                                                                                                  
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105              
a.out`kglLock+0x93d                    a.out`kglati+0x6a                   a.out`kglLockCursor+0xf6               
a.out`kglget+0x11b                     a.out`kksaxs+0x76b                  a.out`kxsGetLookupLock+0x70            
a.out`kglgob+0x13b                     a.out`kksauc+0x1cc                  a.out`kkscsCheckCursor+0x198           
a.out`kgldpo0+0x4d8                    a.out`kkscscid_auc_eval+0x176       a.out`kkscsSearchChildList+0x356       
a.out`kglgbo+0x95                      a.out`kkscsCheckCriteria+0x691      a.out`kksfbc+0x916                     
a.out`kksauc+0x888                     a.out`kkscsCheckCursor+0x30a        a.out`kkspsc0+0x9d7                    
a.out`kkscscid_auc_eval+0x176          a.out`kkscsSearchChildList+0x356    a.out`kksParseCursor+0x74              
a.out`kkscsCheckCriteria+0x691         a.out`kksfbc+0x916                  a.out`opiosq0+0x70a                    
a.out`kkscsCheckCursor+0x30a           a.out`kkspsc0+0x9d7                 a.out`kpooprx+0x102                    
  1                                      1                                   1                                    
    
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105       
a.out`kglhdgn+0xc2                     a.out`kglLock+0x93d                 a.out`kgllkdl+0x196             
a.out`kglLock+0x679                    a.out`kglget+0x11b                  a.out`kglUnLock+0xf8            
a.out`kglget+0x11b                     a.out`kglgob+0x13b                  a.out`kxsUnlock+0x1b1           
a.out`kglgob+0x13b                     a.out`kkdogty+0x3fd                 a.out`kxsFreeXsc+0x16e          
a.out`kkdogty+0x3fd                    a.out`kkdot2t+0x36                  a.out`kksCloseCursor+0x60a      
a.out`kkdot2t+0x36                     oracle`psdsbaa+0x244                a.out`opicca+0x81               
a.out`opibnd0+0xbe0                    a.out`pevm_SBVAR+0x123              a.out`opiclo+0x96               
a.out`opibnd+0x175                     a.out`pfrinstr_SBVAR+0x3a           a.out`kpoclsa+0x44              
a.out`kpopbnd+0xc7                     oracle`pfrrun_no_tool+0x12a         oracle`opiodr+0x433             
  1                                      1                                   1                             
    
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105          
a.out`kgllkdl+0x196                    a.out`kglpnal+0x1ae                 a.out`kglLock+0x2aa               
a.out`kglUnLock+0xf8                   a.out`kglpin+0x52f                  a.out`kglget+0x11b                
a.out`kksauc+0x3e0                     a.out`kglgob+0x1ed                  a.out`kksaxs+0x52d                
a.out`kkscscid_auc_eval+0x176          a.out`kgldpo0+0x4d8                 a.out`kksauc+0x1cc                
a.out`kkscsCheckCriteria+0x691         a.out`kglgbo+0x95                   a.out`kkscscid_auc_eval+0x176     
a.out`kkscsCheckCursor+0x30a           a.out`kksauc+0x888                  a.out`kkscsCheckCriteria+0x691    
a.out`kkscsSearchChildList+0x356       a.out`kkscscid_auc_eval+0x176       a.out`kkscsCheckCursor+0x30a      
a.out`kksfbc+0x916                     a.out`kkscsCheckCriteria+0x691      a.out`kkscsSearchChildList+0x356  
a.out`kkspsc0+0x9d7                    a.out`kkscsCheckCursor+0x30a        a.out`kksfbc+0x916                
  1                                      1                                   1                               
                                                                                                                  
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105           
a.out`kglpin+0x2fe                     a.out`kgllkdl+0x196                 a.out`kglpnal+0x1ae                 
a.out`kglpnp+0x269                     a.out`kglUnLock+0xf8                a.out`kglpin+0x52f                  
a.out`kgiinb+0x5e0                     a.out`kxsReleaseParentLock+0xb6     a.out`kglgob+0x1ed                  
a.out`pfri7_inst_body_common+0x18d     a.out`kxsFreeXsc+0x181              a.out`kkdogty+0x3fd                 
a.out`pfri3_inst_body+0x45             a.out`kksCloseCursor+0x60a          a.out`kkdot2t+0x36                  
oracle`pfrrun+0x685                    a.out`opicca+0x81                   a.out`opibnd0+0xbe0                 
oracle`plsql_run+0x288                 a.out`opiclo+0x96                   a.out`opibnd+0x175                  
oracle`peicnt+0x946                    a.out`kpoclsa+0x44                  a.out`kpopbnd+0xc7                  
oracle`kkxexe+0x2f3                    oracle`opiodr+0x433                 a.out`kpoal8+0x1431                 
  3                                      1                                   1                                 
                                                                                                                  
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105              
a.out`kglhdgn+0x23e                    a.out`kglpnal+0x1ae                 a.out`kgllkdl+0x196                    
a.out`kglLock+0x679                    a.out`kglpin+0x52f                  a.out`kss_del_cb+0x19d                 
a.out`kglget+0x11b                     a.out`kglpnp+0x269                  a.out`kssdel+0xf2                      
a.out`kglgob+0x13b                     a.out`kgiinb+0x5e0                  a.out`kssdch_int+0x326                 
a.out`kkdogty+0x3fd                    a.out`pfri7_inst_body_common+0x18d  a.out`ksudlc+0xf6                      
a.out`kkdot2t+0x36                     a.out`pfri3_inst_body+0x45          a.out`kss_del_cb+0x106                 
a.out`opibnd0+0xbe0                    oracle`pfrrun+0x685                 a.out`kssdel+0xf2                      
a.out`opibnd+0x175                     oracle`plsql_run+0x288              a.out`ksupop+0x23a                     
a.out`kpopbnd+0xc7                     oracle`peicnt+0x946                 oracle`opiodr+0x4b1                    
  1                                      3                                   1                                    

oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105           oracle`kgxExclusive+0x105        
a.out`kglnti+0x83                      a.out`kglhdgn+0x23e                 a.out`kglpin+0x2fe               
a.out`kksauc+0x7c0                     a.out`kglLock+0x679                 a.out`kglpnp+0x269               
a.out`kkscscid_auc_eval+0x176          a.out`kglget+0x11b                  a.out`kgiind+0xbae               
a.out`kkscsCheckCriteria+0x691         a.out`kkspsc0+0x744                 a.out`pfri8_inst_spec+0x7e       
a.out`kkscsCheckCursor+0x30a           a.out`kksParseCursor+0x74           a.out`pfri1_inst_spec+0x45       
a.out`kkscsSearchChildList+0x356       a.out`opiosq0+0x70a                 oracle`pfrrun+0x5e2              
a.out`kksfbc+0x916                     a.out`kpooprx+0x102                 oracle`plsql_run+0x288           
a.out`kkspsc0+0x9d7                    a.out`kpoal8+0x308                  oracle`peicnt+0x946              
a.out`kksParseCursor+0x74              oracle`opiodr+0x433                 oracle`kkxexe+0x2f3              
  1                                      1                                   4                              
    
oracle`kgxExclusive+0x105              oracle`kgxExclusive+0x105   
a.out`kglpnal+0x1ae                    a.out`kglpndl+0x1fe         
a.out`kglpin+0x52f                     a.out`kss_del_cb+0x19d      
a.out`kglpnp+0x269                     a.out`kssdel+0xf2           
a.out`kgiind+0xbae                     a.out`kssdch_int+0x326      
a.out`pfri8_inst_spec+0x7e             a.out`ksudlc+0xf6           
a.out`pfri1_inst_spec+0x45             a.out`kss_del_cb+0x106      
oracle`pfrrun+0x5e2                    a.out`kssdel+0xf2           
oracle`plsql_run+0x288                 a.out`ksupop+0x23a          
oracle`peicnt+0x946                    oracle`opiodr+0x4b1         
  4                                      8  


6. Mutex vs. Latch


Latch is an instance-wide centralized locking mechanism, whereas Mutex is a distributed locking mechanism, directly attached on the specific shared memory data structures. That is why there exists v$latch (v$latch_children) for all Latches, whereas Mutex is exposed as V$DB_OBJECT_CACHE.hash_value. Latch is pre-defined and limited, whereas Mutex is dynamically created/released when requested.

Blog: Reducing "library cache: mutex X" concurrency with dbms_shared_pool.markhot lists top 3 differences between mutexes and latches:
  – A mutex can protect a single structure, latches often protect many structures
  – A mutex get is about 30-35 instructions in the algorithm, compared to 150-200 instructions for a latch get
  – A mutex is around 16 bytes in size, compared to 112-200 bytes for a latch
Mutex looks about 5 times slimmer and hence hopefully proportionally faster than Latch.

Further deep discussion can be found in Blog: LATCHES, LOCKS, PINS AND MUTEXES


7. Mutex Contention Test on Linux


Run the test on Linux, the same "library cache: mutex X" contentions can be observed.

Pick one Oracle session's spid, for example, 16060.
Run Command strace, We can see continues output of lines beginning with semtimedop:

$ > strace -Tp 16060

    semtimedop(20873218, {{41, -1, 0}}, 1, {0, 10000000}) = -1 EAGAIN (Resource temporarily unavailable) <0.011305>
Refer to Linux Documentations:

int semtimedop(int semid, struct sembuf *sops, size_t nsops, const struct timespec *timeout);

struct sembuf {
  unsigned short  sem_num; /* semaphore index in array, semaphore number */
  short          sem_op;  /* semaphore operation */
  short          sem_flg; /* operation flags */
};

struct timespec {
  time_t          tv_sec;
  long            tv_nsec;
};
                 
EAGAIN: An operation could not proceed immediately and 
        either IPC_NOWAIT was specified in sem_flg or the time limit specified in timeout expired.             
timespec.tv_nsec = 10000000 ns (0.011305 Second) looks like CFS Scheduler Time Slice as 10ms.

We can try to simulate Oracle ORA-07445 by:

kill -s SEGV 16060         
We get database alert log and incident file of killed session:
    
-- alert log    
Thu Oct 26 09:04:08 2017
Exception [type: SIGSEGV, unknown code] [ADDR:0x6400003F6A] [PC:0x7F98B7BA728A, semtimedop()+10] [exception issued by pid: 16234, uid: 100] [flags: 0x0, count: 1]
Errors in file /orabin/app/oracle/admin/c0d00445/diag/rdbms/c0d00445/c0d00445/trace/c0d00445_j001_16060.trc  (incident=22697):
ORA-07445: exception encountered: core dump [semtimedop()+10] [SIGSEGV] [ADDR:0x6400003F6A] [PC:0x7F98B7BA728A] [unknown code] []
Incident details in: /orabin/app/oracle/admin/c0d00445/diag/rdbms/c0d00445/c0d00445/incident/incdir_22697/c0d00445_j001_16060_i22697.trc

-- incident file 
Exception [type: SIGSEGV, unknown code] [ADDR:0x6400003F6A] [PC:0x7F98B7BA728A, semtimedop()+10] 
          [exception issued by pid: 16234, uid: 100] [flags: 0x0, count: 1]
Registers:
%rax: 0xfffffffffffffffc %rbx: 0x0000000000000000 %rcx: 0xffffffffffffffff
%rdx: 0x0000000000000001 %rdi: 0x00000000013e8002 %rsi: 0x00007ffc98536278
%rsp: 0x00007ffc985360a8 %rbp: 0x00007ffc985362a0  %r8: 0x0000000000002710
 %r9: 0x0000000000989680 %r10: 0x00007ffc98536228 %r11: 0x0000000000000202
%r12: 0x00007ffc985364d0 %r13: 0x00000000013e8002 %r14: 0x00000001708d3998
%r15: 0x0000000000000000 %rip: 0x00007f98b7ba728a %efl: 0x0000000000000202
  semtimedop()+0 (0x7f98b7ba7280) mov %rcx,%r10
  semtimedop()+3 (0x7f98b7ba7283) mov $0xdc,%eax
  semtimedop()+8 (0x7f98b7ba7288) syscall
> semtimedop()+10 (0x7f98b7ba728a) cmp $0xfffff001,%rax
  semtimedop()+16 (0x7f98b7ba7290) jae 0x7f98b7ba7293
  semtimedop()+18 (0x7f98b7ba7292) ret
  semtimedop()+19 (0x7f98b7ba7293) mov 0x2a2d0e(%rip),%rcx
  semtimedop()+26 (0x7f98b7ba729a) xor %edx,%edx
 
   sslsshandler()+456         -> ssexhd()        // CALL TYPE: call   ERROR SIGNALED: yes   COMPONENT: (null)
   __sighandler()             -> sslsshandler() 
   semtimedop()+10            -> __sighandler() 
   sskgpwwait()+237           -> semtimedop() 
   skgpwwait()+200            -> sskgpwwait() 
   ksliwat()+2091             -> skgpwwait() 
   kslwaitctx()+161           -> ksliwat() 
   ksfwaitctx()+32            -> kslwaitctx() 
   kgxWait()+1076             -> ksfwaitctx() 
   kgxExclusive()+741         -> kgxWait() 
   kglGetMutex()+170          -> kgxExclusive() 
   kglpndl()+503              -> kglGetMutex() 
   kglpnds()+81               -> kglpndl() 
   kglUnPin()+228             -> kglpnds() 
   kzctxChkTyp()+306          -> kglUnPin() 
   kzctxesc()+347             -> kzctxChkTyp() 
   pevm_icd_call_common()+411 -> kzctxesc() 
   pfrinstr_ICAL()+135        -> pevm_icd_call_common() 
   pfrrun_no_tool()+60        -> pfrinstr_ICAL() 
   pfrrun()+1155              -> pfrrun_no_tool() 
   plsql_run()+708            -> pfrrun() 


8. No Read Consistency in Application Context (Addendum 2017.06.06)


Modify above local context as global context, and start a job to update context value by:

exec clean_jobs;

create or replace context test_ctx using test_ctx_pkg accessed globally;

create or replace procedure ctx_set_global(p_cnt number) as
begin
 for i in 1..p_cnt loop
   test_ctx_pkg.set_val(i);    
 end loop;
end;
/

create or replace procedure ctx_set_jobs_global as
  l_job_id pls_integer;
begin
  dbms_job.submit(l_job_id, 'begin while true loop ctx_set_global(1000000); end loop; end;');
  commit;
end;    
/

exec ctx_set_jobs_global;

And then query systimestamp and global context by:

alter session set nls_timestamp_tz_format ='yyyy-mon-dd hh24:mi:ss.ff tzr tzd';
column name format a8
column min_time format a35
column max_time format a35
column min_ctx format a10
column max_ctx format a10

-- grace period for job start
exec dbms_lock.sleep(10);   

with sq_time_1 as (select systimestamp tm from dual connect by level <= 10000),
     sq_ctx_1  as (select sys_context('test_ctx', 'attr') ctx from dual connect by level <= 100),
     sq_time_2 as (select systimestamp tm from dual connect by level <= 10000),
     sq_ctx_2  as (select sys_context('test_ctx', 'attr') ctx from dual connect by level <= 100)
select 'Snap_1' name, min(tm) min_time, max(tm) max_time, min(ctx) min_ctx, max(ctx) max_ctx from sq_time_1, sq_ctx_1
union all
select 'Snap_2' name, min(tm) min_time, max(tm) max_time, min(ctx) min_ctx, max(ctx) max_ctx from sq_time_2, sq_ctx_2;


NAME     MIN_TIME                            MAX_TIME                            MIN_CTX    MAX_CTX
-------- ----------------------------------- ----------------------------------- ---------- ----------
Snap_1   2017-jun-06 08:05:09.625790 +02:00  2017-jun-06 08:05:09.625790 +02:00  263007     263007
Snap_2   2017-jun-06 08:05:09.625790 +02:00  2017-jun-06 08:05:09.625790 +02:00  362922     362922

exec dbms_lock.sleep(4);

with sq_ctx_1  as (select sys_context('test_ctx', 'attr') ctx from dual connect by level <= 100),
     sq_time_1 as (select systimestamp tm from dual connect by level <= 10000)
select 'Snap_1' name, min(tm) min_time, max(tm) max_time, min(ctx) min_ctx, max(ctx) max_ctx from sq_time_1, sq_ctx_1
union all
select 'Snap_2' name, systimestamp, systimestamp, min(sys_context('test_ctx', 'attr')), max(sys_context('test_ctx', 'attr')) 
  from dual connect by level <= 100;

NAME     MIN_TIME                            MAX_TIME                            MIN_CTX    MAX_CTX
-------- ----------------------------------- ----------------------------------- ---------- ----------
Snap_1   2017-jun-06 08:05:17.354265 +02:00  2017-jun-06 08:05:17.354265 +02:00  693574     693574
Snap_2   2017-jun-06 08:05:17.354265 +02:00  2017-jun-06 08:05:17.354265 +02:00  794397     794396

The output shows that systimestamp satisfies Read Consistency, but global context has two or more different values, hence global context does not guarantee Statement-Level Read Consistency.

Even for local context, Blog: Gotcha: Application Contexts demonstrated Non Read Consistency.

9. Test Code


create or replace context test_ctx using test_ctx_pkg;

create or replace package test_ctx_pkg is 
  procedure set_val (val number);
 end;
/

create or replace package body test_ctx_pkg is
  procedure set_val (val number) as
  begin
    dbms_session.set_context('test_ctx', 'attr', val);
  end;
end;
/

create or replace procedure ctx_set(p_cnt number, val number) as
begin
 for i in 1..p_cnt loop
  test_ctx_pkg.set_val(val);    -- 'library cache: mutex X' on TEST_CTX
 end loop;
end;
/

create or replace procedure ctx_set_jobs(p_job_cnt number) as
  l_job_id pls_integer;
begin
  for i in 1.. p_job_cnt loop
    dbms_job.submit(l_job_id, 'begin while true loop ctx_set(100000, '||i||'); end loop; end;');
  end loop;
  commit;
end;    
/

create or replace procedure clean_jobs as
begin
  for c in (select job from dba_jobs) loop
    begin
       dbms_job.remove (c.job);
    exception when others then null;
    end;
    commit;
  end loop;

  for c in (select d.job, d.sid, (select serial# from v$session where sid = d.sid) ser 
              from dba_jobs_running d) loop
    begin
      execute immediate
             'alter system kill session '''|| c.sid|| ',' || c.ser|| ''' immediate';
      dbms_job.remove (c.job);
    exception when others then null;
    end;
    commit;
  end loop;
  
  -- select * from dba_jobs;
  -- select * from dba_jobs_running;
end;
/


/**
exec ctx_set_jobs(4);

exec clean_jobs;

column program format a20
column event format a25
column p1text format a6
column p1 format 9999999999
column p2text format a6
column p2 format 9999999999999
column p3text format a6
column p3 format 9999999999999999

select sid, program, event, p1text, p1, p2text, p2, p3text, p3
  from v$session where program like '%(J%';

  SID PROGRAM              EVENT                     P1TEXT          P1 P2TEXT             P2 P3TEXT                P3
----- -------------------- ------------------------- ------ ----------- ------ -------------- ------ -----------------
   38 oracle@testdb (J003) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414788
  890 oracle@testdb (J000) library cache: mutex X    idn     1317011825 value    163208757248 where   9041305591414874
  924 oracle@testdb (J001) library cache: mutex X    idn     1317011825 value   4556960301056 where   9041305591414874
 1061 oracle@testdb (J002) library cache: mutex X    idn     1317011825 value   3968549781504 where   9041305591414879

  
column name format a10
column namespace format a15
column type format a15
set numformat 9999999999
  
select name, namespace, type, hash_value, locks, pins, locked_total, pinned_total
from v$db_object_cache where hash_value in (1317011825);

NAME       NAMESPACE       TYPE             HASH_VALUE       LOCKS        PINS LOCKED_TOTAL PINNED_TOTAL
---------- --------------- --------------- ----------- ----------- ----------- ------------ ------------
TEST_CTX   APP CONTEXT     APP CONTEXT      1317011825           4           0            4    257802287

**/

Tuesday, September 13, 2016

PL/SQL Multidimensional Collection Memory Usage and Performance

PL/SQL multidimensional collections are modeled by creating a collection whose elements are also collections.

We found that the memory usage and performance of Multidimensional Collections are dependent on the total number of branch nodes, which are determined by the data characteristics and the subscript indices ordering.

This Blog will try to demonstrate the findings with appended Test Code.

1. Know Your Data


At first, run two queries to understand the test data (see the Test Code):

SQL> select round(num_rows*avg_row_len/1024/1024) mb from dba_tables where table_name = 'MEM_TAB';

46 MB

SQL> select * 
  from (
  select count(distinct id) id, count(distinct name) name
        ,count(distinct num_1) num_1, count(distinct num_2) num_2
        ,count(distinct txt_1) txt_1, count(distinct txt_2) txt_2
        ,count(distinct num_1||'.'||num_2) num_1_2, count(distinct txt_1||'.'||txt_2) txt_1_2
    from mem_tab)
unpivot (distinct_count for count_name in
        (id as 'count(distinct id)', name as 'count(distinct name)'
        ,num_1 as 'count(distinct num_1)', num_2 as 'count(distinct num_2)'
        ,txt_1 as 'count(distinct txt_1)', txt_2 as 'count(distinct txt_2)' 
        ,num_1_2 as 'count(distinct num_1.num_2)'
        ,txt_1_2 as 'count(distinct txt_1.txt_2)'
        ));

COUNT_NAME                  DISTINCT_COUNT
--------------------------- --------------
count(distinct id)               1,000,000
count(distinct name)             1,000,000
count(distinct num_1)              500,000
count(distinct num_2)               50,000
count(distinct txt_1)              500,000
count(distinct txt_2)               50,000
count(distinct num_1.num_2)      1,000,000
count(distinct txt_1.txt_2)      1,000,000

Test table mem_tab is composed of 6 columns, contains 1,000,000 rows, and theoretical size is about 46 MB.
  • It has 2 unique columns (id and name), and two composite unique columns ((num_1.num_2), (txt_1.txt_2)).
  • num_1 and txt_1 have 500,000 distinct values each, branching factor = 2, hence 500,000 branch nodes when storing as array(num_1)(num_2);
  • num_2 and txt_2 have 50,000 distinct values each, branching factor = 20, hence 50,000 branch nodes when storing as array(num_2)(num_1).

2. Create Test


Run command:

SQL> exec test_collection_dimm_mem.run(1000000);

0  Dimension 1 array(id)  , pga_used(MB) =   411, Elapsed(Second)= 5.44
1  Dimension 1 array(name), pga_used(MB) =   459, Elapsed(Second)= 6.21
2  Dimension 2 array(num_1)(num_2), pga_used(MB) =  2227, Elapsed(Second)= 8.54
3  Dimension 2 array(num_2)(num_1), pga_used(MB) =   536, Elapsed(Second)= 5.44
4  Dimension 2 array(txt_1)(txt_2), pga_used(MB) =  2739, Elapsed(Second)= 7.79
5  Dimension 2 array(txt_2)(txt_1), pga_used(MB) =   628, Elapsed(Second)= 5.96
6  Dimension 2 array(num_1)(txt_2), pga_used(MB) =  2719, Elapsed(Second)= 6.53
7  Dimension 2 array(num_2)(txt_1), pga_used(MB) =   626, Elapsed(Second)= 5.62
8  Dimension 2 array(txt_1)(num_2), pga_used(MB) =  2251, Elapsed(Second)= 9.12
9  Dimension 2 array(txt_2)(num_1), pga_used(MB) =   539, Elapsed(Second)= 6.07
   
10 Dimension 1 table(id), pga_used(MB) =   411, Elapsed(Second)= 6.25
11 Dimension 2 table(num_1)(num_2), pga_used(MB) =   889, Elapsed(Second)= 9.78
12 Dimension 2 table(num_2)(num_1), pga_used(MB) =   443, Elapsed(Second)= 7.86  

The above output shows:
  1. One dimensional associative array has a factor 10 more memory usage compared to disk table size (46MB).
  2. Comparing RUN 4 and RUN 5, two dimensional can have a factor 60 more memory usage (2739/46) when organized with 500,000 branch nodes in array(txt_1)(txt_2); but factor 14 (628/46) when organized with 50,000 branch nodes in array(txt_2)(txt_1) by simply exchanging the subscripts;
    More than a factor 4 of difference when exchanging the order of subscripts.
    Each branch node takes about 5KB to 13KB.
    (imagine the cases of organizations with 50% vs. 5% employees being managers)
  3. One dimensional nested table and associative array are comparable.
  4. Two dimensional nested tables have a factor of 19 (888/46), and 10 (443/46).
  5. The performance is proportional to the memory usage.

3. Lookup Test


Once the data is stored in array/table, they can be used for in-memory search.
For example, in Test Code: test_collection_dimm_mem.run, reactivate the lookup snippet code:

    ---- lookup example ----
    l_start_time := dbms_utility.get_time;
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_node_rec := l_array_d2nn(c.num_1)(c.num_2);
    end loop;

Test output below shows that the performance difference is less than 30% among associative arrays.
But nested table is a factor 3 to 5 faster.

0  Dimension 1 array(id)  , pga_used(MB) =   411, Elapsed(Second)= 2.27
1  Dimension 1 array(name), pga_used(MB) =   459, Elapsed(Second)= 2.88
2  Dimension 2 array(num_1)(num_2), pga_used(MB) =  2227, Elapsed(Second)= 2.85
3  Dimension 2 array(num_2)(num_1), pga_used(MB) =   536, Elapsed(Second)= 2.51
4  Dimension 2 array(txt_1)(txt_2), pga_used(MB) =  2739, Elapsed(Second)= 3.3
5  Dimension 2 array(txt_2)(txt_1), pga_used(MB) =   628, Elapsed(Second)= 2.95
6  Dimension 2 array(num_1)(txt_2), pga_used(MB) =  2719, Elapsed(Second)= 2.77
7  Dimension 2 array(num_2)(txt_1), pga_used(MB) =   626, Elapsed(Second)= 2.58
8  Dimension 2 array(txt_1)(num_2), pga_used(MB) =  2251, Elapsed(Second)= 3.4
9  Dimension 2 array(txt_2)(num_1), pga_used(MB) =   539, Elapsed(Second)= 2.93
   
10 Dimension 1 table(id), pga_used(MB) =   411, Elapsed(Second)= .45
11 Dimension 2 table(num_1)(num_2), pga_used(MB) =   889, Elapsed(Second)= 1.09
12 Dimension 2 table(num_2)(num_1), pga_used(MB) =   443, Elapsed(Second)= .8


4. Heapdump Dump


Make heapdump for 3 Cases:
  1. Dimension 1 array(id) in RUN 0
  2. Dimension 2 array(num_1)(num_2) in RUN 2
  3. Dimension 2 array(num_2)(num_1) in RUN 3
by:

alter session set events 'immediate trace name heapdump level 16';

respective "top uga heap" displays the main memory usage:

--- Dimension 1 array(id) ---
 HEAP DUMP heap name="top uga heap"  desc=0x7f4b2d56ada0
 Total heap size    =427532128
 
--- Dimension 2 array(num_1)(num_2) ---
 HEAP DUMP heap name="top uga heap"  desc=0x7f94fddf3da0
 Total heap size    =2331900936

--- Dimension 2 array(num_2)(num_1) ---
 HEAP DUMP heap name="top uga heap"  desc=0x7fb10cf26da0
 Total heap size    =558883712  


5. PGA Dump


Further make pga_detail_dump by:

alter session set events 'immediate trace name pga_detail_dump level 27';

which reveals corresponding details by categories:
(note 27 is Oracle process number (PID), only top categories are listed)

--- Dimension 1 array(id) ---
  220672920 bytes,13515 chunks: "pmuccst: adt/record       "  PL/SQL
            koh-kghu sessi  ds=0x7f4b27f791b8  dsprt=0x7f4b28eed740
  182716608 bytes,11196 chunks: "pl/sql vc2                "  PL/SQL
            koh-kghu sessi  ds=0x7f4b27f791b8  dsprt=0x7f4b28eed740
   18667040 bytes,1144 chunks: "pmucalm coll              "  PL/SQL
            koh-kghu sessi  ds=0x7f4b27f791b8  dsprt=0x7f4b28eed740
     973992 bytes,  66 chunks: "static frame of inst      "  PL/SQL
            koh-kghu sessi  ds=0x7f4b282a8580  dsprt=0x7f4b28eed740
  
--- Dimension 2 array(num_1)(num_2) ---        
  1633408272 bytes,100038 chunks: "pmucalm coll              "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
  282131512 bytes,17279 chunks: "pmuccst: adt/record       "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
  187843616 bytes,11510 chunks: "pl/sql vc2                "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
  117545272 bytes,7199 chunks: "pmucpcon: ipm             "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
   57776680 bytes,3539 chunks: "pmucpcon: tds             "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740
   41783352 bytes,2559 chunks: "pmucpcon: cds             "  PL/SQL
            koh-kghu sessi  ds=0x7f94f88121b8  dsprt=0x7f94f977d740          
  
--- Dimension 2 array(num_2)(num_1) ---
  243923992 bytes,14939 chunks: "pmuccst: adt/record       "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
  160771776 bytes,9852 chunks: "pl/sql vc2                "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
  127166584 bytes,7789 chunks: "pmucalm coll              "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
   10956088 bytes, 671 chunks: "pmucpcon: ipm             "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
    5747456 bytes, 352 chunks: "pmucpcon: cds             "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740
    4416776 bytes, 271 chunks: "pmucpcon: tds             "  PL/SQL
            koh-kghu sessi  ds=0x7fb1079451b8  dsprt=0x7fb1088ad740   

In the above 3 Cases, "pmuccst: adt/record" and "pl/sql vc2" are almost similar, but "pmucalm coll" makes the difference.
  1. "pl/sql vc2" are all involved varchar2 strings.
  2. "pmuccst: adt/record" (ADT: Abstract Data Type) stores all the PL/SQL records, in this test, it is 1,000,000.
  3. "pmucalm coll" looks like all the allocated collections, which represent the branch nodes. array(num_1)(num_2) takes 1,633,408,272 Bytes, whereas array(num_2)(num_1) 127,166,584 since the first one has 500,000 branch nodes, whereas second 50,000.
In Oracle applications, PL/SQL collections are often the cause of ORA-04030 when storing large number of elements (same type). Determining the categories helps pinpoint the main memory consumption.
  • "pmuccst: adt/record" stands for leaf nodes, that is the number of elements.
  • "pmucalm coll" represents branch nodes, that is the number of collections.
BTW, the abbreviation ds and dsprt are the shortcuts of "heap_descriptor" and "parent_heap_descriptor".

6. Populate process_memory_detail


Dump process_memory_detail into process_memory_detail_v by setting event PGA_DETAIL_GET, then query the result with:

column run_name format A40
column "NAME(MB, ALLOC_COUNT) List" format A200
column name format A30

select run, run_name, pid, round(sum(bytes)/1024/1024) mb, sum(allocation_count) allocation_count
from process_memory_detail_v 
group by run, run_name, pid 
order by run;

 RUN RUN_NAME                          PID      MB ALLOCATION_COUNT
---- -------------------------------- ---- ------- ----------------
   0 Dimension 1 array(id)              27     409           29,676
   1 Dimension 1 array(name)            27     458           32,824
   2 Dimension 2 array(num_1)(num_2)    27   2,220          145,969
   3 Dimension 2 array(num_2)(num_1)    27     534           37,721
   4 Dimension 2 array(txt_1)(txt_2)    27   2,730          178,767
   5 Dimension 2 array(txt_2)(txt_1)    27     626           43,632
   6 Dimension 2 array(num_1)(txt_2)    27   2,711          177,517
   7 Dimension 2 array(num_2)(txt_1)    27     624           43,511
   8 Dimension 2 array(txt_1)(num_2)    27   2,244          147,554
   9 Dimension 2 array(txt_2)(num_1)    27     537           37,895
  10 Dimension 1 table(id)              27     411           29,745
  11 Dimension 2 table(num_1)(num_2)    27     886           60,281
  12 Dimension 2 table(num_2)(num_1)    27     443           31,829
-------------------------------------------------------------------  

select run, run_name, pid, round(sum(bytes)/1024/1024) mb,
       (select listagg(rpad(name||'('||round(bytes/1024/1024)||', '||allocation_count||')', 40, chr(32)), '; ') 
               within group (order by bytes desc) agg
          from (select * from process_memory_detail_v c 
                 where c.run = v.run and c.pid = v.pid and bytes > 0
                 order by bytes desc) c 
         where rownum <= 3
         ) "NAME(MB, ALLOC_COUNT) List"
from process_memory_detail_v v
group by run, run_name, pid
order by run, run_name, pid;

RUN RUN_NAME                         PID     MB NAME(MB, ALLOC_COUNT) List
--- ------------------------------- ---- ------ ---------------------------------------------------------------------------------------------------
  0 Dimension 1 array(id)             27    409 pmuccst: adt/record(225, 14456) ; pl/sql vc2(161, 10311)                  ; pmucalm coll(17, 1084)
  1 Dimension 1 array(name)           27    458 pl/sql vc2(196, 12561)          ; pmuccst: adt/record(190, 12220)         ; pmucalm coll(65, 4196)
  2 Dimension 2 array(num_1)(num_2)   27  2,220 pmucalm coll(1560, 100167)      ; pmuccst: adt/record(271, 17379)         ; pl/sql vc2(179, 11488)
  3 Dimension 2 array(num_2)(num_1)   27    534 pmuccst: adt/record(231, 14827) ; pl/sql vc2(153, 9836)                   ; pmucalm coll(123, 7902)
  4 Dimension 2 array(txt_1)(txt_2)   27  2,730 pmucalm coll(2699, 173350)      ; pmuccst: adt/record(14, 882)            ; pl/sql vc2(10, 675)
  5 Dimension 2 array(txt_2)(txt_1)   27    626 pmucalm coll(229, 14676)        ; pmuccst: adt/record(207, 13302)         ; pl/sql vc2(176, 11272)
  6 Dimension 2 array(num_1)(txt_2)   27  2,711 pmucalm coll(2596, 166684)      ; pmuccst: adt/record(62, 3975)           ; pl/sql vc2(47, 2993)
  7 Dimension 2 array(num_2)(txt_1)   27    624 pmucalm coll(222, 14238)        ; pmuccst: adt/record(221, 14194)         ; pl/sql vc2(167, 10739)
  8 Dimension 2 array(txt_1)(num_2)   27  2,244 pmucalm coll(1570, 100838)      ; pmuccst: adt/record(273, 17546)         ; pl/sql vc2(185, 11904)
  9 Dimension 2 array(txt_2)(num_1)   27    537 pmuccst: adt/record(232, 14931) ; pl/sql vc2(155, 9946)                   ; pmucalm coll(123, 7870)
 10 Dimension 1 table(id)             27    411 pmuccst: adt/record(222, 14276) ; pl/sql vc2(163, 10488)                  ; pmucalm coll(17, 1089)
 11 Dimension 2 table(num_1)(num_2)   27    886 pmucalm coll(286, 18381)        ; pmuccst: adt/record(248, 15936)         ; pl/sql vc2(144, 9229)
 12 Dimension 2 table(num_2)(num_1)   27    443 pmuccst: adt/record(229, 14680) ; pl/sql vc2(155, 9951)                   ; pmucalm coll(30, 1905)
----------------------------------------------------------------------------------------------------------

select run, run_name, pid, category, name, heap_name, round(bytes/1024/1024) mb
      ,allocation_count, round(bytes/allocation_count) bytes_per_cnt
from process_memory_detail_v 
where name = 'pmucalm coll'
order by pid, category, name, heap_name, run, timestamp;

RUN RUN_NAME                         PID  CATEGORY  NAME           HEAP_NAME        MB      ALLOCATION_COUNT   BYTES_PER_CNT
--- ------------------------------- ---- ---------- ------------- ---------------- ------  -----------------  --------------
  0 Dimension 1 array(id)             27  PL/SQL    pmucalm coll   koh-kghu sessi      17              1,084          16,317
  1 Dimension 1 array(name)           27  PL/SQL    pmucalm coll   koh-kghu sessi      65              4,196          16,325
  2 Dimension 2 array(num_1)(num_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   1,560            100,167          16,328
  3 Dimension 2 array(num_2)(num_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     123              7,902          16,326
  4 Dimension 2 array(txt_1)(txt_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   2,699            173,350          16,328
  5 Dimension 2 array(txt_2)(txt_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     229             14,676          16,327
  6 Dimension 2 array(num_1)(txt_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   2,596            166,684          16,328
  7 Dimension 2 array(num_2)(txt_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     222             14,238          16,327
  8 Dimension 2 array(txt_1)(num_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi   1,570            100,838          16,328
  9 Dimension 2 array(txt_2)(num_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi     123              7,870          16,326
 10 Dimension 1 table(id)             27  PL/SQL    pmucalm coll   koh-kghu sessi      17              1,089          16,317
 11 Dimension 2 table(num_1)(num_2)   27  PL/SQL    pmucalm coll   koh-kghu sessi     286             18,381          16,327
 12 Dimension 2 table(num_2)(num_1)   27  PL/SQL    pmucalm coll   koh-kghu sessi      30              1,905          16,322

  • The first query shows the memory usage per run.
  • The second query lists the memory allocations of top 3 Heaps for each run.
  • The third query reveals Heap "pmucalm coll" statistics of memory and allocation_count.
    The top 4 RUNs (2, 4, 6, 8) have 500,000 branch nodes, and hence more memory and allocation_count.
    Computing the average bytes per allocation, BYTES_PER_CNT shows almost a constant value of 16K.
    Probably generous reserving 16K (2x8K-Blocks) for each "pmucalm coll" conforms to the design strategy of preventing expensive branch node splitting.

7. Test Code



set numformat 999,999,999

drop table mem_tab;

create table mem_tab(id number, name varchar2(10) 
                    ,num_1 number, num_2 number 
                    ,txt_1 varchar2(10), txt_2 varchar2(10));

insert into mem_tab 
select level id, rpad(level, 10, 'X') name
   ,level num_1, ceil(level/20) num_2 
   ,rpad(level, 10, 'X') txt_1, rpad(ceil(level/20), 10, 'X') txt_2 
  from dual connect by level <= 500000
union all
select (level + 500000) id, rpad((level + 500000), 10, 'X') name
   ,level num_1, -ceil(level/20) num_2 
   ,rpad(level, 10, 'X') txt_1, rpad(-ceil(level/20), 10, 'X') txt_2 
  from dual connect by level <= 500000;

commit;

exec dbms_stats.gather_table_stats(null, 'MEM_TAB');

select round(num_rows*avg_row_len/1024/1024) mb from dba_tables where table_name = 'MEM_TAB';
-- 46 MB

select * 
  from (
  select count(distinct id) id, count(distinct name) name
        ,count(distinct num_1) num_1, count(distinct num_2) num_2
        ,count(distinct txt_1) txt_1, count(distinct txt_2) txt_2
        ,count(distinct num_1||'.'||num_2) num_1_2, count(distinct txt_1||'.'||txt_2) txt_1_2
    from mem_tab)
unpivot (distinct_count for count_name in
        (id as 'count(distinct id)', name as 'count(distinct name)'
        ,num_1 as 'count(distinct num_1)', num_2 as 'count(distinct num_2)'
        ,txt_1 as 'count(distinct txt_1)', txt_2 as 'count(distinct txt_2)' 
        ,num_1_2 as 'count(distinct num_1.num_2)'
        ,txt_1_2 as 'count(distinct txt_1.txt_2)'
        ));

create or replace package test_collection_dimm_mem as
  procedure run (p_cnt number := 1000000);
end;
/

drop table process_memory_detail_v;

create table process_memory_detail_v as 
  select 123 run, rpad('A', 40, 'X') run_name, timestamp'1998-02-17 11:22:00' timestamp
        ,234 session_id, 345 session_serial#, v.* 
  from v$process_memory_detail v where 1=2;

create or replace package body test_collection_dimm_mem as
  type t_rec is record (
    id            number
   ,name          varchar2(10)
   ,num_1         number
   ,num_2         number
   ,txt_1         varchar2(10)
   ,txt_2         varchar2(10)
  );
  type t_array_d1n  is table of t_rec      index by pls_integer;    -- array(number)
  type t_array_d1t  is table of t_rec      index by varchar2(30);    -- array(text)
  
  type t_array_d2nn is table of t_array_d1n index by pls_integer;    -- array(number)(number)
  type t_array_d2tt is table of t_array_d1t index by varchar2(30);   -- array(text)(text)
  
  type t_array_d2nt is table of t_array_d1t index by pls_integer;    -- array(number)(text)
  type t_array_d2tn is table of t_array_d1n index by varchar2(30);   -- array(text)(number)
  
  type t_nesttab_d1 is table of t_rec;
  type t_nesttab_d2 is table of t_nesttab_d1;
     
  l_node_rec       t_rec;
  l_array_d1n      t_array_d1n;                        -- array(number)
  l_array_d1t      t_array_d1t;                        -- array(text)
  l_array_d2nn     t_array_d2nn;                        -- array(number)(number)
  l_array_d2tt     t_array_d2tt;                        -- array(text)(text)
  l_array_d2nt     t_array_d2nt;                        -- array(number)(text)
  l_array_d2tn     t_array_d2tn;                        -- array(text)(number)
  l_nesttab_d1     t_nesttab_d1 := new t_nesttab_d1(); -- nested Table (number)
  l_nesttab_d2     t_nesttab_d2 := new t_nesttab_d2(); -- nested Table (number)(number)
  l_node_map_sep   varchar2(1) := '.';
  l_start_time     number;
  l_mb             varchar2(10);
  l_last_num       number;
  l_pid            number;
  l_sid            number;
  l_serial#        number;
  l_run            number := 0;
  l_pga_status     varchar2(10) := 'NOT';
  
  procedure reset as
  begin
    l_array_d1n.delete;
    l_array_d1t.delete;
    l_array_d2nn.delete;
    l_array_d2tt.delete;
    l_array_d2nt.delete;
    l_array_d2tn.delete;
    l_nesttab_d1.delete;
    l_nesttab_d2.delete;
    dbms_session.free_unused_user_memory;
    l_start_time := dbms_utility.get_time;
  end;

  procedure prt_and_reset_mem(p_name varchar2) as
  begin
    select lpad(round(pga_used_mem/1024/1024), 5, ' '), pid, s.sid, s.serial#
      into l_mb, l_pid, l_sid, l_serial#
      from v$process p, v$session s
     where p.addr = s.paddr and s.sid = sys.dbms_support.mysid and rownum = 1;
    dbms_output.put_line(rpad(l_run, 3, ' ')||p_name||', pga_used(MB) = '||l_mb||
             ', Elapsed(Second)= '||round((dbms_utility.get_time - l_start_time)/100, 2));
             
    execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_GET level ]'||l_pid||q'[']'; 
    -- wait status = COMPLETE when status = SCANNING
    while (true) loop
     select status into l_pga_status from v$process_memory_detail_prog where pid = l_pid;
     exit when l_pga_status = 'COMPLETE';
     dbms_lock.sleep(0.1);
    end loop;
    delete from process_memory_detail_v 
     where pid = l_pid and session_id = l_sid and session_serial# = l_serial# and run = l_run;
    insert into process_memory_detail_v select l_run, p_name, systimestamp, l_sid, l_serial#, v.* 
      from v$process_memory_detail v where pid = l_pid;
    -- outcomment to keep last PGA_DETAIL
    -- execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_CANCEL level ]'||l_pid||q'[']';  
    commit; 
    
    l_run := l_run + 1;
    reset;
  end;
  
  function rec(id number, name varchar2, num_1 number, num_2 number, txt_1 varchar2, txt_2 varchar2) 
    return t_rec as
  begin
    l_node_rec.id        := id;
    l_node_rec.name      := name;
    l_node_rec.num_1     := num_1;
    l_node_rec.num_2     := num_2;
    l_node_rec.txt_1     := txt_1;
    l_node_rec.txt_2     := txt_2;
    return l_node_rec;
  end;
    
  procedure run (p_cnt number := 1000000) as
  begin
    reset;
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d1n(c.id) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 1 array(id)  ');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d1t(c.name) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 1 array(name)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nn(c.num_1)(c.num_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;

    ---- lookup example ----
    --l_start_time := dbms_utility.get_time;
    --for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
    --  l_node_rec := l_array_d2nn(c.num_1)(c.num_2);
    --end loop;

    prt_and_reset_mem('Dimension 2 array(num_1)(num_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nn(c.num_2)(c.num_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(num_2)(num_1)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tt(c.txt_1)(c.txt_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_1)(txt_2)');   
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tt(c.txt_2)(c.txt_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_2)(txt_1)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nt(c.num_1)(c.txt_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(num_1)(txt_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2nt(c.num_2)(c.txt_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(num_2)(txt_1)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tn(c.txt_1)(c.num_2) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_1)(num_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab where rownum <= p_cnt) loop
      l_array_d2tn(c.txt_2)(c.num_1) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 2 array(txt_2)(num_1)');
    
    --- nested table. Note: specify p_cnt = 1000000 to select all rows for test ---
    dbms_output.put_line('');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab 
               where rownum <= p_cnt order by id) loop
     l_nesttab_d1.extend;
     l_nesttab_d1(l_nesttab_d1.last) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
    end loop;
    prt_and_reset_mem('Dimension 1 table(id)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab 
               where rownum <= p_cnt order by num_1, num_2) loop
     if (c.num_1 != l_last_num) then
      l_nesttab_d2.extend;
      l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;
      l_nesttab_d1 := new t_nesttab_d1();
     end if;
     l_nesttab_d1.extend;
     l_nesttab_d1(l_nesttab_d1.last) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
     l_last_num := c.num_1;
    end loop;
    l_nesttab_d2.extend;
    l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;

    ---- lookup example ----
    --l_start_time := dbms_utility.get_time;
    --for i in 1..l_nesttab_d2.count loop
    -- l_nesttab_d1 := l_nesttab_d2(i);
    -- for j in 1..l_nesttab_d1.count loop
    --   l_node_rec := l_nesttab_d1(j);
    -- end loop;
    --end loop;

    prt_and_reset_mem('Dimension 2 table(num_1)(num_2)');
    
    for c in (select id, name, num_1, num_2, txt_1, txt_2 from mem_tab 
               where rownum <= p_cnt order by num_2, num_1) loop
     if (c.num_2 != l_last_num) then
      l_nesttab_d2.extend;
      l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;
      l_nesttab_d1 := new t_nesttab_d1();
     end if;
     l_nesttab_d1.extend;
     l_nesttab_d1(l_nesttab_d1.last) := rec(c.id, c.name, c.num_1, c.num_2, c.txt_1, c.txt_2);
     l_last_num := c.num_2;
    end loop;
    l_nesttab_d2.extend;
    l_nesttab_d2(l_nesttab_d2.last) := l_nesttab_d1;
    prt_and_reset_mem('Dimension 2 table(num_2)(num_1)');    
  end;     
end;
/

-- set serveroutput on
-- exec test_collection_dimm_mem.run(1000000);

Friday, September 9, 2016

dbms_session.get_package_memory_utilization and limitations

Oracle 11g Release 2 extends dbms_session Package by introducing a new Procedure:
    get_package_memory_utilization
and 11.2.0.4.0 further enhanced with a second overloaded procedure to augment the measure from 2**31-1 up to 10**38.

It describes memory usage of PL/SQL package declare variables in either specification or body. but not memory usage of locally declared variables within functions, procedures, or anonymous blocks.

Oracle Documentation said:
    These procedures describe static package memory usage.
    The output collections describe memory usage in each instantiated package.

Probably "static package memory usage" stands for memory usage of package declared variables.

The second restriction is that it is bound to caller's session, and does not disclose heap allocation details.

This Blog till try to demonstrate such limitations.

See appended Test Code 1, 2 and its used dump_mem_usage in the Reference 3.

1. Test


SQL > exec pkg_mem_test.run_1(1000*1000);

-------- 1. PKG_MEM_TEST.run_1 START --------
v$process: (SID=854)Used/Alloc/Freeable/Max=6/8/1/11
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/7,PL/SQL/3/3/3,Other/3//3,Freeable/1/0/,
[Owner; Unit; Type; Used; Free; sum_abs_mb(1)]
S; PKG_MEM_TEST; 9  (package); 0; 0; 0
S; PKG_MEM_TEST; 11 (package body); 0; 0; 0

-------- 2. PKG_MEM_TEST.run_1 spec_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=1282/1283/0/1283
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/1215/1211/1215,Other/68//68,
[Owner; Unit; Type; Used; Free; sum_abs_mb(1207)]
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206
S; PKG_MEM_TEST; 11 (package body); 0; 0; 0

-------- 3. PKG_MEM_TEST.run_1 body_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=2496/2497/0/2497
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/2426/2420/2426,Other/71//71,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 4. PKG_MEM_TEST.run_1 local_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=3710/3712/0/3712
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/3638/3629/3638,Other/73//73,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 5. PROC_MEM_TEST START --------
v$process: (SID=854)Used/Alloc/Freeable/Max=3710/3712/0/3712
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/3638/3629/3638,Other/73//73,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 6. PROC_MEM_TEST local_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=4924/4926/0/4926
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/4850/4838/4850,Other/76//76,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 7. PKG_MEM_TEST.run_2 START --------
v$process: (SID=854)Used/Alloc/Freeable/Max=4924/4926/0/4926
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/4850/4838/4850,Other/76//76,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 8. PKG_MEM_TEST.run_2 local_rec_tab ALLOCATED --------
v$process: (SID=854)Used/Alloc/Freeable/Max=6138/6141/0/6141
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/6062/6047/6062,Other/78//78,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 9. PROC_MEM_TEST END --------
v$process: (SID=854)Used/Alloc/Freeable/Max=4924/6141/0/6141
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/4850/4838/6062,Other/1291//1291,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

-------- 10. PKG_MEM_TEST.run_1 END --------
v$process: (SID=854)Used/Alloc/Freeable/Max=3710/6141/0/6141
v$process_memory: Category/Alloc/Used/Max=SQL/0/0/47,PL/SQL/3638/3629/6062,Other/2503//2503,
[Owner; Unit; Type; Used; Free; sum_abs_mb(2412)]
S; PKG_MEM_TEST; 11 (package body); 1177; 28; 1206
S; PKG_MEM_TEST; 9  (package); 1177; 28; 1206

In Step 8, the total used memory is;
     6138 MB
but only 2412 from
    PKG_MEM_TEST; 11 (package body); 1177
  PKG_MEM_TEST; 9  (package); 1177
are listed.

The 3 local allocations in Steps:
  4. PKG_MEM_TEST.run_1 local_rec_tab
  6. PROC_MEM_TEST local_rec_tab
  8. PKG_MEM_TEST.run_2 local_rec_tab
are not reported.

2. Populate process_memory_detail


dbms_session.get_package_memory_utilization is restricted to report memory usage of caller's session, aggregated by each package (spec and body) name, which is a point of view in applications, and does not point out the details of allocated heap details.

V$PROCESS_MEMORY_DETAIL (Oracle 10.2 introduced) lists PGA memory usage by category, heap_name, and component name for each Oracle process, and summarized in V$PROCESS_MEMORY by category. They expose memory usage in the dimension of heap components, a point of view in program code.

Test Code 2 provides a V$PROCESS_MEMORY_DETAIL sampling tool to collect and monitor memory changes.

Run:

exec pga_sampling(p_sid => 234);

Query memory usage per sampling and display memory deviation by:

select v.*, mb - lag(mb) over(order by run) mb_delta 
  from(
    select run, timestamp, session_id, session_serial#, pid
          ,round(sum(bytes)/1024/1024) mb
          ,sum(allocation_count) allocation_count
    from process_memory_detail_v 
    group by run, timestamp, session_id, session_serial#, pid) v
order by run;     

List top 3 heap memory usages for each sampling by:

select run, timestamp, session_id, pid, round(sum(bytes)/1024/1024) mb,
       (select listagg(rpad(name||'('||round(bytes/1024/1024)||', '||allocation_count||')', 40, chr(32)), '; ') 
               within group (order by bytes desc) agg
          from (select * from process_memory_detail_v c 
                 where c.run = v.run and c.pid = v.pid and bytes > 0
                 order by bytes desc) c 
         where rownum <= 3
         ) "NAME(MB, ALLOC_COUNT) List"
from process_memory_detail_v v
group by run, timestamp, session_id, pid
order by run, timestamp, session_id, pid;

Join above query with v$active_session_history (or dba_hist_active_sess_history), one can approximately locate the memory usage related to PL/SQL program unit. Starting from this PL/SQL program unit, one can use 12c UTL_CALL_STACK (or BMS_UTILITY.FORMAT_CALL_STACK) to backtrace all call_stack involved units.

with sq as (
  select run, timestamp, v.session_id, session_serial#, pid, round(sum(bytes)/1024/1024) mb
        ,(select listagg(rpad(name||'('||round(bytes/1024/1024)||', '||allocation_count||')', 40, chr(32)), '; ') 
                 within group (order by bytes desc) agg
            from (select * from process_memory_detail_v c 
                   where c.run = v.run and c.pid = v.pid and bytes > 0
                   order by bytes desc) c 
           where rownum <= 3
           ) "NAME(MB, ALLOC_COUNT) List"
  from process_memory_detail_v v
  group by run, timestamp, v.session_id, session_serial#, pid)
select p.*
      ,(select owner||'.'||object_name||
               case when procedure_name is not null then '.' || procedure_name end
         from dba_procedures
        where object_id = s.plsql_entry_object_id and subprogram_id = s.plsql_entry_subprogram_id)
       plsql_entry
      ,s.sample_time, round(s.pga_allocated/1024/1024) pga_allocated_mb
  from sq p, v$active_session_history s
where p.session_id = s.session_id
  and p.session_serial# = s.session_serial#
  and p.timestamp between s.sample_time and s.sample_time + interval'1' second
 order by run, timestamp, p.session_id, p.session_serial#, pid, s.sample_time;


3. PGA memory internals


Look output of the first query in above Section, pick one RUN with some peak memory usage, for example, 4, Run query:

select run, category, name, heap_name, depth, path
      ,round(sum(bytes/1024)) kb, sum(allocation_count) alloc_count
      ,heap_descriptor, parent_heap_descriptor, cycle
from (
  select v.*, (level-1) depth
        ,sys_connect_by_path('('||category||' , '||name||' , '||heap_name||')', ' -> ') path
        ,connect_by_iscycle as cycle
  from process_memory_detail_v v
  where lower(name) like '%recursive addr reg file%'
  start with parent_heap_descriptor = '00' and run = 4
  connect by nocycle prior heap_descriptor = parent_heap_descriptor and prior run = run
) 
--where lower(name) like '%recursive addr reg file%'
group by run, category, name, heap_name, heap_descriptor, parent_heap_descriptor, depth, path, cycle
--having sum(bytes/1024) > 1024
order by run, category, name, heap_name, depth, kb;

  • It depicts the PGA heap tree structure by connecting heap_descriptor with parent_heap_descriptor.
  • We can observe that only "Other" Category has depth 0 (root) nodes, and all other Categories are subtrees to "Other".
    (One exception is when CATEGORY = 'PL/SQL' and NAME='miscellaneous', both heap_descriptor and parent_heap_descriptor equal to '00', causing cycle)
  • One time we noticed certain high PGA memory consumptions. By running above query, it turns out that the main contribution is due to "recursive addr reg file". Further searching Oracle MOS, it is documented as something related to PLSQL anonymous blocks.
    (MOS Bug 9478199 - Memory corruption / ORA-600 from PLSQL anonymous blocks (Doc ID 9478199.8)).
  • dbms_session.get_package_memory_utilization is hard to inject into existing code, let alone Oracle background processes. However, populating process_memory_detail opens a tiny door to peer Oracle internals, even for background processes, for example, PMON, DBWx, CJQ0, MMON.

4. Test Code 1


create or replace package pkg_mem_test is
  type t_rec        is record (id number, text varchar2(1000));
  type t_rec_tab    is table of t_rec index by pls_integer;
  spec_rec_tab      t_rec_tab;
  
  procedure run_1(p_cnt number);
  procedure run_2(p_cnt number);
end;
/

create or replace procedure proc_mem_test(p_cnt number) as
  type t_rec      is record (id number, text varchar2(1000));
  type t_rec_tab  is table of t_rec index by pls_integer;
  local_rec_tab   t_rec_tab;
begin
  dump_mem_usage.run('PROC_MEM_TEST START');

  select level id, rpad('ABC', 1000, 'X') text 
  bulk collect into local_rec_tab
  from dual connect by level <= p_cnt;
  
  dump_mem_usage.run('PROC_MEM_TEST local_rec_tab ALLOCATED');
  
  pkg_mem_test.run_2(p_cnt);
  
  dump_mem_usage.run('PROC_MEM_TEST END');
end;
/

create or replace package body pkg_mem_test is
  body_rec_tab      t_rec_tab;
  
  procedure run_1(p_cnt number) as
    local_rec_tab   t_rec_tab;
  begin
    dump_mem_usage.run('PKG_MEM_TEST.run_1 START');
  
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into spec_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_1 spec_rec_tab ALLOCATED');
    
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into body_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_1 body_rec_tab ALLOCATED');
    
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into local_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_1 local_rec_tab ALLOCATED');
    
    proc_mem_test(p_cnt);
    
    dump_mem_usage.run('PKG_MEM_TEST.run_1 END');
  end;
  
  procedure run_2(p_cnt number) as
    local_rec_tab    t_rec_tab;
  begin
    dump_mem_usage.run('PKG_MEM_TEST.run_2 START');
  
    select level id, rpad('ABC', 1000, 'X') text 
    bulk collect into local_rec_tab
    from dual connect by level <= p_cnt;
  
    dump_mem_usage.run('PKG_MEM_TEST.run_2 local_rec_tab ALLOCATED');  
    end;
end;
/

-- exec pkg_mem_test.run_1(1000*1000);

5. Test Code 2


drop table process_memory_detail_v;

create table process_memory_detail_v as 
  select 123 run, rpad('A', 40, 'X') run_name, timestamp'1998-02-17 11:22:00' timestamp
        ,234 session_id, 345 session_serial#, v.* 
  from v$process_memory_detail v where 1=2;
  
-- sampling for p_dur seconds in p_interval seconds interval.
create or replace procedure pga_sampling(p_sid number, p_dur number := 120, p_interval number := 1) as
  l_start_time     number := dbms_utility.get_time;
  l_sample_time    number;
  l_sleep_time     number;
  l_pid            number;
  l_sid            number;
  l_serial#        number;
  l_run            number := 0;
  l_pga_status     varchar2(10) := 'NOT';
  l_run_name       varchar2(40) := 'PGA Sampling('||p_sid||', '||p_dur||', '||p_interval||')';
begin
  select pid, s.sid, s.serial#
    into l_pid, l_sid, l_serial#
    from v$process p, v$session s
   where p.addr = s.paddr and s.sid = p_sid and rownum = 1;  
               
  l_sample_time := dbms_utility.get_time;
  while ((l_sample_time - l_start_time)/100 < p_dur) loop 
    execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_GET level ]'||l_pid||q'[']'; 
    -- wait status = COMPLETE when status = SCANNING, or ENABLED but elapsed time is over duration
    while (true) loop
     select status into l_pga_status from v$process_memory_detail_prog where pid = l_pid;
     exit when l_pga_status = 'COMPLETE' or ((dbms_utility.get_time - l_start_time)/100 > p_dur);
     dbms_lock.sleep(0.1);
    end loop;
    delete from process_memory_detail_v 
     where pid = l_pid and session_id = l_sid and session_serial# = l_serial# and run = l_run;
    insert into process_memory_detail_v select l_run, l_run_name, systimestamp, l_sid, l_serial#, v.* 
      from v$process_memory_detail v where pid = l_pid;
    -- outcomment to keep last PGA_DETAIL
    -- execute immediate q'[alter session set events 'immediate trace name PGA_DETAIL_CANCEL level ]'||l_pid||q'[']';  
    commit; 
    l_run := l_run + 1;    
    l_sleep_time := p_interval - (dbms_utility.get_time - l_sample_time)/100;
    if l_sleep_time > 0 then
     dbms_lock.sleep(l_sleep_time);
    end if;
    l_sample_time := dbms_utility.get_time;
  end loop;
end;
/
    
-- exec pga_sampling(p_sid => 234);

References

  1. Oracle MOS Notes: How To Find Where The Memory Is Growing For A Process (Doc ID 822527.1)
  2. Oracle Memory Troubleshooting, Part 4: Drilling down into PGA memory usage with V$PROCESS_MEMORY_DETAIL 
  3. dbms_session package_memory_utilization
 
 

Tuesday, August 2, 2016

ORA-04025 library object lock on AQ queue and SQL cursor

AQ dequeue throws:
     ORA-04025: maximum allowed library object lock allocated for S.KSUN_QUEUE
in an Oracle 12c batch application when number of concurrent jobs greater than 65, where KSUN_QUEUE is the queue name, and DB is set up with following relevant parameters:
  _session_cached_instantiations       integer     1001
  cursor_sharing                       string      exact
  cursor_space_for_time                boolean     FALSE
  open_cursors                         integer     800
  session_cached_cursors               integer     600


Adhere to Oracle Documentation on ORA-04025:
  Cause:  Too many active locks for the object has been allocated.
               This error can be a result of too many cursors kept open per session.
  Action: Close cursors, reduce session_cached_cursors value.

Halving session_cached_cursors from 600 to 300 does not make the error disappear.

This Blog will try to demonstrate:
(a). there probably exists a hard limit of 65535 locks per library object:
             LIBRARY OBJECT LOCK: MAXIMUM 65535
       internally it could be implemented by a 16-bits u16.
(b). locks per Oracle session is imposed by
             _session_cached_instantiations
(c). maximum number of concurrent sessions to avoid ORA-04025 is
             floor(65535/_session_cached_instantiations)
      It is 65 for above settings.

Tests done on Solaris and AIX with Oracle 12.1.0.2.0.
See Blog appended Test Code.
Note: to make the test reproducible, before each test, restart DB to remove any un-released locks.

Update: By using the test code provided in this Blog, the Bug was investigated and fixed in Sep 2016 by:
    Patch 24509056: NUMBER OF LIBRARY OBJECT LOCKS OF QUEUE INCREASED AFTER EACH DEQUEUE IN 12.1.0.2

1. Library object lock on AQ queue


1.1 Single session dequeue/enqueue test:


SQL> exec test_deq(1200, 0, 0, true);
 deq_cnt: 1, lock_cnt: 1
 deq_cnt: 2, lock_cnt: 2
 deq_cnt: 3, lock_cnt: 3
 ...
 deq_cnt: 1000, lock_cnt: 1000
 deq_cnt: 1001, lock_cnt: 1001
 deq_cnt: 1002, lock_cnt: 1002
 deq_cnt: 1003, lock_cnt: 1002
 ...
 deq_cnt: 1199, lock_cnt: 1002
 deq_cnt: 1200, lock_cnt: 1002

It shows that number of locks on the queue object is increased for each dequeue call till 1002,
and then maintained at 1002.

Open a new session, make a similar enqueue test.

  SQL> exec test_enq(1200, 0, true);

The lock behavior is similar to dequeue. Therefore all the discussion on dequeue applies to enqueue as well.

1.2   66 Batch sessions dequeue test


In this test, we will start 66 dequeue sessions to provoke ORA-04025. All 3 monitoring queries can be run during whole tests to watch the progress of locks. Once total number of locks on KSUN_QUEUE is approaching to 65535, some sessions raise ORA_04025.

(A). Launch 66 concurrent Jobs by:


  SQL> exec test_deq_jobs(66, 2000);  

After about 18 minutes (1003 times of one-second dequeue wait), we will see some trace files like:

 ORA-12012: error on auto execute of job 86367
 ORA-04025: maximum allowed library object lock allocated for S.KSUN_QUEUE
 ORA-06512: at "S.TEST_DEQ", line 40
 ORA-06512: at line 1

(B). Watch total number of locks on KSUN_QUEUE:


SQL> select name, locks, pins, locked_total, pinned_total
       from v$db_object_cache l
      where hash_value = 3868150627 or name = 'KSUN_QUEUE';

 NAME         LOCKS  PINS LOCKED_TOTAL PINNED_TOTAL
 ----------- ------ ----- ------------ ------------
 KSUN_QUEUE   65106     0       321935       428400

SQL> select kglnaobj||'('||kglobtyd||')' name, object_handle, type, mode_held, mode_requested, count(*) cnt
       from v$libcache_locks l, x$kglob o
      where o.kglnaobj = 'KSUN_QUEUE'
        and l.object_handle = o.kglhdadr
      group by object_handle, kglnaobj, kglobtyd, type, mode_held, mode_requested;

 NAME               OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED    CNT
 ------------------ ---------------- ---- ---------- -------------- ------
 KSUN_QUEUE(QUEUE)  0000000167A4B2E0 LOCK          1              0  65224

(C). Watch number of locks per session on KSUN_QUEUE:


SQL> with sq as
          (select /*+ materialize */ sid, program, saddr, event
              ,to_char(p1, 'xxxxxxxxxxxxxxx') p1, p1text
              ,to_char(p2, 'xxxxxxxxxxxxxxx') p2, p2text
              ,to_char(p3, 'xxxxxxxxxxxxxxx') p3, p3text
           from v$session
          where program like '%(J%' or module like '%deq%')
     select s.sid, holding_user_session, object_handle, l.type, mode_held, mode_requested,
            count(*) lock_cnt     -- it goes up to maximum 1003, and then back 1002
       from v$libcache_locks l, x$kglob o, sq s
      where o.kglnaobj = 'KSUN_QUEUE'
        and l.object_handle = o.kglhdadr
        and l.holding_user_session = s.saddr
     group by s.sid, holding_user_session, object_handle, l.type, mode_held, mode_requested
     order by lock_cnt desc, s.sid, holding_user_session, l.type, mode_held, mode_requested;

   SID HOLDING_USER_SES OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED   LOCK_CNT
 ----- ---------------- ---------------- ---- ---------- -------------- ----------
     6 000000018FA49D60 0000000167A4B2E0 LOCK          1              0       1003
    70 000000018FA93AA0 0000000167A4B2E0 LOCK          1              0       1003
 ...
  1058 000000018FE84A60 0000000167A4B2E0 LOCK          1              0       1002
  1059 000000018D19A168 0000000167A4B2E0 LOCK          1              0       1002
   173 000000018ED35840 0000000167A4B2E0 LOCK          1              0        739
  1025 000000018D177398 0000000167A4B2E0 LOCK          1              0        739        

66 rows selected.        

Examining maximum number of locks per session, they oscillate between 1002 and 1003.

There exist sessions whose LOCK_CNT is not able to reach 1002 because
    66 x 1002 > 65535
hence ORA-04025.

(D). Match Oracle parameters with value like '100%':


To figure out what is the limit of 1003, run:

SQL> select name, value from v$parameter where value like '100%';

 NAME                             VALUE
 -------------------------------- -----
 _session_cached_instantiations   1001

That is _session_cached_instantiations.

(E). Finally clean-up all jobs

  SQL> exec clean_jobs;

1.3   65 Batch sessions dequeue test


Repeat the same test by launching 65 concurrent Jobs. No ORA-04025 occurs.

  SQL> exec test_deq_jobs(65, 2000);

Wait about 18 minutes, run above 3 monitoring queries.
Here the output:

SQL> query_1

 NAME         LOCKS  PINS LOCKED_TOTAL PINNED_TOTAL
 ----------- ------ ----- ------------ ------------
 KSUN_QUEUE   65130     0      2493796      3324170

SQL> query_2

 NAME               OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED    CNT
 ------------------ ---------------- ---- ---------- -------------- ------
 KSUN_QUEUE(QUEUE)  0000000167A4B2E0 LOCK          1              0  65130
 
SQL> query_3
 
   SID HOLDING_USER_SES OBJECT_HANDLE    TYPE  MODE_HELD MODE_REQUESTED   LOCK_CNT
 ----- ---------------- ---------------- ---- ---------- -------------- ----------
     6 000000018FA49D60 0000000167A4B2E0 LOCK          1              0       1003
    40 000000018FA6CB30 0000000167A4B2E0 LOCK          1              0       1003
 ...   
  1027 000000018D1752C8 0000000167A4B2E0 LOCK          1              0       1002
  1058 000000018FE84A60 0000000167A4B2E0 LOCK          1              0       1002

65 rows selected.        

All sessions can own a minimum LOCK_CNT of 1002 because
    65 x 1002 < 65535
hence no ORA-04025.

To terminate the test, clean-up all jobs

  SQL> exec clean_jobs;

1.4 _session_cached_instantiations


Lowering _session_cached_instantiations from 1001 to 401, restart DB:
   alter system set "_session_cached_instantiations"=401 scope=spfile;
repeat the test, no more error even for 100 Jobs; but error reappears with 200 Jobs.

Since not clear what is the limit of "_session_cached_instantiations", it is worth of gambling its extreme behavior with two additional tests, one with 65535, another with 65536.

alter system set "_session_cached_instantiations"=65535 scope=spfile;

restart DB

SQL> exec test_deq(65536, 0, 0);

 deq_cnt: 65536, lock_cnt: 65535
 ORA-04025: maximum allowed library object lock allocated for S.KSUN_QUEUE

Then make some unrealistic adventure:

alter system set "_session_cached_instantiations"=65536 scope=spfile;

restart DB

SQL> exec test_deq(65536, 0, 0);

 deq_cnt: 65536, lock_cnt: 65535
 ORA-04025: maximum allowed library object lock allocated for K.KSUN_QUEUE


Both identical outputs show that sessions raise ORA-04025 at limit 65535. "_session_cached_instantiations" has to be less than 65535.

1.5   12c vs. 11g dequeue behavior


Running the same tests in 11.2.0.4.0, number of locks per session on KSUN_QUEUE is constantly kept as 1. Hence, 12c dequeue behavior is perturbed, in which each dequeue call opens a new lock on the queue, but never release it till ceiling _session_cached_instantiations. One can wonder if that can be classified as a library object lock leak.

Oracle Database Advanced Queuing 12c seems introduced some new AQ background architecture, for example, AQPC (GV$AQ_BACKGROUND_COORDINATOR).

1.6 MOS Note


Resorting to MOS, there is one hit of 65535 hinting to ORA-4025.

MOS Bug 14542720 - ORA-4025 with CURSOR_SHARING / Dynamic Sampling (Doc ID 14542720.8) said:
  Look for the ActiveLocks value of some cursor to show very high values  (when it reaches 65535 the ora-4025 is raised)

2. SQL cursor and ORA-04025


As a general understanding of ORA-04025, it is necessary to examine its original designation for SQL Cursors. Pick the JAVA test code provided by Oracle MOS, we will make 3 tests.

Note: the same Java code is tested on both 11.2.0.4.0 and 12.1.0.2.0, the behaviors are identical.

At first, set open_cursors:
  alter system set open_cursors=50000 scope=both;

2.1 Single session


Start one single session, monitor by:

SQL> select sid, sql_id, sql_text, count(*) cnt
       from v$open_cursor
      where cursor_type in ('OPEN')
      group by sid, sql_id, sql_text
      order by cnt desc;

when opened cursors reaching 50000, it hits:
 ORA-01000: maximum open cursors exceeded
    
$1 > java OracleJdbcExample4025
 Count=49998, Date : 2016-07-21 10:50:17
 Count=49999, Date : 2016-07-21 10:50:17
 Exception in thread "main" java.sql.SQLException: ORA-01000: maximum open cursors exceeded


2.2 Two concurrent sessions


Start two sessions, both throw ORA-04025:

$1 > java OracleJdbcExample4025

 Count=25115, Date : 2016-07-21 11:46:45.0
 Count=25116, Date : 2016-07-21 11:46:45.0
 Exception in thread "main" java.sql.SQLException: 
  ORA-04025: maximum allowed library object lock allocated for select sysdate as current_day from dual

$2 > java OracleJdbcExample4025

 Count=40418, Date : 2016-07-21 11:46:45.0
 Count=40419, Date : 2016-07-21 11:46:45.0
 Exception in thread "main" java.sql.SQLException: 
  ORA-04025: maximum allowed library object lock allocated for select sysdate as current_day from dual

Summing two outputs agrees the equality:
   25116 + 40419 = 65535
which confirms "LIBRARY OBJECT LOCK: MAXIMUM 65535".

2.3 Boundary value


According to Oracle Documentation, open_cursors has a legal range of values:
    0 to 65535
what happens when setting open_cursors=65535 ?
Both ORA-01000 and ORA-04025 are satisfied by boundary value 65535, which one is raised at first ? (second will never be presented).

Here the test output:

alter system set open_cursors=65535 scope=both;

$1 > java OracleJdbcExample4025

  Count=65533, Date : 2016-07-22 08:01:49.0
  Count=65534, Date : 2016-07-22 08:01:49.0
  Exception in thread "main" java.sql.SQLException: ORA-01000: maximum open cursors exceeded


So we can guess that ORA-04025 is ordered after ORA-01000 in the code path. That is also why to provoke ORA-04025 for SQL cursor, minimum two sessions are required.

Summary


Recapping all the tests, we can draw a few possible observations, but I would be careful to have any earlier conclusions before Oracle says.

1. SQL cursor is a library object, but not each library object is a SQL cursor, for example, AQ queue.

2. OPEN_CURSORS specifies the maximum number of open cursors per session can have at each instant. HARD Limit.

3. SESSION_CACHED_CURSORS specifies the number of cursors to cache per session, an optimization advice. SOFT Limit.

4. OPEN_CURSORS, SESSION_CACHED_CURSORS are parameters applied to SQL cursor.

5. "LIBRARY OBJECT LOCK: MAXIMUM 65535" specifies limit per library object. HARD Limit.

6. 12c _SESSION_CACHED_INSTANTIATIONS specifies number of library object lock on AQ queue per session.
    HARD Limit, but not raising error.

7. "LIBRARY OBJECT LOCK: MAXIMUM 65535" and _SESSION_CACHED_INSTANTIATIONS together defines
     maximum number of concurrent sessions to avoid ORA-04025:
            floor(65535/_session_cached_instantiations)

References


1. open_cursors, session_cached_cursors: cursordump

2. Oracle 12c single session "library cache lock (cycle)" deadlock

3. Foreign Keys and Library Cache Locks: Analysis

Test Code



------------------------------------- AQ dequeue ---------------------------------
--alter system set "_session_cached_instantiations"=1001 scope=spfile;
--alter system set open_cursors=800 scope=both;
--alter system set session_cached_cursors=600 scope=spfile;

drop type Q_OBJ force;

create or replace type Q_OBJ is object (id number, name varchar2(100));
/       

begin sys.dbms_aqadm.drop_queue_table(queue_table => 'Q_TAB', force=> TRUE); end;
/

begin
  sys.dbms_aqadm.create_queue_table
  (queue_table        => 'Q_TAB'
  ,queue_payload_type => 'Q_OBJ'
  ,compatible         => '10.0.0'  
  ,sort_list          => 'PRIORITY,ENQ_TIME'
  ,multiple_consumers =>  false
  ,message_grouping   =>  0
  ,comment            =>  'queue table'
  ,secure             =>  false);
end;
/

begin
  sys.dbms_aqadm.stop_queue (queue_name => 'KSUN_QUEUE');
  sys.dbms_aqadm.drop_queue (queue_name => 'KSUN_QUEUE');
end;
/

begin
  sys.dbms_aqadm.create_queue
  (queue_name     => 'KSUN_QUEUE'
  ,queue_table    => 'Q_TAB'
  ,queue_type     => sys.dbms_aqadm.normal_queue
  ,max_retries    => 100
  ,retry_delay    => 2
  ,retention_time => 604800
  ,comment        => 'ksun queue');
end;
/

begin sys.dbms_aqadm.start_queue(queue_name => 'KSUN_QUEUE', enqueue => true, dequeue => true); end;
/
  
create or replace procedure test_enq(p_cnt number := 1, p_sleep number := 0, p_prt boolean :=false) as
  l_enqueue_options dbms_aq.enqueue_options_t;
  l_msg_properties  dbms_aq.message_properties_t;
  l_msg_handle      raw(16);
  l_msg             q_obj;
  l_lock_cnt        number;
  
  procedure prt(i_cnt number, i_prt boolean) as
   begin
    if i_prt then
     select locks into l_lock_cnt from v$db_object_cache where name = 'KSUN_QUEUE';
     dbms_output.put_line ('deq_cnt: '||i_cnt||', lock_cnt: '||l_lock_cnt);
   end if;
   end;
begin 
 dbms_application_info.set_module(module_name => 'enq session', action_name => 'enqueue');
  for i in 1..p_cnt loop
    l_msg := q_obj(i, rpad(i, 10, 'X'));
            
    dbms_aq.enqueue(queue_name         => 'KSUN_QUEUE',
                    enqueue_options    => l_enqueue_options,
                    message_properties => l_msg_properties,
                    payload            => l_msg,
                    msgid              => l_msg_handle);
    prt(i, p_prt);
    
    commit;
    
    if p_sleep > 0 then
      dbms_lock.sleep(p_sleep);
    end if;
  end loop;
end;
/

create or replace procedure test_deq(p_cnt number := 1, p_sleep number := 0, p_wait number := 1, 
                                     p_prt boolean :=false) as
 l_dequeue_options     dbms_aq.dequeue_options_t;
 l_msg_properties      dbms_aq.message_properties_t;
 l_msg_handle          raw(16);
 l_msg                 q_obj;
 l_lock_cnt            number;
 no_messages_excp      exception;
 pragma exception_init (no_messages_excp, -25228);
  
  procedure prt(i_cnt number, i_prt boolean) as
   begin
    if i_prt then
     select locks into l_lock_cnt from v$db_object_cache where name = 'KSUN_QUEUE';
     dbms_output.put_line ('deq_cnt: '||i_cnt||', lock_cnt: '||l_lock_cnt);
   end if;
   end;
begin
 dbms_application_info.set_module(module_name => 'deq session', action_name => 'dequeue');
 l_dequeue_options.wait := p_wait;
  for i in 1..p_cnt loop
  begin 
   if p_sleep > 0 then
    dbms_lock.sleep(p_sleep);
   end if;
   
    dbms_aq.dequeue(queue_name            => 'KSUN_QUEUE',
                    dequeue_options       => l_dequeue_options,
                    message_properties    => l_msg_properties,
                    payload               => l_msg,
                    msgid                 => l_msg_handle);
          
    dbms_output.put_line ('MSG id: '||l_msg.id||', name: '||l_msg.name);
    
    prt(i, p_prt);
    
    commit;
      exception when no_messages_excp then prt(i, p_prt);
         when others then prt(i, true); 
                  raise;  
   end;
 end loop;
end;
/    

create or replace procedure test_deq_jobs (p_jobs number := 66, p_cnt number := 2000) as
 l_job_id pls_integer;
begin
  for i in 1..p_jobs loop
  dbms_job.submit(l_job_id, 'begin while true loop test_deq('||p_cnt||'); end loop; end;');
  commit;
  dbms_output.put_line(l_job_id);
 end loop;
end;
/        

create or replace procedure clean_jobs as
  begin
   for c in (select job from dba_jobs) loop
      begin
         dbms_job.remove (c.job);
      exception when others then null;
      end;
      commit;
   end loop;

   for c in (select d.job, d.sid, (select serial# from v$session where sid = d.sid) ser 
               from dba_jobs_running d) loop
      begin
         execute immediate
               'alter system kill session '''|| c.sid|| ',' || c.ser|| ''' immediate';
         dbms_job.remove (c.job);
      exception when others then null;
      end;
      commit;
   end loop;
   
   -- select * from dba_jobs;
   -- select * from dba_jobs_running;
end;
/

------------------------------------- SQL curosr and ORA-04025 Test ---------------------------------

import java.sql.Connection; 
import java.sql.DriverManager; 
import java.sql.PreparedStatement; 
import java.sql.ResultSet; 
import java.sql.SQLException;

public class OracleJdbcExample4025 { 

public static void main(String args[]) throws SQLException { 
 System.out.println("--------SQL curosr and ORA-04025 Test ------");
 try {
  Class.forName("oracle.jdbc.driver.OracleDriver");
 } catch (ClassNotFoundException e) {
  System.out.println("Where is your Oracle JDBC Driver?");
  e.printStackTrace();
  return;
 }
 System.out.println("Oracle JDBC Driver Registered!");
 Connection connection = null;
 try {
  connection = DriverManager.getConnection(
    "jdbc:oracle:thin:@testdb:1522:testdb", "s", "s");  // adapt it
 } catch (SQLException e) {
  System.out.println("Connection Failed! Check output console");
  e.printStackTrace();
  return;
 }    
 String sql ="select sysdate as current_day from dual"; 
 int cnt = 0;
 while (true) { 
  //creating PreparedStatement object to execute query 
  PreparedStatement preStatement = connection.prepareStatement(sql); //<<< this opening cursor 
  ResultSet result = preStatement.executeQuery(); 
  while(result.next()){ 
   System.out.println("Count=" + (++cnt) + ", Date : " + result.getString("current_day")); 
  } 
 // preStatement.close();  //<<< if this is uncommented cursor is closed, otherwise not. 
 } 
 // System.out.println("done"); 
 } 
} 

Monday, June 13, 2016

SQL Parsing in serializable transaction throws ORA-08177: can't serialize access for this transaction

Setting serializable isolation level in Oracle 12c (12.1.0.2.0), SQL Parsing can throw:
  ORA-08177: can't serialize access for this transaction

run test (see Blog appended TestCode):
 
SQL (742) > exec ora_08177_test(10, 'ksun_ora_08177_collect_listagg');

Run=1
ERROR at line 1:
ORA-08177: can't serialize access for this transaction
ORA-06512: at "ORA_08177_TEST", line 20
ORA-06512: at line 1

Note: Each time, open a new session, run the test. After first run, subsequent runs in the same session do not hit the error.

According to Oracle documentation: Database Concepts - Data Concurrency and Consistency

The database generates an error when a serializable transaction tries to update or delete data changed by a different transaction that committed after the serializable transaction began:
 ORA-08177: Cannot serialize access for this transaction


Here there exists only one single session, so it does not satisfy the requirement of above documentation.

Reasoning


Refer to the recursive session discussed in Blog: Oracle 12c single session "library cache lock (cycle)" deadlock .

run query:
 
select distinct s.sid, holding_user_session, holding_session
  from v$libcache_locks c, v$session s
 where lnnvl(holding_user_session = holding_session)
   and c.holding_user_session = s.saddr 
   and s.sid = 742;
   
SID HOLDING_USER_SES HOLDING_SESSION 
--- ---------------- ----------------
742 070001010ECF8680 070001010ECFC880    

(742 is the SID of test session)

crosscheck the dump generated by 8177 trace event on AIX (we set this event for the test):

----- Call Stack Trace -----
ktrexc()+3536        call     ksesecl0()           FFFFFFFFFFC9FB0 ? 122F5F490 ?
rpidrv()+3572        call     rpiswu2()            70001010ECFC880 ? 1221BF1E0 ?
kksLoadChild()+3544  call     rpiswu2()            70001010ECF8680 ? 000000090 ?
...
========== FRAME [19] (ktrexc()+3536 -> ksesecl0()) ==========
defined by frame pointers 0xffffffffffca080  and 0xffffffffffc9ee0
CALL TYPE: call   ERROR SIGNALED: yes   COMPONENT: (null)
...
PROCESS STATE
-------------
Process global information:
     process: 0x70001010e8c4078, call: 0x7000100eb0e7bb0, xact: 0x7000101037e9880, 
     curses: 0x70001010ecfc880, usrses: 0x70001010ecf8680
...
KSUSETRN = 0X07000101037E9880
...
(trans) flg = 0x00000004, flg2 = 0x00400000, flg3 = 0x00000040, 
  prx = 0x7000101037e9880, ros = 2147483647, crtses=0x70001010ecf8680

we can establish two equations:

holding_user_session = 070001010ECF8680 = usrses: 0x70001010ecf8680
holding_session      = 070001010ECFC880 = curses: 0x70001010ecfc880

which tellingly indicate the co-existence of two different sessions, usrses(user session) and curses (cursor session), and existence of transaction (0X07000101037E9880) created by user session (crtses=0x70001010ecf8680).

Looking Call Stack Trace, user session calls:
   kksLoadChild()
and then switches to cursor session, which triggers rpi (recursive procedure interface):
 rpidrv()
and finally ERROR SIGNALED at:
  ktrexc()
where ktr means kernel transaction read consistency.

Since user session already started a transaction, any attempt to modify the data by the cursor session resulted in:
 ORA-08177: can't serialize access for this transaction

Run other two tests:

SQL (742) > exec ora_08177_test(10, 'ksun_ora_08177_collect');
SQL (742) > exec ora_08177_test(10, 'ksun_ora_08177_listagg');

there are no errors.

Hence, the error is probably related to certain constellation of collect and listagg.

All tests are performed on AIX, Solaris and Linux with Oracle 12c (12.1.0.2.0).
It is not reproducible in Oracle 11.2.0.4.0.

By the way, the first run of query:

SQL (742)> select collect (y) from ksun_ora_08177;
SYSTPNSRcj9+JXnjgVAAmVdl5Ow==(1, 2, 3, 0, 1, 2, 3, 0, 1)

creates a new TYPE:

TYPE     "SYSTPNSRcj9+JXnjgVAAmVdl5Ow==" AS TABLE OF NUMBER

which is due to "COLLECT" function.

It can be verified by:

select s.*, o.last_ddl_time, o.created, o.*
  from dba_source s, dba_objects o
 where s.name = o. object_name
   and o.object_type = 'TYPE'
   and name = 'SYSTPNSRcj9+JXnjgVAAmVdl5Ow==';

and also documented in Database SQL Language Reference:

COLLECT is an aggregate function that takes as its argument a column of any type
and creates a nested table of the input type out of the rows selected.


Another example of TYPE creation was observed in Blog: Oracle 12c single session "library cache lock (cycle)" deadlock.

Addendum


It seems that fixing such a bug requires a lot of design change in the code, for example, distinguishing user DML and sys DML on underlying objects (e.g. undo$ or user$ tables). For the time being, fixing the bug is not feasible. Therefore be cautious when using such features.

TestCode


--------------------
set numformat 99
column y_coll format a10

alter system set events '8177 trace name errorstack level 10';

drop table ksun_ora_08177;

create table ksun_ora_08177 as select mod(level, 2) x, mod(level, 4) y from dual connect by level <= 9; 

select * from ksun_ora_08177;

create or replace type ksun_tab force is table of number;
/

--------------------
create or replace force view ksun_ora_08177_collect_listagg(x, cnt, y_coll) as
with sq as (
  select x, count(*) cnt, cast (collect (distinct y) as ksun_tab) y
  from ksun_ora_08177
  group by x)
select x, cnt,
       (select listagg (column_value, ',') within group (order by column_value)
          from table (y)) y_coll
from sq 
where x is not null;

--------------------

create or replace force view ksun_ora_08177_collect(x, cnt, y_coll) as
select x, count(*) cnt, cast (collect (distinct y) as ksun_tab) y_coll
from ksun_ora_08177
where x is not null
group by x;

--------------------
create or replace force view ksun_ora_08177_listagg(x, cnt, y_coll) as
select x, count(*) cnt, listagg (y, ',') within group (order by y) y_coll
from ksun_ora_08177
where x is not null
group by x;

--------------------
create or replace procedure ora_08177_test(p_cnt number, p_view varchar2) as
 l_cur       integer;
 l_qry       varchar2(300) := 'select * from '||p_view;
 l_idx           pls_integer;
 l_col_desc_tab  dbms_sql.desc_tab;
begin
 l_qry := 'select /* '||p_view ||' */ * from '||p_view;
 for i in 1..p_cnt loop
  dbms_output.put_line('Run='||i);
  execute immediate 'alter system flush shared_pool';

  begin
    commit;
    set transaction isolation level serializable;
    l_cur := dbms_sql.open_cursor;
    dbms_sql.parse(l_cur, l_qry, dbms_sql.native);
    dbms_sql.describe_columns (l_cur, l_idx, l_col_desc_tab);
    dbms_sql.close_cursor(l_cur);
    exception when others then
      dbms_sql.close_cursor(l_cur);
      raise;
  end;
 end loop;
end;
/

-- Note: Each time, open a new session, run the test. After first run, subsequent runs in the same session do not hit the error.
-- exec ora_08177_test(10, 'ksun_ora_08177_collect_listagg');
-- exec ora_08177_test(10, 'ksun_ora_08177_collect');
-- exec ora_08177_test(10, 'ksun_ora_08177_listagg');