Wednesday, October 25, 2023

ORA-28268: Exceeded the maximum allowed size for Context information in a session

Note: Tested in Oracle 19.18.


1. Test Setup



-- code from Blog: "library cache: mutex X" and Application Context 
--    (http://ksun-oracle.blogspot.com/2016/11/library-cache-mutex-x-and-application.html)
create or replace context test_ctx using test_ctx_pkg;

--create or replace context test_ctx using test_ctx_pkg accessed globally;

create or replace package test_ctx_pkg is 
  procedure set_val (attr varchar2, val varchar2);
 end;
/

create or replace package body test_ctx_pkg is
  procedure set_val (attr varchar2, val varchar2) as
  begin
    --dbms_session.clear_all_context('TEST_CTX');
    dbms_session.set_context('TEST_CTX', attr, val);
  end;
end;
/

create or replace procedure ctx_set(p_cnt number, attr_pre varchar2, val_pre varchar2) as
begin
 for i in 1..p_cnt loop
  test_ctx_pkg.set_val(attr_pre||i, val_pre||i);    -- 'library cache: mutex X' on TEST_CTX
 end loop;
end;
/

create or replace procedure test_ORA_28268 (p_cnt number) as 
  l_attr_pre varchar2(32000);
  l_val_pre  varchar2(32000);
  l_val      varchar2(32000);
begin
  --dbms_session.clear_all_context('TEST_CTX');
  ctx_set(p_cnt, 'ATTR_', 'VAL_');
  
  select sys_context('TEST_CTX', 'ATTR_'||1) into l_val from dual;
  dbms_output.put_line('l_attr='||'ATTR_'||1||', VAL='||l_val);
  
  select sys_context('TEST_CTX', 'ATTR_'||p_cnt) into l_val from dual;
  dbms_output.put_line('l_attr='||'ATTR_'||p_cnt||', VAL='||l_val); 
end;
/

create or replace procedure list_context as 
  l_tab  dbms_session.appctxtabtyp;
  l_size number;
begin
  dbms_session.list_context(l_tab, l_size);
  dbms_output.put_line('l_tab.count: '||l_tab.count||', l_size:'||l_size);
  for i in 1..l_tab.count loop
    dbms_output.put_line('namespace: '||l_tab(i).namespace||', attribute: '||l_tab(i).attribute||', value: '||l_tab(i).value);
  end loop;
end;
/

2. Test Run



SQL > exec test_ORA_28268(10000-300);

      l_attr=ATTR_1, VAL=VAL_1
      l_attr=ATTR_9700, VAL=VAL_9700

SQL > exec test_ORA_28268(10000+300);

      BEGIN test_ORA_28268(10000+300); END;
      ERROR at line 1:
      ORA-28268: Exceeded the maximum allowed size for Context information in a session
      ORA-06512: at "SYS.DBMS_SESSION", line 141
      ORA-06512: at "K.TEST_CTX_PKG", line 5
      ORA-06512: at "K.CTX_SET", line 4
      ORA-06512: at "K.TEST_ORA_28268", line 7
      ORA-06512: at line 1

3. ORA-28268


Oracle document:

  28268, 0000, "Exceeded the maximum allowed size for Context information in a session"
  // *Cause:    The maximum size specified by the _session_context_size
  //            init.ora parameter was exceeded.
  // *Action: Please change the value for _session_context_size in the
  //  
ORA-28268 is raised when the number of session local context entries exceeded limit (session local context array) which is defined by "_session_context_size" (default 10,000).

It is only applied to local context (dba_context.type = 'ACCESSED LOCALLY')
(for global context, ORA-04031 when too many global context entries, check v$sgastat.name = 'Global Context').

It's value can be modified by:

  alter system set "_session_context_size"=10000 scope=spfile;
One cause of ORA-28268 is probably session local context leak.

The name "_session_context_size" seems misleading, mabybe renamed as "_session_context_entry_limit".

Wednesday, September 20, 2023

Oracle Index Root Modification CBC Latch Contention: Workaround Studies

In Oracle applications, often there are certain central tables, which are accessed concurrently by many sessions using indexes. If one index root block is modified due to branch or leaf block split when running DML statements, the system could experience hard to predict heavy CBC Latch Contention on root block.

In this Blog, we would try to set apart root block modification from DML operations so that root block modification is performed at first, and then subsequent DML are executed.

The purpose is only for illustrative, experimental and feasibility studies in the hope that Oracle can refine the idea.

Test code is based on Blogs:

   Index Service ITL and Recursive Transaction
   Cache Buffer Chains Latch Contention Case Study-2: Reverse Primary Key Index
   Oracle 19.18 Wait Event 'latch: cache buffers chains' P3: block number

Note: Tested in Oracle 19.18.


1. Test Setup



--=================== Test Setup in session-1 ===================--

drop table test_tab purge;

create table test_tab (id number, val number);

drop index test_tab#r;

create unique index test_tab#r on test_tab(id) reverse;

alter table test_tab add constraint test_tab#r primary key (id);

truncate table test_tab;

insert into test_tab select level, -level from dual connect by level <= 4905;

commit;

-- Test Code to insert one row to trigger leaf block split, and root block modification
create or replace procedure root_modi_cbc_latch_workaround (p_insert_id number, p_allow_seconds number := 5) as
  type t_name_tab    is table of dba_segments.segment_name%type;
  type t_file_tab    is table of dba_segments.header_file%type;
  type t_block_tab   is table of dba_segments.header_block%type;
  l_name_tab         t_name_tab   := t_name_tab();
  l_file_tab         t_file_tab   := t_file_tab();
  l_block_tab        t_block_tab  := t_block_tab();
  l_cnt              number := 0; 
  l_scn              number;
  l_scn_ts           timestamp;
  l_modi_ts          timestamp;
  l_msg_str          varchar2(200);
  l_modi             boolean := false;
begin
  -- collect indexes and their root files/root blocks
  -- @TODO store those in a package variable to workaround dba_segments performance
  with sq as (select /*+ materialize */ index_name from dba_indexes where table_name = 'TEST_TAB')
  select /*+ leading(q) use_nl(s) */ q.index_name, header_file, header_block + 1
         bulk collect into l_name_tab, l_file_tab, l_block_tab
    from sq q, dba_segments s where s.segment_name = q.index_name;
  
  dbms_session.sleep(p_allow_seconds);  -- simulate appliaction
  
  -- check if there are any root block modifications. If yes, rollback, re-insert. If not, exit loop.
  for i in 1 ..2 loop
    dbms_output.put_line(systimestamp||'--- Run ' ||i||'--- set savepoint');
    savepoint sp;
    insert into test_tab values(p_insert_id, -p_insert_id);
    l_modi_ts := cast (systimestamp as timestamp);
    
    if i = 1 then 
      for k in 1..l_name_tab.count loop
        select count(1) into l_cnt from v$bh 
         where file#=l_file_tab(k) and block# = l_block_tab(k) and status = 'xcur';   --and dirty = 'Y'

        select max(hscn_wrp*power(2, 32) + hscn_bas) into l_scn from x$bh 
         where l_cnt > 0 and state = 1 and hscn_wrp > 0 and file#=l_file_tab(k) and dbablk=l_block_tab(k);   --state=1, 'xcur'
        
        if l_scn > 0 then
          l_scn_ts := scn_to_timestamp(l_scn);    -- scn_to_timestamp session cached. run test in one new session.
        end if;
        
        l_msg_str := systimestamp||'--- Run ' ||i||'--- Index '||l_name_tab(k)||' Root (File = '||l_file_tab(k)||', Block = '||l_block_tab(k)||'), '||
                    'l_cnt = '||l_cnt||', l_scn = '||l_scn||', l_scn_ts = '||l_scn_ts;
        
        dbms_output.put_line(l_msg_str);
        
        -- scn_to_timestamp: the usual precision of the result value is 3 seconds.
        if l_scn_ts between l_modi_ts - numtodsinterval(p_allow_seconds, 'SECOND') and l_modi_ts then 
          dbms_output.put_line('*** Root modified at '||l_modi_ts||', Root Index Modi at '||l_scn_ts ||
                               ', rollback to make only root modi trx performed, then re-insert.');
          l_modi := true;
          rollback to savepoint sp;   -- only rollback main trx, root modi recursive trx committed (un-rollbackable)
          exit;                       -- one index root modi found, finish modi recursive trx, exit internal loop
        end if; 
      end loop;
      
      if not l_modi then 
        dbms_output.put_line('*** Root not modified');
        exit;
      end if;
    end if;
  end loop;    
end;
/


2. Test Run



--========== Test Run in session-2 (to repeat test, run "Test Setup" at first) ==========--

set serveroutput on
alter session set nls_timestamp_format ='YYYY*MON*DD HH24:MI:SS.FF9';

select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;

exec root_modi_cbc_latch_workaround(4906);

exec root_modi_cbc_latch_workaround(4907);

select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;

rollback;


3. Test Output



--=================== Test Output in session-2 ===================--

12:01:34 SQL > select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;
  SCN_TO_TIMESTAMP(ORA_ROWSCN)             ID        VAL
  -------------------------------- ---------- ----------
  2023*SEP*20 12:01:17.000000000         4904      -4904
  2023*SEP*20 12:01:17.000000000         4905      -4905
  
  2 rows selected.

12:01:34 SQL > exec root_modi_cbc_latch_workaround(4906);
  20-SEP-2023 12:01:42 +02:00--- Run 1--- set savepoint
  20-SEP-2023 12:01:42 +02:00--- Run 1--- Index TEST_TAB#R Root (File = 22, Block = 35467), l_cnt = 1, 
                                          l_scn = 12776808521751, l_scn_ts = 2023*SEP*20 12:01:41.000000000
  *** Root modified at 2023*SEP*20 12:01:42.271069000, Root Index Modi at 2023*SEP*20 12:01:41.000000000, 
      rollback to make only root modi trx performed, then re-insert.
  20-SEP-2023 12:01:42 +02:00--- Run 2--- set savepoint

  Elapsed: 00:00:07.80
  
12:01:42 SQL > exec root_modi_cbc_latch_workaround(4907);
  20-SEP-2023 12:01:50 +02:00--- Run 1--- set savepoint
  20-SEP-2023 12:01:50 +02:00--- Run 1--- Index TEST_TAB#R Root (File = 22, Block = 35467), l_cnt = 1, 
                                          l_scn = 12776808521752, l_scn_ts = 2023*SEP*20 12:01:41.000000000
  *** Root not modified

  Elapsed: 00:00:07.86
  
12:01:50 SQL > select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;
  SCN_TO_TIMESTAMP(ORA_ROWSCN)             ID        VAL
  -------------------------------- ---------- ----------
  2023*SEP*20 12:01:17.000000000         4904      -4904
  2023*SEP*20 12:01:17.000000000         4905      -4905
  2023*SEP*20 12:01:17.000000000         4906      -4906
  2023*SEP*20 12:01:17.000000000         4907      -4907
  
  4 rows selected.
  
12:01:50 SQL > rollback;
  Rollback complete.
Test output shows that both new rows with ID 4906 and 4907 are inserted, only ID 4096 triggers root block modification, but ID 4097 does not.

Wednesday, September 6, 2023

Oracle 19.18 Wait Event 'latch: cache buffers chains' P3: block number

In Oracle 19.18, we observed that Wait Event 'latch: cache buffers chains' Parameter P3 stores DBA (Data Block Address)
(for bigfile tablespace, it is block number (block#). For smallfile tablespace, it is rfile# * power(2, 22) + block#).
(See Blog: latch: cache buffers chains latch contention - a better way for finding the hot block )

This new enhancement makes it much easier to locate CBC latch contention point.

In Oracle Docu, PARAMETER3 is still denoted as "why":

select event#, event_id, name, parameter1, parameter2, parameter3 from v$event_name where name = 'latch: cache buffers chains';

  EVENT#   EVENT_ID NAME                          PARAMETER1 PARAMETER2 PARAMETER3
  ------ ---------- ----------------------------- ---------- ---------- ----------
     113 2779959231 latch: cache buffers chains   address    number     why
To watch such new improvement, one can run test code in Blog: Cache Buffer Chains Latch Contention Case Study-2: Reverse Primary Key Index

We can use following queries to find p3 blocks:

select program, event, p1, p2, p3, p1text, p2text, p3text, v.* 
  from v$session v where event = 'latch: cache buffers chains' order by v.p3; 

select program, event, p1, p2, p3, p1text, p2text, p3text, v.* 
  from v$active_session_history v  
  -- from dba_hist_active_sess_history t
 where event = 'latch: cache buffers chains' order by sample_time desc, v.p3; 
Then with above p3 blocks, list objects by:

select o.object_name, b.* 
  from v$bh b, dba_objects o 
 where b.objd=o.data_object_id 
   and block# in (11763, 11765, 35468)   -- 3 example blocks
 order by block#, class#, b.status;

select o.object_name, dbablk, tch, hladdr cbc_latch_addr
      ,hscn_wrp*power(2, 32) + hscn_bas scn
      ,case when hscn_wrp > 0 then scn_to_timestamp(hscn_wrp*power(2, 32) + hscn_bas) else null end scn_ts
      ,tim epoch
      ,to_date('1970-01-01 00:00:00', 'yyyy-mm-dd hh24:mi:ss') + tim/86400 epoch_time_utc
      ,DECODE (state, 0, 'free', 1, 'xcur', 2, 'scur', 3, 'cr', 4, 'read', 5, 'mrec', 6, 'irec', 7, 'write', 8, 'pi', 9, 'memory', 10, 'mwrite'
              ,11, 'donated', 12, 'protected', 13, 'securefile', 14, 'siop', 15, 'recckpt', 16, 'flashfree', 17, 'flashcur', 18, 'flashna') 
       status
      ,DECODE (BITAND (flag, 1), 0, 'N', 'Y')     dirty
      ,DECODE (BITAND (flag, 16), 0, 'N', 'Y')    temp
      ,DECODE (BITAND (flag, 1536), 0, 'N', 'Y')  ping
      ,DECODE (BITAND (flag, 16384), 0, 'N', 'Y') stale
      ,DECODE (BITAND (flag, 65536), 0, 'N', 'Y') direct
      ,b.* 
  from x$bh b, dba_objects o 
 where b.obj=o.data_object_id 
   and b.dbablk in (11763, 11765, 35468)   -- 3 example blocks
 order by b.dbablk, b.state;
 
-- With above scn_ts to find v$session.sid by matching v$transaction.start_time

Sunday, September 3, 2023

Oracle Index Range Scan with LIKE Condition on Wildcard '_'

This Blog shows LIKE condition index range scan performance and space usage of Oracle index with Wildcard '_'.

Note: Tested in Oracle 19.18.


1. Test Setup


At first, we create a test table with 3 varchar2 columns (name1, name2, name3), which are the concatenations of 3 same items, but in different order, and then 3 respective indexes on them. The item part contains Wildcard '_'.

drop table test_tab purge;

create table test_tab (
    id           number
   ,grp          number
   ,tstr         varchar2(14)
   ,name1        varchar2(100)
   ,name2        varchar2(100)
   ,name3        varchar2(100)
);

create unique index test_tab#p on test_tab(id);

alter table test_tab add constraint test_tab#p primary key (id);

create index test_tab#i#name1 on test_tab (name1);

create index test_tab#i#name2 on test_tab (name2);

create index test_tab#i#name3 on test_tab (name3);
 
insert into test_tab 
with sq as (
select level id, mod(level, 300) grp
      ,to_char((date'2021-11-22' + rownum/86400), 'YYYYMMDDHH24MISS')           ts
      ,decode(mod(level, 3), 0, 'ONE_PART', 1, 'TWO_PART', 2, 'THREE_PART')     part
  from dual connect by level <= 3*1e5)
select id, grp, ts
      ,part ||'.'||grp  ||'.'||ts     name1
      ,grp  ||'.'||part ||'.'||ts     name2
      ,ts   ||'.'||part ||'.'||grp    name3
 from sq;
 
commit;

exec dbms_stats.gather_table_stats('', 'TEST_TAB', cascade=>true);
If we run a query, we can see the format of three name columns:

col name1 for a30 new_value n1
col name2 for a30 new_value n2
col name3 for a30 new_value n3
select id, name1, name2, name3 from test_tab m where id = trunc(dbms_random.value(1, 3*1e5));

    ID  NAME1                        NAME2                        NAME3
------  ---------------------------  ---------------------------  ---------------------------
243505  TWO_PART.205.20211124193825  205.TWO_PART.20211124193825  20211124193825.TWO_PART.205
Then we create a Plsql test program using 3 indexes and printout their execution stats.

create or replace procedure test_tab_proc (p_case number, p_cnt number) as
  l_start       number;
  l_start_cr    number;
  l_end_cr      number;
  l_name1       varchar2(100);
  l_name2       varchar2(100);
  l_name3       varchar2(100);
  l_ret         varchar2(200);
begin
  select name1, name2, name3 into l_name1, l_name2, l_name3
    from test_tab where id = trunc(dbms_random.value(1, 3*1e5)); 

  l_start  := dbms_utility.get_time;
  select value into l_start_cr from v$mystat s, v$statname n where s.statistic#=n.statistic# and name = 'consistent gets';
  case p_case
    when 1 then
      for i in 1..p_cnt loop
        select  /*+ index_rs_asc(t (name1)) */ name1 into l_ret from test_tab t where name1 like l_name1;
      end loop;
      dbms_output.put_line('--------- Index: name1 like '||l_name1||' --------- ');
    when 2 then
      for i in 1..p_cnt loop
        select  /*+ index_rs_asc(t (name2)) */ name2 into l_ret from test_tab t where name2 like l_name2;
      end loop;
      dbms_output.put_line('--------- Index: name2 like '||l_name2||' --------- ');
    when 3 then
      for i in 1..p_cnt loop
        select  /*+ index_rs_asc(t (name3)) */ name3 into l_ret from test_tab t where name3 like l_name3;
      end loop;
      dbms_output.put_line('--------- Index: name3 like '||l_name3||' --------- ');
    end case;
    select value into l_end_cr from v$mystat s, v$statname n where s.statistic#=n.statistic# and name = 'consistent gets';
    dbms_output.put_line('Total Elapsed MS = '||round((dbms_utility.get_time-l_start)*10)||
                       ', Total CR gets= '    ||(l_end_cr-l_start_cr)||
                       ', Per Exec MS = '     ||round((dbms_utility.get_time-l_start)*10/p_cnt, 2)||
                       ', Per Exec CR gets = '||round((l_end_cr-l_start_cr)/p_cnt));
end;
/


2. Test Run


We run 3 tests for 3 indexes with Sql trace:

alter session set tracefile_identifier = 'sql_trc_1';
alter session set events '10046 trace name context forever, level 12'; 

exec test_tab_proc(1, 100);
exec test_tab_proc(2, 100);
exec test_tab_proc(3, 100);

alter session set events '10046 trace name context off';
The output shows that index name1 has 902 CR gets per execution, name2 has 12, and name3 has 3, although each of them returns one single row per execution.

SQL > exec test_tab_proc(1, 100);
      --------- Index: name1 like ONE_PART.270.20211122144930 ---------
      Total Elapsed MS = 1380, Total CR gets= 90200, Per Exec MS = 13.8, Per Exec CR gets = 902

SQL > exec test_tab_proc(2, 100);
      --------- Index: name2 like 37.TWO_PART.20211124044537 ---------
      Total Elapsed MS = 30, Total CR gets= 1200, Per Exec MS = .3, Per Exec CR gets = 12

SQL > exec test_tab_proc(3, 100);
      --------- Index: name3 like 20211124141442.ONE_PART.282 ---------
      Total Elapsed MS = 10, Total CR gets= 300, Per Exec MS = .1, Per Exec CR gets = 3
Here 3 xplans. They are all using INDEX RANGE SCAN, but different execution stats.

************************ INDEX RANGE SCAN TEST_TAB#I#NAME1 **************************

SELECT /*+ index_rs_asc(t (name1)) */ NAME1 FROM TEST_TAB T WHERE NAME1 LIKE :B1 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute    100      0.01       0.01          0          0          0           0
Fetch      100      1.34       1.34          0      90200          0         100
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total      201      1.35       1.35          0      90200          0         100

Rows  Row Source Operation
----  ---------------------------------------------------
   1  INDEX RANGE SCAN TEST_TAB#I#NAME1 (cr=902 pr=0 pw=0 time=17202 us starts=1 cost=2 size=29 card=1)(object id 5082850)


************************ INDEX RANGE SCAN TEST_TAB#I#NAME2 **************************

SELECT /*+ index_rs_asc(t (name2)) */ NAME2 FROM TEST_TAB T WHERE NAME2 LIKE :B1 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute    100      0.00       0.00          0          0          0           0
Fetch      100      0.01       0.01          0       1200          0         100
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total      201      0.02       0.02          0       1200          0         100

Rows  Row Source Operation
----  ---------------------------------------------------
   1  INDEX RANGE SCAN TEST_TAB#I#NAME2 (cr=12 pr=0 pw=0 time=279 us starts=1 cost=2 size=29 card=1)(object id 5082851)


************************ INDEX RANGE SCAN TEST_TAB#I#NAME3 **************************

SELECT /*+ index_rs_asc(t (name3)) */ NAME3 FROM TEST_TAB T WHERE NAME3 LIKE :B1 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute    100      0.00       0.00          0          0          0           0
Fetch      100      0.00       0.00          0        300          0         100
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total      201      0.00       0.00          0        300          0         100

Rows  Row Source Operation
----  ---------------------------------------------------
   1  INDEX RANGE SCAN TEST_TAB#I#NAME3 (cr=3 pr=0 pw=0 time=55 us starts=1 cost=2 size=29 card=1)(object id 5082852)


3. LIKE Condition Index Range Scan on Wildcard '_'Index


Following query shows that TEST_TAB#I#NAME1 and TEST_TAB#I#NAM2 have similar clustering_factor and leaf_blocks (space usage), but TEST_TAB#I#NAME3 has much less (see later Index Treedump).

select index_name, clustering_factor, leaf_blocks, blevel from dba_indexes v where table_name = 'TEST_TAB';

  INDEX_NAME         CLUSTERING_FACTOR LEAF_BLOCKS   BLEVEL
  ------------------ ----------------- ----------- --------
  TEST_TAB#I#NAME1              300000        2766        2
  TEST_TAB#I#NAME2              300000        2773        2
  TEST_TAB#I#NAME3                4714        1478        2
We can also get the distinct number of 3 name columns before wildcard '_':

select count(*)  
      ,count(distinct substr(name1, 1, instr(name1, '_')-1)) name1_prefix_cnt
      ,count(distinct substr(name2, 1, instr(name2, '_')-1)) name2_prefix_cnt
      ,count(distinct substr(name3, 1, instr(name3, '_')-1)) name3_prefix_cnt
from test_tab;

  COUNT(*)  NAME1_PREFIX_CNT  NAME2_PREFIX_CNT  NAME3_PREFIX_CNT
  --------  ----------------  ----------------  ----------------
    300000                 3               300            300000
name1 in test_tab is constructed like:

   ONE_PART.x.y
   TWO_PART.x.y
   THREE_PART.x.y
Before '_', there are 3 different values ('ONE', 'TWO', 'THREE'). So name1 is divided into three parts. TEST_TAB#I#NAME1 has 2766 leaf blocks, TEST_TAB#I#NAME1 index range scan makes 2766/3, which is 902 CR gets per execution (it performs like an index partition full scan).

Similarily for name2, it is 2773/300, which is about 12 CR gets per execution (index BLEVEL = 2 requires 2 branch blocks gets per execution).

And for name3, it is 3 CR gets per execution (2 branch blocks gets and 1 leaf block get).

Above tests show that all of 3 queries return one single row per execution, but TEST_TAB#I#NAME1 has a much higher cost.

If we run a query to compare the result of ordered index range scan with unordered index fast full scan with query:

with sq_rs  as (select /*+ index_asc(t test_tab#i#name1) materialize */ name1, rownum rn from test_tab t where name1 like 'ONE_PART%') 
    ,sq_ffs as (select /*+ index_ffs(t test_tab#i#name1) materialize */ name1, row_number() over(order by name1) rn from test_tab t where name1 like 'ONE_PART%')
select r.*, f.*
from sq_rs r, sq_ffs f
where r.rn = f.rn and r.name1 != f.name1  
--where r.rn != f.rn and r.name1 = f.name1 
;
   
no rows selected
"no rows selected" means that name1 in TEST_TAB#I#NAME1 is strictly sorted, but Like condition index range scan only uses substring before '_' for the search, therefore 1/3 TEST_TAB#I#NAME1 blocks are read although only one leaf block contains the searched data.


4. Index Meta Data


We can also collect more index meta data, then make index treedump and index block dump:

select object_name, object_id, to_char(object_id, 'xxxxxxxx') object_id_hex from dba_objects t where object_name like 'TEST_TAB#%';   

OBJECT_NAME         OBJECT_ID OBJECT_ID
------------------ ---------- ---------
TEST_TAB#I#NAME1      5082850    4d8ee2
TEST_TAB#I#NAME2      5082851    4d8ee3
TEST_TAB#I#NAME3      5082852    4d8ee4

select column_name, avg_col_len from  dba_tab_columns where table_name = 'TEST_TAB';   -- length 29

COLUMN_NAME   AVG_COL_LEN
------------- -----------
ID                      5
GRP                     4
TSTR                   15
NAME1                  29
NAME2                  29
NAME3                  29

select segment_name, header_file, header_block from dba_segments t where segment_name like 'TEST_TAB#%';

SEGMENT_NAME       HEADER_FILE HEADER_BLOCK
------------------ ----------- ------------
TEST_TAB#I#NAME1            22       826450
TEST_TAB#I#NAME2            22       826442
TEST_TAB#I#NAME3            22       826434

Index Treedump


alter session set tracefile_identifier = "TEST_TAB#I#NAME1_td";
alter session set events 'immediate trace name treedump level 5082850'; 

alter session set tracefile_identifier = "TEST_TAB#I#NAME2_td";
alter session set events 'immediate trace name treedump level 5082851';

alter session set tracefile_identifier = "TEST_TAB#I#NAME3_td";
alter session set events 'immediate trace name treedump level 5082852'; 
Treedump shows that NAME1 and NAME2 index blocks have avs = 4000 (AVailable Space, free space within the leaf block), whereas NAME3 has only avs=5. That is why name1 and name2 have more LEAF_BLOCKS than name3.

TEST_TAB#I#NAME1 
      leaf: 0xf5fcd 1007565 (1: row:108.108 avs:4000)
TEST_TAB#I#NAME2
      leaf: 0xf5dde 1007070 (1: row:108.108 avs:4000) 
TEST_TAB#I#NAME3
      leaf: 0xc9c46 826438 (1: row:202.202 avs:5)
The above dba_segments for TEST_TAB#I#NAME1 shows that its HEADER_BLOCK is 826450. If we look following TEST_TAB#I#NAME1 tree dump, we can see that root node block: 826451 is the next block of its HEADER_BLOCK: 826450.

------------- TEST_TAB#I#NAME1 tree dump -------------
----- begin tree dump
branch: 0xc9c53 826451 (0: nrow: 16, level: 2)
   branch: 0xf335c 996188 (-1: nrow: 162, level: 1)
      leaf: 0xc9c54 826452 (-1: row:107.107 avs:4037)
      leaf: 0xf3718 997144 (0: row:108.108 avs:4000)
      leaf: 0xf5fcd 1007565 (1: row:108.108 avs:4000)
      leaf: 0xf8517 1017111 (2: row:108.108 avs:4000)
      leaf: 0x15a2fe 1417982 (3: row:108.108 avs:4000) 
      
Note: "0xc9c53 826451" are DBA (Data Block Address) in (hex decimal).
      For bigfile tablespace, it is block#; for smallfile tablespace, it is rfile# * power(2, 22) + block#.
	  see dbms_utility.data_block_address_file, dbms_utility.data_block_address_block.
      
      If decimal number is negative, convert it by: power(2, 32) + "decimal number" = DBA.

Index analyze


analyze index TEST_TAB#I#NAME1 validate structure offline;
--validate index TEST_TAB#I#NAME1
select * from index_stats;
select * from index_histogram; 

analyze index TEST_TAB#I#NAME3 validate structure offline;
--validate index TEST_TAB#I#NAME3
select * from index_stats;
select * from index_histogram; 

Index Data Block Dump


alter session set tracefile_identifier = "TEST_TAB#I#NAME1_seg";
alter system dump datafile 22 block min 826450 block max 826457;	

alter session set tracefile_identifier = "TEST_TAB#I#NAME2_seg";
alter system dump datafile 22 block min 826442 block max 826449;	

alter session set tracefile_identifier = "TEST_TAB#I#NAME3_seg";
alter system dump datafile 22 block min 826434 block max 826441;	

NAME1 Index blocks List


select blk, count(*), min(id) min_id, max(id) max_id, min(name1), max(name1) from (
  select dbms_rowid.rowid_block_number(sys_op_lbid (5082845, 'L', rowid)) blk, t.* from test_tab t)  
group by blk order by blk;

Index Entry Sequence

Dump and compare block# read sequence of index range scan and index fast full scan against index treedump (index data structure)

-- index range scan gets index TEST_TAB#I#NAME1 blocks in ordered read (db file sequential read').
--   index range scan first reads index blocks from root block to the leftmost satisfied first leaf block along branch blocks, 
--   then reads from first found leaf blocks till last satisfied leaf block (which are linked with each one points to next one).
--   One block per read, logically sequential.

alter system flush buffer_cache; 
alter session set tracefile_identifier = 'sql_trc_rs';
alter session set events '10046 trace name context forever, level 12'; 
with sq_rs  as (select /*+ index_asc(t test_tab#i#name1) materialize */ name1, rownum rn from test_tab t where name1 like 'ONE_PART%') 
select count(*) from sq_rs r;
alter session set events '10046 trace name context off';

-- index fast full scan gets index TEST_TAB#I#NAME1 blocks in unordered read ('db file scattered read').
--   index fast full scan reads all index blocks (brach/leaf) like full table scan.
--   Multiple blocks per read without considering any order.

alter system flush buffer_cache; 
alter session set tracefile_identifier = 'sql_trc_ffs';
alter session set events '10046 trace name context forever, level 12'; 
with sq_ffs as (select /*+ index_ffs(t test_tab#i#name1) materialize */ name1, row_number() over(order by name1) rn from test_tab t where name1 like 'ONE_PART%')
select count(*) from sq_ffs f;
alter session set events '10046 trace name context off';

Thursday, August 3, 2023

Oracle SYS_GUID Composition

Oracle Docu wrote:
SYS_GUID generates and returns a globally unique identifier (RAW value) made up of 16 bytes. 
On most platforms, the generated identifier consists of a host identifier, a process or thread identifier of the process 
or thread invoking the function, and a nonrepeating value (sequence of bytes) for that process or thread.
The Oracle Database SYS_GUID function does not return a standard UUID since the generated GUID is not a random number.
(see: IETF RFC 4122 version 4 UUID, and Oracle dbms_crypto.randombytes and Enhancement Suggestion).

In this Blog, we will try to look its construction.

With following query, we can decompose it into 5 components.

with sq as (select sys_guid() gid, s.logon_time, p.spid, s.sid, s.serial#, p.pid from v$session s, v$process p 
             where s.paddr=p.addr and (s.sid = (select distinct sid from v$mystat)))
select substr(gid, 1, 12) seq_inc_by_1
      ,substr(gid, 13, 4)||'(='||to_number(substr(gid, 13, 4), 'XXXXXX')||')' proc_spid  --, to_number(substr(gid, 13, 4), 'XXXXXX') proc_spid_number
      ,substr(gid, 17, 4) unknown_1
      ,substr(gid, 21, 8) unix_host_id
      ,substr(gid, 29, 4) unknown_2
      ,sq.*
from sq;

  SEQ_INC_BY_1   PROC_SPID   UNKNOWN_1 UNIX_HOST_ID  UNKNOWN_2 
  -------------- ----------- --------- ------------  ----------
  01EC602EB9D3   259D(=9629) E063      6E08740A      29BD      
   
  GID                              LOGON_TIME           SPID SID SERIAL# PID
  -------------------------------- -------------------- ---- --- ------- ---
  01EC602EB9D3259DE0636E08740A29BD 2023-AUG-02 09:11:33 9629 197    5605  31


== Legend ==

    SEQ_INC_BY_1 : session own Sequence Number, increase 1 per sys_guid call, 
                   initiated by a number related to v$session logon_time
    PROC_SPID    : v$process.spid
    UNKNOWN_1    : ??? (E063 or E064)
    UNIX_HOST_ID : hostid command output (Linux little endian, 4 bytes reverse order)
    UNKNOWN_2    : ???
With each call of sys_guid, SEQ_INC_BY_1 is increased by 1, starting from some v$session.logon_time.

with sq1 as (select /*+ materialize */ level         nr, substr(sys_guid(), 1, 12) guid_12 from dual connect by level <= 1e6)
    ,sq2 as (select /*+ materialize */ level + 1*1e6 nr, substr(sys_guid(), 1, 12) guid_12 from dual connect by level <= 1e6)
    ,sq3 as (select /*+ materialize */ level + 2*1e6 nr, substr(sys_guid(), 1, 12) guid_12 from dual connect by level <= 1e6)
select min(nr) min_nr, max(nr) max_nr
      ,min(guid_12) min_guid_12, max(guid_12) max_guid_12
      ,count(*) nr_count
      ,to_number(max(guid_12), 'xxxxxxxxxxxx') - to_number(min(guid_12), 'xxxxxxxxxxxx') + 1 nr_count
from 
(select * from sq1
   union 
 select * from sq2
   union
 select * from sq3); 
 
  MIN_NR     MAX_NR MIN_GUID_12  MAX_GUID_12    NR_COUNT   NR_COUNT
  ------ ---------- ------------ ------------ ---------- ----------
       1    3000000 00E804347C2E 00E8046242ED    3000000    3000000
So SEQ_INC_BY_1 is a Sequence Number, increasing 1 per sys_guid call, initiated by a number related to epoch time of v$session logon_time (probably cached in each v$process.spid).

SEQ_INC_BY_1 is 12 hex digits, with maximum decimal value:

  select to_number(lpad('F', 12, 'F'), lpad('X', 12, 'X')) from dual; 
    -- 281474976710655
The last 6 digits represents a pure calling sequence number, the rest prefix digits are UNIX epoch seconds. So the maximum seconds is:

   281474976
Since 281474976 seconds is about 3258 days (281474976/86400) or about 9 years, sys_guid is wrapped on overflow about each 9 years. The first 10 reset datetime can be projected as follows:

select level NR#
      ,to_date('1970-01-01 00:00:00', 'yyyy-mm-dd hh24:mi:ss') + level*281474976/86400 datetime
  from dual connect by level <= 10;

    NR# DATETIME
  ----- --------------------
      1 1978*DEC*02 19:29:36
      2 1987*NOV*03 14:59:12
      3 1996*OCT*04 10:28:48
      4 2005*SEP*05 05:58:24
      5 2014*AUG*07 01:28:00
      6 2023*JUL*08 20:57:36
      7 2032*JUN*08 16:27:12
      8 2041*MAY*10 11:56:48
      9 2050*APR*11 07:26:24
     10 2059*MAR*13 02:56:00
16 bytes sys_guid is a 32 long raw hex value. In each interval, 10/16 of them are starting with number 0-9, 6/16 with A-F.

Given a sys_guid, we can estimate its datetime by:

with sq as (select to_number(substr('00E928C3120917BAE063A005740AC0F2', 1, 12), lpad('X', 12, 'X'))/1e6 epoch_reminder from dual)
select epoch_reminder
      ,to_date('2023*JUL*08 20:57:36', 'YYYY*MON*DD hh24:mi:ss') + epoch_reminder/86400 estimated_datetime
from  sq;

  EPOCH_REMINDER   ESTIMATED_DATETIME
  --------------   --------------------
      1001411.25   2023*JUL*20 11:07:47
For each logon_time, we can estimate SEQ_INC_BY_1 as follows:

create or replace function seq_inc_by_1_calc (logon_time date) return varchar2 as
  l_max_seconds   number;
  l_logon_utc     date;
  l_logon_epoch   number; 
  l_inc_by_1_calc varchar2(20);
begin
  l_max_seconds   := trunc(to_number(lpad('F', 12, 'F'), lpad('X', 12, 'X'))/1e6);
  l_logon_utc     := cast(from_tz(cast(logon_time as timestamp), to_char(systimestamp, 'TZH:TZM')) at time zone 'UTC' as date);
  l_logon_epoch   := (l_logon_utc - to_date('1970-01-01 00:00:00','YYYY-MM-DD HH24:MI:SS')) * 86400;
  l_inc_by_1_calc := to_char(mod(l_logon_epoch, l_max_seconds)*1e6, lpad('X', 12, '0'));
  return l_inc_by_1_calc;
end;
/
For example, here is a test to show generated first 12 hex digits and calculated SEQ_INC_BY_1 (seq_inc_by_1_calc):

with sq as (
  select /*+ materialize */ logon_time, sys_guid() guid, seq_inc_by_1_calc(logon_time) seq_inc_by_1_calc
    from v$session 
   where sid = (select sid from v$mystat where rownum=1))
select logon_time
      ,guid
      ,substr(guid, 1, 12) seq_inc_by_1
      ,seq_inc_by_1_calc
      ,trunc(to_number(substr(guid, 1, 12), lpad('X', 12, 'X'))/1e6) seq_inc_by_1_num
      ,trunc(to_number(seq_inc_by_1_calc, lpad('X', 12, 'X'))/1e6)        seq_inc_by_1_calc_num
  from sq;

  LOGON_TIME           GUID                             SEQ_INC_BY_1   SEQ_INC_BY_1_CALC  SEQ_INC_BY_1_NUM SEQ_INC_BY_1_CALC_NUM
  -------------------- -------------------------------- -------------- ------------------ ---------------- ---------------------
  2023-AUG-02 09:11:33 01EC602EB9C2259DE0636E08740A29BD 01EC602EB9C2    01EB5FD90340               2114737               2110437
To verify the above SEQ_INC_BY_1 estimation, we run Oracle Jobs to collect both generated and computed value and then run query to compare the differences.

drop table test_guid_tab;

create sequence test_guid_seq;

create table test_guid_tab as
  select -123456789 job_id, test_guid_seq.nextval seq, sys_guid() guid
        ,logon_time, sid, s.serial#, spid, pid, sysdate datetime
   from v$session s, v$process p 
   where s.paddr=p.addr and sid = (select sid from v$mystat where rownum=1) and 1=2;

create or replace procedure test_tab_guid_proc(p_job_id number, p_sleep_seconds number := 1) as
begin
  insert into test_guid_tab
  select p_job_id, test_guid_seq.nextval seq, sys_guid() guid
        ,logon_time, sid, s.serial#, spid, pid, sysdate datetime
   from v$session s, v$process p 
   where s.paddr=p.addr and sid = (select sid from v$mystat where rownum=1);
  commit;
  dbms_session.sleep(p_sleep_seconds);
end;
/

--exec test_tab_guid_proc(0);

-- select * from test_guid_tab;

-- collect sys_guid by jobs. Each new started job logs on as a new session with a new logon_time.
create or replace procedure test_tab_guid_job(p_job_cnt number, p_dur_seconds number, p_sleep_seconds number := 1) as
  l_job_name varchar2(50);
begin
  for i in 1..p_job_cnt loop
    l_job_name := 'TEST_JOB_GUID_'||i;
    dbms_scheduler.create_job (
      job_name        => l_job_name,
      job_type        => 'PLSQL_BLOCK',
      job_action      => 
        'begin 
           test_tab_guid_proc('||i||', '||p_sleep_seconds||');
        end;',    
      start_date      => systimestamp,
      end_date        => systimestamp + numtodsinterval(p_dur_seconds, 'SECOND'),
      repeat_interval => 'freq=SECONDLY',
      auto_drop       => true,
      enabled         => true);
  end loop;
end;
/

-- 32 Job sessions running 100 seconds, each time a new logon_time for each session.
--exec test_tab_guid_job(32, 100);


-- check the difference of Oracle generated GUID and calculated GUID

select t.*, seq_inc_by_1_calc(logon_time) guid_12_calc
      ,substr(guid, 1, 12) guid_12
      ,seq_inc_by_1_calc(logon_time) guid_12_calc
      ,trunc(to_number(substr(guid, 1, 12), lpad('X', 12, 'X'))/1e6)            guid_12_num
      ,trunc(to_number(seq_inc_by_1_calc(logon_time), lpad('X', 12, 'X'))/1e6)  guid_12_calc_num
      ,trunc(to_number(substr(guid, 1, 12), lpad('X', 12, 'X'))/1e6)
       -
       trunc(to_number(seq_inc_by_1_calc(logon_time), lpad('X', 12, 'X'))/1e6)  delta
from test_guid_tab t
--where spid=32287
order by sid, logon_time;
By the way, sys_guid() is internally used to generate Oracle AQ msgid out parameter in DBMS_AQ.ENQUEUE and DBMS_AQ.DEQUEUE call. (for some example, see Blog: Oracle Multi-Consumer AQ dequeue with dbms_aq.listen Wait Events (I) )

sys_guid() is Oracle internal subroutine kokidgen. In oracle STANDARD package, it is pesguid.

Wednesday, June 14, 2023

One Deadlock of 'row cache lock' and 'library cache lock'

In this Blog, we first demonstrate a deadlock of 'row cache lock' and 'library cache lock', and then provide an analysis.

Note: Tested in Oracle 19.17 on Linux.


1. Test Code


The test involves a cluster table and two Sql statements, one is a select, another is an alter.

drop table test_tab;

drop cluster test_cluster;

create cluster test_cluster (x number);

create index test_cluster_indx on cluster test_cluster;

create table test_tab (x, y) cluster test_cluster(x)
  as select level, 'abc' from dual connect by level <= 3;


create or replace procedure test_proc_sel_tab(p_cnt number) as
begin   
  for i in 1..p_cnt loop
    for c in (select * from test_tab) loop
      null;
    end loop;
    dbms_session.sleep(0.01);
  end loop;
end;
/

-- exec test_proc_sel(5);

create or replace procedure test_proc_alt_cluster(p_cnt number) as
begin
  for i in 1..p_cnt loop
    execute immediate 'ALTER CLUSTER test_cluster PCTFREE 18 INITRANS 19';
  end loop;
end;
/

-- exec test_proc_alt(5);


2. Test Run


Open two Sqlplus sessions, do following test. After a few seconds, Session-2 throws: ORA-00060: deadlock

-- Session-1@T1, run about 10 seconds

exec test_proc_sel_tab(1e4);

-- Session-2@T2, run within 10 seconds

exec test_proc_alt_cluster(1e4);

  -- output
  --   ORA-00060: deadlock detected while waiting for resource
  --   ORA-06512: at line 3
In order to make deadlock reproducible, both sessions execute statements concurrently in a loop.
If both statements are executed only occasionally at the same time, probably the deadlock appears only intermittently.


3. Trace Files


Here trace files after deadlock occurrence.


3.1 Session-1 Trace File



----- Current SQL Statement for this session (sql_id=g677askf3bry6) -----
SELECT * FROM TEST_TAB

----- PL/SQL Call Stack -----
  object      line  object
  handle    number  name
0x9a8b9220         4  procedure K.TEST_PROC_SEL_TAB
0x9a67a4b0         1  anonymous block

--------------------- Binary Stack Dump ---------------------

[1] (ksedst1()+95 -> kgdsdst())
...
[8] (kqrLockPo()+274 -> kqrget())
[9] (kqrpre()+1095 -> kqrLockPo())
[10] (kkdlgstcd()+347 -> kqrpre())
[11] (kkdlgstd()+3385 -> kkdlgstcd())
[12] (kkmfcbloCbk()+179 -> kkdlgstd())
[13] (kkmpfcbk()+235 -> kkmfcbloCbk())
[14] (qcsprfro()+648 -> kkmpfcbk())
[15] (qcsprfro_tree()+372 -> qcsprfro())
[16] (qcsprfro_tree()+138 -> qcsprfro_tree())
[17] (qcspafq()+240 -> qcsprfro_tree())
[18] (qcspqbDescendents()+264 -> qcspafq())
[19] (qcspqb()+255 -> qcspqbDescendents())
[20] (kkmdrv()+180 -> qcspqb())
[21] (opiSem()+2191 -> kkmdrv())
[22] (opiprs()+321 -> opiSem())
[23] (kksParseChildCursor()+527 -> opiprs())
[24] (rpiswu2()+2004 -> kksParseChildCursor())
[25] (kksLoadChild()+5287 -> rpiswu2())
[26] (kxsGetRuntimeLock()+2335 -> kksLoadChild())
[27] (kksfbc()+19638 -> kxsGetRuntimeLock())
[28] (opiexe()+2864 -> kksfbc())

-- kqrLockPo.rsi=r15 = 000000009AE73170 = v$rowcache_parent.ADDRESS of TEST_CLUSTER	

========== FRAME [9] (kqrpre()+1095 -> kqrLockPo()) ========== 
defined by frame pointers 0x7ffdf28dce80  and 0x7ffdf28dcd30
CALL TYPE: call   ERROR SIGNALED: no   COMPONENT: (null)
RDI 000000000000000B RSI 000000009AE73170 RDX 00000000B76FF440 
RCX 0000000000000003 R8 0000000000000000 R9 00007F4C00000000 
RAX 0000000000000000 RBX 0000000000000000 RBP 00007FFDF28DCE80 
R10 00007FFDF28DB010 R11 0000000000000000 R12 0000000000000003 
R13 00007F4CDA143140 R14 00000000A789CB80 R15 000000009AE73170 
RSP 00007FFDF28DCD40 RIP 0000000012DCE457 

          SOC: 0x9c32fb98, type: row cache enqueues (111), map: 0xa5f39680
               state: LIVE (0x99fc), flags: INIT (0x1)
          row cache enqueue: count=1 session=0xb71de728 object=0x9ae73170, request=S
          flag=03 WAI/TRC/-/-/-/-/-/- savepoint=0xd46f
          row cache parent object: addr=0x9ae73170 cid=8(dc_objects)
          conid=0 conuid=0 inc=1, pdbinc=1
          hash=d5c4d330 typ=1 transaction=0xac528090 flags=0000002a
          version=13 mtx version=134
          objectno=4774244 ownerid=49 nsp=5
          name=TEST_CLUSTER
          own=0x9ae73240[0x9d422ca0,0x9d422ca0] wat=0x9ae73250[0x9c32fc18,0x9c32fc18] mode=X req=N
                   
    Current Wait Stack:
     0: waiting for 'row cache lock'
        cache id=0x8, mode=0x0, request=0x3
        wait_id=6425 seq_num=6454 snap_id=1
        wait times: snap=1.030535 sec, exc=1.030535 sec, total=1.030535 sec
        wait times: max=infinite, heur=1.030535 sec
        wait counts: calls=1 os=1
        in_wait=1 iflags=0x55a2

    Dumping one waiter:
      inst: 1, sid: 369, ser: 13483
      wait event: 'library cache lock'
        p1: 'handle address'=0x9aebb010
        p2: 'lock address'=0x9d8cc410
        p3: '100*mode+namespace'=0x48d96600010003
      row_wait_obj#: 4294967295, block#: 0, row#: 0, file# 0
      min_blocked_time: 0 secs, waiter_cache_ver: 117


3.2 Session-2 Trace file



----- Current SQL Statement for this session (sql_id=42y5q6y5vyxsw) -----
ALTER CLUSTER test_cluster PCTFREE 18 INITRANS 19

----- PL/SQL Call Stack -----
  object      line  object
  handle    number  name
0x9a67e1c0         4  procedure K.TEST_PROC_ALT_CLUSTER
0x9a6665d8         1  anonymous block

--------------------- Binary Stack Dump ---------------------

[1] (ksedst1()+95 -> kgdsdst()) 
...
[7] (kglLockWait()+1240 -> ksfgti()) 
[8] (kgllkal()+4711 -> kglLockWait()) 
[9] (kglLock()+1425 -> kgllkal()) 
[10] (kglget()+293 -> kglLock()) 
[11] (kkdllk0()+418 -> kglget()) 
[12] (kkdllcl()+557 -> kkdllk0()) 
[13] (kkdllct()+26 -> kkdllcl()) 
[14] (acldrv()+1359 -> kkdllct()) 
[15] (opiexe()+30659 -> acldrv()) 

-- kgllkal.rdx = 000000009AEBB010 = v$db_object_cache.ADDR for TEST_TAB

========== FRAME [9] (kglLock()+1425 -> kgllkal()) ========== 
defined by frame pointers 0x7ffd39edb340  and 0x7ffd39edade0
CALL TYPE: call   ERROR SIGNALED: no   COMPONENT: LIBCACHE
RDI 00007F087E5419C0 RSI 00007FFD39EDB2E0 RDX 000000009AEBB010 
RCX 0000000000000003 R8 000000009AEBB090 R9 00007FFD39EDBAA0 
RAX 00000000A541C1F8 RBX 0000000000000001 RBP 00007FFD39EDB340 
R10 00007FFD39ED8FF0 R11 00000000132262E0 R12 0000000000000000 
R13 0000000000000000 R14 0000000000000000 R15 0000000000000006 
RSP 00007FFD39EDADF0 RIP 00000000130AC291 

    Current Wait Stack:
     0: waiting for 'library cache lock'
        handle address=0x9aebb010, lock address=0x9d8cc410, 100*mode+namespace=0x48d96600010003
        wait_id=5715 seq_num=5755 snap_id=1
        wait times: snap=1.002169 sec, exc=1.002169 sec, total=1.002169 sec
        wait times: max=15 min 0 sec, heur=1.002169 sec
        wait counts: calls=1 os=1
        in_wait=1 iflags=0x55a2
        
    Dumping one waiter:
      inst: 1, sid: 549, ser: 40906
      wait event: 'row cache lock'
        p1: 'cache id'=0x8
        p2: 'mode'=0x0
        p3: 'request'=0x3
      row_wait_obj#: 105930, block#: 271548, row#: 0, file# 47
      min_blocked_time: 0 secs, waiter_cache_ver: 117


3.3 diag_base trace file



DEADLOCK DETECTED (id=0x770be968)
Chain Signature: 'library cache lock'<='row cache lock' (cycle)
 
-------------------------------------------------------------------------------
    Oracle session identified by:
    {
              process id: 57, oracle@test_db
              session id: 549
    }
    is waiting for 'row cache lock' with wait info:
    {
                      p1: 'cache id'=0x8                 -- P1=8  dc_objects (PARENT) or dc_object_grants
                      p2: 'mode'=0x0
                      p3: 'request'=0x3                  -- P3=3 (share mode), P3=5 (exclusive)
             current sql: SELECT * FROM TEST_TAB
    }
    and is blocked by
 => Oracle session identified by:
    {
              process id: 50, oracle@test_db
              session id: 369
    }
    which is waiting for 'library cache lock' with wait info:
    {
                      p1: 'handle address'=0x9aebb010     -- P1=0x9aebb010 TEST_TAB
                      p2: 'lock address'=0x9d8cc410
                      p3: '100*mode+namespace'=0x48d96600010003
                    Name: K.TEST_TAB   [Nsp: 1 Typ: 2 Id: 4774246]
           timeout after: 14 min 59 sec                   -- 'library cache lock' maximum 900 seconds
             current sql: ALTER CLUSTER test_cluster PCTFREE 18 INITRANS 19
    }
    and is blocked by the session at the start of the chain. 


4. GDB Debug



4.1 Meta Info


First, we collect some Meta Info of our test objects.

select name, type, addr, hash_value, v.* from v$db_object_cache v 
 where name in ('TEST_CLUSTER', 'TEST_CLUSTER_INDX', 'TEST_TAB');

  NAME               TYPE     ADDR      HASH_VALUE
  ------------------ -------- --------- ----------
  TEST_CLUSTER_INDX  INDEX    9A9F23B8   531250600
  TEST_CLUSTER       CLUSTER  9AE66A10  2742975119
  TEST_TAB           TABLE    9AEBB010  1905210899


-- Blog: Oracle ROWCACHE Views and Contents (http://ksun-oracle.blogspot.com/2018/10/oracle-rowcache-views.html)

----helper function----
create or replace function dump_hex2str (dump_hex varchar2) return varchar2 is
  l_str varchar2(100);
begin
  with sq_pos as (select level pos from dual connect by level <= 1000)
      ,sq_chr as (select pos, chr(to_number(substr(dump_hex, (pos-1)*2+1, 2), 'XX')) ch
                  from sq_pos where pos <= length(dump_hex)/2)
  select listagg(ch, '') within group (order by pos) word
    into l_str
  from sq_chr;
  return l_str;
end;
/

select * from (
  select to_number(ltrim((substr(key, 7, 2)||substr(key, 5, 2)||substr(key, 3, 2)||substr(key, 1, 2)), '0'), 'XXXX') Schema_User_ID,
         (select username from dba_users where user_id = 
            to_number(ltrim((substr(key, 7, 2)||substr(key, 5, 2)||substr(key, 3, 2)||substr(key, 1, 2)), '0'), 'XXXX')) username,
         dump_hex2str(rtrim(substr(key, 13), '0')) dc_object_name, 
         to_number(trim(both '0' from substr(key, 11, 2)||substr(key, 9, 2)), 'XXXX') key_str_len,
         v.* 
  from v$rowcache_parent v 
  where cache_name in ('dc_objects') 
) where dc_object_name = 'TEST_CLUSTER';

  DC_OBJECT_NAME  ADDRESS
  --------------  --------
  TEST_CLUSTER    9AE73170


4.2 GDB script


Based on above meta info, we compose a gdb script:

------------------------- gdb -x gdb_rowcache_libcache_deadlock -p 3214 -------------------------
# first run test_proc_alt_cluster(1) to make test reproducible.
# For kglnaobj_and_schema, Tracing Library Cache Locks 3: Object Names 
#     (https://nenadnoveljic.com/blog/library-cache-lock-object-name/)

# kgllkal.rdx = 9AEBB010 = v$db_object_cache.ADDR for TEST_TAB
# kqrLockPo.rsi=r15 = 9AE73170 = v$rowcache_parent.ADDRESS of TEST_CLUSTER	

set pagination off
set logging file /tmp/gdb.log
set logging overwrite on
set logging on
set $KGLLKHDL_ADDR=0X9AEBB010
set $ROWCACHE_PO_ADDR=0X9AE73170
set $s = 0
set $k = 0
set $r = 0

break *kgllkal if $rdx==$KGLLKHDL_ADDR
command
printf "S-%i ===== LibCache Seq: %i -- kgllkal (kgllkhdl Addr(rdx)=>%X, Mode(rcx)=>%X), kglnaobj_and_schema: %s --\n", ++$s, ++$k, $rdx, $rcx, ($rdx+0x1c0)
bt 3
cont
end

# kgllkdl or kglUnLock. ALTER CLUSTER GDB calls kgllkdl.
break *kgllkdl if $r8==$KGLLKHDL_ADDR
command
printf "S-%i ===== LibCache Seq: %i -- kgllkdl (kgllkhdl Addr(r8)=>%X) --\n", ++$s, ++$k, $r8
bt 3
cont
end

# kgllkdl or kglUnLock. QUERY GDB calls kglUnLock.
break *kglUnLock if $r8==$KGLLKHDL_ADDR || $r15==$KGLLKHDL_ADDR
command
printf "S-%i ===== LibCache Seq: %i -- kglUnLock (kgllkhdl Addr(r8)=>%X or Addr(r15)=>%X) --\n", ++$s, ++$k, $r8, $r15
bt 3
cont
end

break *kqrLockPo if $rsi==$ROWCACHE_PO_ADDR || $r15==$ROWCACHE_PO_ADDR
command
printf "S-%i ===== RowCache Seq: %i -- kqrLockPo (Addr(rsi)=>%X, Request Mode(rcx)=>%X) --\n", ++$s, ++$r, $rsi, $rcx
bt 3
cont
end

# kqrReleaseLock or kqreqd
break *kqrReleaseLock if $rdx==$ROWCACHE_PO_ADDR || $rdi==$ROWCACHE_PO_ADDR
command
printf "S-%i ===== RowCache Seq: %i -- kqrReleaseLock (Addr(rdx)=>%X) --\n", ++$s, ++$r, $rdx
bt 3
cont
end

cont


4.3 ALTER CLUSTER GDB


We run ALTER CLUSTER with gdb trace, here the output:

06:55:14 SQL >  exec test_proc_alt_cluster(1);

Breakpoint 4, 0x0000000012dd0890 in kqrLockPo ()
S-1 ===== RowCache Seq: 1 -- kqrLockPo (Addr(rsi)=>9AE73170, Request Mode(rcx)=>5) --
#0  0x0000000012dd0890 in kqrLockPo ()
#1  0x0000000012dce457 in kqrpre ()
#2  0x00000000030b573d in kkdlGetRowCacheOb ()

Breakpoint 1, 0x00000000130b0b40 in kgllkal ()
S-2 ===== LibCache Seq: 1 -- kgllkal (kgllkhdl Addr(rdx)=>9AEBB010, Mode(rcx)=>3), kglnaobj_and_schema: TEST_TABK --
#0  0x00000000130b0b40 in kgllkal ()
#1  0x00000000130ac291 in kglLock ()
#2  0x00000000130a70c5 in kglget ()

Breakpoint 1, 0x00000000130b0b40 in kgllkal ()
S-3 ===== LibCache Seq: 2 -- kgllkal (kgllkhdl Addr(rdx)=>9AEBB010, Mode(rcx)=>1), kglnaobj_and_schema: TEST_TABK --
#0  0x00000000130b0b40 in kgllkal ()
#1  0x00000000130ac291 in kglLock ()
#2  0x00000000130a70c5 in kglget ()

Breakpoint 2, 0x00000000130b34d0 in kgllkdl ()
S-4 ===== LibCache Seq: 3 -- kgllkdl (kgllkhdl Addr(r8)=>9AEBB010) --
#0  0x00000000130b34d0 in kgllkdl ()
#1  0x00000000130ae53c in kglUnLock ()
#2  0x0000000012bbaad1 in ktagetg0 ()

Breakpoint 5, 0x0000000012dd5b40 in kqrReleaseLock ()
S-5 ===== RowCache Seq: 2 -- kqrReleaseLock (Addr(rdx)=>9AE73170) --
#0  0x0000000012dd5b40 in kqrReleaseLock ()
#1  0x0000000012dd4e9a in kqreqd ()
#2  0x00000000128efaaa in kss_del_cb ()


4.4 Query GDB


We run Query with gdb trace, here the output:

06:57:32 SQL > exec test_proc_sel_tab(1);

Breakpoint 1, 0x00000000130b0b40 in kgllkal ()
S-1 ===== LibCache Seq: 1 -- kgllkal (kgllkhdl Addr(rdx)=>9AEBB010, Mode(rcx)=>2), kglnaobj_and_schema: TEST_TABK --
#0  0x00000000130b0b40 in kgllkal ()
#1  0x00000000130ac291 in kglLock ()
#2  0x00000000130a70c5 in kglget ()

Breakpoint 4, 0x0000000012dd0890 in kqrLockPo ()
S-2 ===== RowCache Seq: 1 -- kqrLockPo (Addr(rsi)=>9AE73170, Request Mode(rcx)=>3) --
#0  0x0000000012dd0890 in kqrLockPo ()
#1  0x0000000012dce457 in kqrpre ()
#2  0x00000000030bf8ab in kkdlgstcd ()

Breakpoint 5, 0x0000000012dd5b40 in kqrReleaseLock ()
S-3 ===== RowCache Seq: 2 -- kqrReleaseLock (Addr(rdx)=>9AE73170) --
#0  0x0000000012dd5b40 in kqrReleaseLock ()
#1  0x0000000012dd4e9a in kqreqd ()
#2  0x0000000012dd4833 in kqrprl ()

Breakpoint 4, 0x0000000012dd0890 in kqrLockPo ()
S-4 ===== RowCache Seq: 3 -- kqrLockPo (Addr(rsi)=>9AE73170, Request Mode(rcx)=>3) --
#0  0x0000000012dd0890 in kqrLockPo ()
#1  0x0000000012dd7b22 in kqrpre1 ()
#2  0x00000000032d0fdf in kqlobjlod ()

Breakpoint 5, 0x0000000012dd5b40 in kqrReleaseLock ()
S-5 ===== RowCache Seq: 4 -- kqrReleaseLock (Addr(rdx)=>9AE73170) --
#0  0x0000000012dd5b40 in kqrReleaseLock ()
#1  0x0000000012dd4e9a in kqreqd ()
#2  0x0000000012dd4833 in kqrprl ()

Breakpoint 3, 0x00000000130ae460 in kglUnLock ()
S-6 ===== LibCache Seq: 2 -- kglUnLock (kgllkhdl Addr(r8)=>9AEBB010) --
#0  0x00000000130ae460 in kglUnLock ()
#1  0x00000000030aaf05 in kkdcicf ()
#2  0x0000000003094ab6 in kkmpost ()

Breakpoint 3, 0x00000000130ae460 in kglUnLock ()
S-7 ===== LibCache Seq: 3 -- kglUnLock (kgllkhdl Addr(r8)=>9AEBB010) --
#0  0x00000000130ae460 in kglUnLock ()
#1  0x00000000030aaf05 in kkdcicf ()
#2  0x0000000003094ab6 in kkmpost ()


5. Root Cacuse Analysis


Look gdb lock request order of both sessions:

(a). ALTER CLUSTER GDB shows: 

       S-1 ===== kqrLockPo ()         'row cache lock'     reqeust on TEST_CLUSTER 
                                      
       S-2 ===== kgllkal ()           'library cache lock' allocate on TEST_TAB
                                      
       S-3 ===== kgllkal ()           'library cache lock' delete on TEST_TAB
                                      
       S-4 ===== kgllkdl ()           'library cache lock' delete on TEST_TAB
                                      
       S-5 ===== kqrReleaseLock ()    'row cache lock'     release on TEST_CLUSTER 

(b). Query GDB shows
        
       S-1 ===== kgllkal ()           'library cache lock' allocate on TEST_TAB
       
       S-2 ===== kqrLockPo ()         'row cache lock'     reqeust on TEST_CLUSTER
       
       S-3 ===== kqrReleaseLock ()    'row cache lock'     release on TEST_CLUSTER 
       
       S-4 ===== kqrLockPo ()         'row cache lock'     reqeust on TEST_CLUSTER
       
       S-5 ===== kqrReleaseLock ()    'row cache lock'     release on TEST_CLUSTER 
       
       S-6 ===== kglUnLock ()         'library cache lock' unlock on TEST_TAB
       
       S-7 ===== kglUnLock ()         'library cache lock' unlock on TEST_TAB
ALTER CLUSTER GDB shows that 'library cache lock' (from allocate till delete) is completely held within 'row cache lock' holding.
(hence both locks can be held at the same time, and 'row cache lock' can be held longer time)

Query GDB shows that 'row cache lock' (from request till release) is completely held within 'library cache lock' holding.
(hence both locks can be held at the same time, and 'library cache lock' can be held longer time)

S1 and S2 in ALTER CLUSTER GDB is reverse order of S1 and S2 in Query GDB.

Therefore the deadlock is caused by the crossing order of 'row cache lock' and 'library cache lock' reqeusts.

There are also similar cases of deadlocks documented in:

  Oracle MOS: Deadlock on Row Cache Lock and Library Cache Lock Dropping REF-PARTITION (Doc ID 2796372.1)

  Deadlock Caused by Row Cache and Library Cache Locks (https://nenadnoveljic.com/blog/deadlock-row-cache-lock-library-cache-lock/)

Monday, April 10, 2023

Oracle Multi-Consumer AQ dequeue with dbms_aq.listen Wait Events (I)

Wait Events (I)           Performance Model (II)           Performance Analysis and Workaround (III)     


We will compose three Blogs to discuss Performance of Oracle Multi-Consumer AQ dequeue with dbms_aq.listen.

The first Blog performs test to show Wait Events: "library cache: mutex X" on queue object and "latch free" on Latch "channel operations parent latch".

The second Blog (II) build Performance Model and reveals implementation problems.

The third Blog (III) traces dequeue operation with different number of parallel concurrent sessions, and conclude with the performance Analysis.

Note: tested on Oracle 19.17 in Linux and 19.7 on Solaris


1. Test Setup


We create one multiple_consumer queue, then compare two dequeue methods: listen_deq and subscr_deq.
listen_deq is using dbms_aq.listen, subscr_deq is with subscriber.

At first, we create 100 subscribers (see appended Test Code):

   exec create_subscribers('msg_queue_multiple', 100);


2. Test Run


We run two tests as follows:

   exec test_run(80, 1, 10*60, 1);   -- listen dequeue 
   
   exec test_run(80, 2, 10*60, 1);   -- subscriber dequeue
It first starts 80 enqueue Jobs and 80 listen_deq Jobs for 10 minutes, gets AWR.
Then starts 80 enqueue Jobs and 80 subscr_deq Jobs for 10 minutes, gets AWR.


2.1 listen_deq


AWR shows heavy "library cache: mutex X" waits on queue object: msg_queue_multiple:

Top 10 Foreground Events by Total Wait Time

  Event                   Waits     Total Wait Time (sec) Avg Wait  % DB time Wait Class
  ----------------------- --------- --------------------- --------- --------- ----------
  library cache: mutex X  4,075,073 9538.6                2.34ms    14.5      Concurrency
  latch free                 68,333 1195.5                17.49ms   1.8       Other

Top Event P1/P2/P3 Values

  Event                   % Event P1,           P2,   P3 Values           % Activity  Parameter 1 Parameter 2 Parameter 3
  ----------------------- ------- ------------- ----- ------------------- ----------- ----------- ----------- -----------
  library cache: mutex X  14.28   "198468696",  "0",  "23028197302599684" 0.47        idn         value       where
  latch free               2.18   "3012157976", "98", "3012157904"        2.16        address     number      why


Note: "library cache: mutex X" on idn=198468696, V$DB_OBJECT_CACHE.name='MSG_QUEUE_MULTIPLE', V$DB_OBJECT_CACHE.hash_value=198468696.

      "latch free" on Latch "channel operations parent latch" (V$LATCH.latch#=98), child: 144 (V$LATCH_CHILDREN.child#=144 for latch#=98).


2.2 subscr_deq


AWR shows less "library cache: mutex X" waits on queue object: msg_queue_multiple,
and no "latch free" on Latch "channel operations parent latch" is listed in Top 10 Events.

Top 10 Foreground Events by Total Wait Time

  Event                   Waits     Total Wait Time (sec) Avg Wait  % DB time Wait Class
  ----------------------- --------- --------------------- --------- --------- ----------
  library cache: mutex X  1,070,942 3235                  3.02ms    3.5       Concurrency

Top Event P1/P2/P3 Values

  Event                   % Event P1,           P2,   P3 Values           % Activity  Parameter 1 Parameter 2 Parameter 3
  ----------------------- ------- ------------- ----- ------------------- ----------- ----------- ----------- -----------
  library cache: mutex X  3.40    "198468696",  "0",  "23028197302599684" 0.26        idn         value       where


3. Test Code



3.1 Queue Table, Queue and Subscribers



--  Oracle Advanced Queuing by Example (https://docs.oracle.com/cd/B10500_01/appdev.920/a96587/apexampl.htm)

execute dbms_aqadm.stop_queue (queue_name         => 'msg_queue_multiple');

execute dbms_aqadm.drop_queue (queue_name         => 'msg_queue_multiple');

execute dbms_aqadm.drop_queue_table (queue_table  => 'MultiConsumerMsgs_qtab', force=> TRUE);

drop type Message_typ force;

/* Creating a message type: */
create or replace noneditionable type message_typ as object (
	subject     varchar2(30),
	text        varchar2(256),
	return_msg  clob);
/   

----- 2. Creating a Multiconsumer Queue Table and Queue

begin
  dbms_aqadm.create_queue_table (
		queue_table        => 'MultiConsumerMsgs_qtab',
		multiple_consumers => TRUE, 
		compatible        => '10.0.0',
		sort_list         =>  'PRIORITY,ENQ_TIME',
		queue_payload_type => 'Message_typ');
end;
/

begin
  dbms_aqadm.create_queue (
		queue_name         => 'msg_queue_multiple',
		queue_table        => 'MultiConsumerMsgs_qtab',
	  max_retries        =>   5,
    retry_delay        =>   2,
    retention_time     =>   86400);
end;
/

-- exec dbms_aqadm.alter_queue('MSG_QUEUE_MULTIPLE', 10, 2, 86400);

begin
  dbms_aqadm.start_queue (
		queue_name         => 'msg_queue_multiple');
end;
/


create or replace procedure purge_queue_table as
	po dbms_aqadm.aq$_purge_options_t;
begin
	dbms_aqadm.purge_queue_table('MultiConsumerMsgs_qtab', null, po);
end;
/

create or replace procedure create_subscribers(p_queue_name varchar2, p_cnt number) as
   l_subscriber sys.aq$_agent;
begin
   for i in 1..p_cnt loop
     begin
		   l_subscriber := sys.aq$_agent('KSUB_'||i, null, null);
		   dbms_aqadm.add_subscriber(queue_name => p_queue_name, subscriber => l_subscriber);
	   exception when others then
	      dbms_output.put_line ('Error '||sqlerrm);
	      commit;
	   end;
   end loop;
end;
/

--  Test
--exec create_subscribers('msg_queue_multiple', 100);

create or replace procedure remove_subscribers(p_queue_name varchar2, p_cnt number) as
   l_subscriber SYS.aq$_agent;
begin
	  for idx in (select consumer_name from dba_queue_subscribers a where a.queue_name = p_queue_name
	                                                                --and consumer_name like 'KSUB%'
	  ) loop
	    l_subscriber := sys.aq$_agent(idx.consumer_name, null, null);
	    dbms_aqadm.remove_subscriber(p_queue_name, l_subscriber);
	  end loop;
end;
/

-- exec remove_subscribers('MSG_QUEUE_MULTIPLE', 100);


3.2 Job clearup Helper and cpu_burner simulator



create or replace procedure clearup_test as
begin
  for c in (select * from dba_scheduler_jobs where job_name like '%TEST_JOB%') loop
    begin
      --set DBA_SCHEDULER_JOBS.enabled=FALSE
	    dbms_scheduler.disable (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
	    --set DBA_SCHEDULER_JOBS.enabled=TRUE, so that it can be scheduled to run (state='RUNNING')
	    --  dbms_scheduler.enable (c.job_name, commit_semantics =>'ABSORB_ERRORS');
	  exception when others then null;
	  end;
	end loop;
	
  for c in (select * from dba_scheduler_running_jobs where job_name like '%TEST_JOB%') loop
    begin
      --If force=FALSE, gracefully stop the job, slave process can update the status of the job in the job queue.
      --If force= TRUE, the Scheduler immediately terminates the job slave.
      --For repeating job with attribute "start_date => systimestamp" and enabled=TRUE, 
      --re-start immediate (state changed from 'SCHEDULED' to 'RUNNING'), DBA_SCHEDULER_JOBS.run_count increases 1.
	    dbms_scheduler.stop_job (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
	  exception when others then null;
	  end;
	end loop;
	
  for c in (select * from dba_scheduler_jobs where job_name like '%TEST_JOB%') loop
    begin
      --If force=TRUE, the Scheduler first attempts to stop the running job instances 
      --(by issuing the STOP_JOB call with the force flag set to false), and then drops the jobs.
	    dbms_scheduler.drop_job (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
	    -- if there is still not-killable jobs, try following
	    --dbms_scheduler.drop_job (c.job_name, force => true);
	  exception when others then null;
	  end;
	end loop;
end;
/

--exec clearup_test;

-- Simulate CPU Load
create or replace procedure cpu_burner (n number) as
  x   number := 0;
begin
  for i in 1 .. n loop
    x := mod (n, 999999) + sqrt (i);
  end loop;
end;
/


3.3 Enqueue, Dequeue (listen_deq and subscr_deq)



create or replace procedure enq(p_sub number) as
   l_enqueue_options     dbms_aq.enqueue_options_t;
   l_message_properties  dbms_aq.message_properties_t;
   l_recipients          dbms_aq.aq$_recipient_list_t;
   l_message_handle      raw(16);
   l_message             message_typ;
   l_return_msg          clob; 
   l_rep_string          varchar2(32000);
   c_queue_name          varchar2(30) := 'K.MSG_QUEUE_MULTIPLE';
   l_name                varchar2(100);
   l_agt_name            varchar2(100);
begin
   --visibility:
     --   dbms_aq.immediate: no commit is required, message is enqueued and committed for dequeue immediately in an autonomous transaction.
     --   dbms_aq.on_commit (default): commit is required, message is visibile for dequeue after COMMIT. enqueue session has an open transaction before commit.
     
   l_enqueue_options.visibility    := dbms_aq.immediate;
   l_enqueue_options.delivery_mode := dbms_aq.persistent;
   
   l_name := 'Message for KSUB_'||p_sub;
   
   ----- when dbms_lob used, "ibrary cache: mutex X" on dbms_lob
   -- l_rep_string := rpad('ABC', 4000, 'x');
   -- dbms_lob.createtemporary(l_return_msg, TRUE, dbms_lob.session);    
   -- l_return_msg := l_name;   
   -- write 300 KB CLOB, It will cause heavy "enq: HW - contention" wait event.
   --  for i in 1..10 loop
   --    dbms_lob.writeappend(l_return_msg, length(l_rep_string), l_rep_string);
   --  end loop;
   
   l_return_msg :=  rpad('ABC', 4000, 'x');
   l_message := message_typ(l_name, to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||
                        '. This message is queued for recipient: Subscriber_'||p_sub, l_return_msg);
   l_agt_name := 'KSUB_'||p_sub;    -- subscriber, M_IDEN
	 l_recipients(1) := SYS.aq$_agent(l_agt_name,  NULL, NULL);     --(name, address, protocol)
   l_message_properties.recipient_list := l_recipients;
   l_message_properties.correlation := l_agt_name;

   dbms_aq.enqueue(
     queue_name         => c_queue_name,
     enqueue_options    => l_enqueue_options,
     message_properties => l_message_properties,
     payload            => l_message,
     msgid              => l_message_handle);
   
   --debug('enq '||l_name);
   dbms_output.put_line (to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||' --- enqueue '||l_name||
                         ', corr_id=' || l_message_properties.correlation||', text=' || l_message.text);
   commit;    -- enqueue with dbms_aq.immediate, no commit required
end;
/

-- exec enq(1);

-- With using sys.aq$_agent, heavy "latch frree" on "channel operations parent latch" (latch#=98) 
--    on a particular child: v$latch_children.CHILD#=144
--    Event: wait for unread message on broadcast channel
create or replace procedure listen_deq(p_sub number, p_wait number := 10) as
   l_dequeue_options     dbms_aq.dequeue_options_t;
   l_message_properties  dbms_aq.message_properties_t;
   l_message_handle      raw(16);
   l_message             message_typ;
   l_listen_list         DBMS_AQ.aq$_agent_list_t;
   l_agent               sys.aq$_agent;
   c_queue_name          varchar2(30) := 'K.MSG_QUEUE_MULTIPLE';
   l_name                varchar2(100);
   l_agt_name            varchar2(100);
   l_start_time          number; 
   l_elapsed_time        pls_integer := 0;
   l_timeout             pls_integer := p_wait *100;   --centisecond
   l_ret                 varchar2(100);
   no_messages           exception;
   pragma exception_init (no_messages, -25228);
begin
    l_agt_name := 'KSUB_'||p_sub;
    l_listen_list(1):= sys.aq$_agent(l_agt_name, c_queue_name, null);
    l_start_time:= dbms_utility.get_time();    -- this has overflow !
    
    while l_ret is null and (dbms_utility.get_time() - l_start_time) < l_timeout loop
	    begin
	      -- dbms_aq.listen can have false wakeup if two or more sessions are waiting with dbms_aq.listen, 
	      -- but only one enqueued message is arrived.
          
          -- when dequeue_options.visibility = dbms_aq.on_commit (default, it has an open transaction, requires commit)
          -- listen call will return a false positive when another session has an uncommitted dequeue on a multi-consumer queue.
          -- Listen call is supposed to return a true if it finds one message for a subscriber.
          -- When there is an uncommitted dequeue, message is still there in queue table. So listen will return true.
          -- Also it might be the case that first session rolls back just before commit.
 
		    dbms_aq.listen(
		        agent_list => l_listen_list
		       ,wait       => p_wait
		       ,agent      => l_agent
		      );    
		      
		    l_dequeue_options.consumer_name := l_agent.name;
		    l_dequeue_options.visibility    := dbms_aq.immediate;    -- for dequeue, when on_commit, there is an open transaction, requires commit
		    l_dequeue_options.delivery_mode := dbms_aq.persistent;
		    l_dequeue_options.navigation    := dbms_aq.first_message;
		    l_dequeue_options.wait          := DBMS_AQ.NO_WAIT;
		    
			  dbms_aq.dequeue(
			    queue_name         => c_queue_name,
			    dequeue_options    => l_dequeue_options,
			    message_properties => l_message_properties,
			    payload            => l_message,
			    msgid              => l_message_handle);
			  commit;
			  
			  dbms_output.put_line (to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||' >>> dequeue ' || l_message.subject||
			                        ', corr_id=' || l_message_properties.correlation||
			                        ', text=' || l_message.text|| ' , return_msg Length=' || length(l_message.return_msg) ||
			                        ' -- dequeue_options.consumer_name for '||l_agt_name);
			  --dequeue_options.navigation := DBMS_AQ.NEXT_MESSAGE;
			  --debug('deq '||l_message.subject);
			  l_ret := l_message.text;
			  exit;  --DEQUEUE SUC, exit, 
			  
			  exception 
			    when NO_MESSAGES then
			      dbms_output.put_line (to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||' -- Error for '||l_agt_name||', '||SQLERRM);
			      commit;
			    when OTHERS then
			      dbms_output.put_line (to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||' -- Error for '||l_agt_name||', '||SQLERRM);
			      l_ret := SQLERRM;
			      commit;
			end;
	end loop;
end;
/

-- exec listen_deq(1, 100);

-- Without using sys.aq$_agent, avoid "latch free" on "channel operations parent latch" (latch#=98)
--     Event: Streams AQ: waiting for messages in the queue
--     Same Event in non-multi-consumer(i.e, single) queue
create or replace procedure subscr_deq(p_sub number, p_wait number := 10) as
   l_dequeue_options     dbms_aq.dequeue_options_t;
   l_message_properties  dbms_aq.message_properties_t;
   l_message_handle      raw(16);
   l_message             message_typ;
   c_queue_name          varchar2(30) := 'K.MSG_QUEUE_MULTIPLE';
   l_msg_cnt             number := 0;
   l_try_limit           number := 1;      --- only dequeue one message
   l_try_cnt             number := 0;
   l_agt_name            varchar2(100);
   no_messages           exception;
   pragma exception_init (no_messages, -25228);
begin
  l_agt_name :=  'KSUB_'||p_sub;
  l_dequeue_options.visibility    := dbms_aq.immediate;
  l_dequeue_options.delivery_mode := dbms_aq.persistent;
  l_dequeue_options.wait          := p_wait;     --DBMS_AQ.NO_WAIT;   -- wait 10 seconds
  l_dequeue_options.consumer_name := l_agt_name;
	l_dequeue_options.navigation    := dbms_aq.first_message;
	--dequeue_options.navigation    := dbms_aq.next_message;   
	
	loop
	  l_try_cnt := l_try_cnt + 1;
	  begin
	    dbms_aq.dequeue(
	      queue_name         => c_queue_name,
	      dequeue_options    => l_dequeue_options,
	      message_properties => l_message_properties,
	      payload            => l_message,
	      msgid              => l_message_handle);
	    COMMIT;
			dbms_output.put_line (to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||' >>> dequeue ' || l_message.subject||
			                        ', corr_id=' || l_message_properties.correlation||
			                        ', text=' || l_message.text|| ' , return_msg Length=' || length(l_message.return_msg)||
			                        ' -- dequeue_options.consumer_name for '||l_agt_name);
			
	    --debug('deq '||l_message.subject);
	    l_msg_cnt := l_msg_cnt + 1;
	    
	    exception when no_messages then
	      dbms_output.put_line (to_char(sysdate, 'YYYY-MM-DD HH24:MI:SS')||' -- Error for '||l_agt_name||', '||SQLERRM);
	      commit;
    end;

	  if l_msg_cnt > 0 or l_try_cnt >= l_try_limit then 
	    exit;
	  end if; 
	end loop;  
end;
/

-- exec subscr_deq(1, 10);


--  create or replace type sys.aq$_agent as object
--  (name     varchar2 (30),     -- M_IDEN, name of a message producer or consumer
--   address  varchar2 (1024),   -- address where message must be sent
--   protocol number             -- protocol for communication, must be 0
--  );
--  
--  alter type aq$_agent modify attribute (name varchar2 (512)) cascade
--  /
-- 
--  DBMS_AQ.LISTEN (
--     agent_list            IN    AQ$_AGENT_LIST_T,
--     wait                  IN    BINARY_INTEGER DEFAULT DBMS_AQ.FOREVER,
--     agent                 OUT   SYS.AQ$_AGENT);


3.4 Test Jobs



-- enqueue loop
-- adjust cpu_burner
create or replace procedure enq_loop(p_sub number, p_dur_seconds number) as
  l_timeout    pls_integer := p_dur_seconds *100;    -- centisecond
  l_start_time number := dbms_utility.get_time();    -- this has overflow !
  l_cnt        number := 0;
begin
  dbms_application_info.set_action('Enq Job '||p_sub);
  while (dbms_utility.get_time() - l_start_time) < l_timeout
  loop 
    cpu_burner(1*1e4);
    enq(p_sub);
    l_cnt := l_cnt + 1;
  end loop;
  dbms_output.put_line('Enq Messages = '||l_cnt||', Dureation (seconds) = '||((dbms_utility.get_time() - l_start_time)/100));
end;
/

-- exec enq_loop(1, 10);

-- adjust cpu_burner
create or replace procedure start_enq_jobs(p_cnt number, p_dur_seconds number) as
  l_job_name varchar2(50);
begin
  for i in 1..p_cnt loop
    l_job_name := 'TEST_JOB_ENQ_'||i;
    dbms_scheduler.create_job (
      job_name        => l_job_name,
      job_type        => 'PLSQL_BLOCK',
      job_action      => 
        'begin 
           enq_loop('||i||', '||p_dur_seconds||');
        end;',    
      start_date      => systimestamp,
      --repeat_interval => 'systimestamp',
      auto_drop       => true,
      enabled         => true);
  end loop;
end;
/

-- dequeue loop
-- adjust cpu_burner
create or replace procedure sel_deq_loop(p_sub number, p_sel number := 1, p_dur_seconds number, p_wait number := 10) as
  l_timeout    pls_integer := p_dur_seconds *100;    --centisecond
  l_start_time number := dbms_utility.get_time();    -- this has overflow !
  l_cnt        number := 0;
begin
  dbms_application_info.set_action('Deq Job '||p_sub);
  while (dbms_utility.get_time() - l_start_time) < l_timeout
  loop
    cpu_burner(1*1e2);
    if p_sel = 1 then
      listen_deq(p_sub, p_wait);
    else
      subscr_deq(p_sub, p_wait);
    end if; 
    l_cnt := l_cnt + 1; 
  end loop;
  
  if p_sel = 1 then
    dbms_output.put_line('Listener Deq Execs (SUC and Failed) = '||l_cnt||', Duration (seconds) = '||((dbms_utility.get_time() - l_start_time)/100));
  else
    dbms_output.put_line('Subscriber Deq Execs (SUC and Failed) = '||l_cnt||', Duration (seconds) = '||((dbms_utility.get_time() - l_start_time)/100));
  end if;
end;
/

-- exec sel_deq_loop(1, 1, 10);   -- listen dequeue 
-- exec sel_deq_loop(1, 2, 10);   -- subscriber dequeue

create or replace procedure start_deq_jobs(p_cnt number, p_sel number := 1, p_dur_seconds number) as
  l_job_name varchar2(50);
begin
  for i in 1..p_cnt loop
    l_job_name := 'TEST_JOB_DEQ_'||i;
    dbms_scheduler.create_job (
      job_name        => l_job_name,
      job_type        => 'PLSQL_BLOCK',
      job_action      => 
        'begin 
           sel_deq_loop('||i||','||p_sel||', '||p_dur_seconds||');
        end;',    
      start_date      => systimestamp,
      --repeat_interval => 'systimestamp',
      auto_drop       => true,
      enabled         => true);
  end loop;
end;
/


3.5 Test Run: listen vs. subscriber



create or replace procedure test_run (p_jobs number, p_sel number, p_dur_seconds number, is_awr number := 0) as
begin
  clearup_test;
  purge_queue_table;
  
  if is_awr > 0 then SYS.DBMS_WORKLOAD_REPOSITORY.create_snapshot('ALL'); end if;
  start_enq_jobs(p_jobs, p_dur_seconds);
  start_deq_jobs(p_jobs, p_sel, p_dur_seconds);     -- listen dequeue 
  
  -- after 10 minutes, stop, get AWR
  dbms_session.sleep(p_dur_seconds);
  
  if is_awr > 0 then SYS.DBMS_WORKLOAD_REPOSITORY.create_snapshot('ALL'); end if;
  clearup_test;
end;
/


-- Start test run for 10 minutes
--   exec test_run(80, 1, 10*60, 1);   -- listen dequeue 
--   exec test_run(80, 2, 10*60, 1);   -- subscriber dequeue

Oracle Multi-Consumer AQ dequeue with dbms_aq.listen Performance Model (II)

Wait Events (I)           Performance Model (II)           Performance Analysis and Workaround (III)     


In the first Blog (I), we made test to show Wait Events: "library cache: mutex X" on queue object and "latch free" on Latch "channel operations parent latch".

In this second Blog (II), we will make dequeue test with GDB trace. Based on test outcome, we will build Latch and Mutex model, examine it against real DB stats and subsequently reveals implementation problems.

Note: tested on Oracle 19.17 in Linux


1. Library Cache Lock/Pin, Mutex Get, Latch Get


First we compose a GDB Script to trace dequeue stats (see Section 5 GDB Script).
Then we make tests with different scenarios.


1.1 Case-1: enqueue - dequeue


Open two Sqlplus sessions (SID-1 and SID-2). In SID-2, enqueue one message, execute gdb script on UNIX process of SID-1 (UNIX process pid: 27039), then on SID-1 make dequeue.

Here all output:

(SID-2) > exec enq(1);
  11:16:29 --- enqueue Message for KSUB_1, corr_id=KSUB_1, text=11:16:29.

$ > gdb -x gdb_cmd_lm_script1 -p 27039
	
(SID-1) > exec listen_deq(1, 60);
  11:17:10 >>> dequeue Message for KSUB_1, corr_id=KSUB_1, text=11:16:29.

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:17:10
  ===== BreakPoint: 55-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 18 ] =====
GDB output shows that for one single message dequeue, it needs:

   2   Library Cache Lock (kgllkal)
   3   Library Cache Pin  (kgllkdl)
   3   Latch Gets         (ksl_get_shared_latch)
   18  Mutex Gets         (kglGetMutex)
   55  Breakooints        (55-Counter)
For all the output details, see Section 6. GDB Output of Case-1.


1.2 Case-2: dequeue - enqueue


We first start dequeue in SID-1 and gdb its process, then enqueue one message in SID-2. Since dequeue starts first, it is waiting for message in dbms_aq.listen (with timeout 60 seconds).

Here the test output:

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  
(SID-1) > exec listen_deq(1, 60);
  
$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:26:18
  ===== BreakPoint: 38-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 12 ] =====

(SID-2) > exec enq(1);
	11:26:42 --- enqueue Message for KSUB_1, corr_id=KSUB_1, text=11:26:42.
	
(SID-1) > exec listen_deq(1, 60);
  11:26:42 >>> dequeue Message for KSUB_1, corr_id=KSUB_1, text=11:26:42.
  
$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:26:42
  ===== BreakPoint: 103-Counter: [ LOCK: 4, PIN: 5, LATCH: 9, MUTEX: 30 ] =====
gdb output shows stats at wait start:

   2   Library Cache Lock (kgllkal)
   2   Library Cache Pin  (kgllkdl)
   1   Latch Gets         (ksl_get_shared_latch)
   12  Mutex Gets         (kglGetMutex)
   38  Breakooints        (38-Counter)
gdb output shows stats after message dequeued:

   4   Library Cache Lock (kgllkal)
   5   Library Cache Pin  (kgllkdl)
   9   Latch Gets         (ksl_get_shared_latch)
   30  Mutex Gets         (kglGetMutex)
   103 Breakooints        (103-Counter)


1.3 Case-3: dequeue - enqueue other corr_id - enqueue


Similar to Case-2, but we make two enqueues, the first is enqueued to other corr_id (corr_id=KSUB_31), the second is enqueued for the waiting dequeue (corr_id=KSUB_1).

We can see that the first enqueue for corr_id=KSUB_31 also wakes up dequeue, and hence Latch and Mutex stats are increased:

$ > gdb -x gdb_cmd_lm_script1 -p 27039

(SID-1) > exec listen_deq(1, 60);

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:36:19
  ===== BreakPoint: 38-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 12 ] =====

(SID-2) > exec enq(31);
  11:36:38 --- enqueue Message for KSUB_31, corr_id=KSUB_31, text=11:36:38.

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:36:38
  ===== BreakPoint: 86-Counter: [ LOCK: 4, PIN: 4, LATCH: 7, MUTEX: 24 ] =====

(SID-2) > exec enq(1);
  11:37:12 --- enqueue Message for KSUB_1, corr_id=KSUB_1, text=11:37:12.

(SID-1) > exec listen_deq(1, 60);
  11:37:12 >>> dequeue Message for KSUB_1, corr_id=KSUB_1, text=11:37:12.

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:37:12
  ===== BreakPoint: 151-Counter: [ LOCK: 6, PIN: 7, LATCH: 15, MUTEX: 42 ] =====
gdb output shows stats at wait start:

   2   Library Cache Lock (kgllkal)
   2   Library Cache Pin  (kgllkdl)
   1   Latch Gets         (ksl_get_shared_latch)
   12  Mutex Gets         (kglGetMutex)
   38  Breakooints        (38-Counter)
gdb output shows the first wake-up (false wake-up) stats:

   4   Library Cache Lock (kgllkal)
   4   Library Cache Pin  (kgllkdl)
   7   Latch Gets         (ksl_get_shared_latch)
   24  Mutex Gets         (kglGetMutex)
   86  Breakooints        (86-Counter)
the second wake-up (true wake-up) stats:

   6   Library Cache Lock (kgllkal)
   7   Library Cache Pin  (kgllkdl)
   15  Latch Gets         (ksl_get_shared_latch)
   42  Mutex Gets         (kglGetMutex)
   151 Breakooints        (151-Counter)


1.4 Case-4: dequeue with timeout


We start a dequeue, but no enqueue, and then wait dequeue timeout:

$ > gdb -x gdb_cmd_lm_script1 -p 27039

(SID-1) > exec listen_deq(1, 60);

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:42:30
  ===== BreakPoint: 38-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 12 ] =====

(SID-1) > exec listen_deq(1, 60);
  11:43:30 -- Error for KSUB_1, ORA-25254: time-out in LISTEN while waiting for a message

$ > gdb -x gdb_cmd_lm_script1 -p 27039
  11:43:30
  ===== BreakPoint: 42-Counter: [ LOCK: 2, PIN: 2, LATCH: 3, MUTEX: 12 ] =====
gdb output shows the stats at wait start:

   2   Library Cache Lock (kgllkal)
   2   Library Cache Pin  (kgllkdl)
   1   Latch Gets         (ksl_get_shared_latch)
   12  Mutex Gets         (kglGetMutex)
   38  Breakooints        (38-Counter)
gdb output shows the timeout stats:

   2   Library Cache Lock (kgllkal)
   2   Library Cache Pin  (kgllkdl)
   3   Latch Gets         (ksl_get_shared_latch)
   12  Mutex Gets         (kglGetMutex)
   42  Breakooints        (42-Counter)


2. Performance Model and Analysis


According to above test outcome, we can estimate number of Latch and Mutex Gets, and build a performance analysis model for concurrent parallel sessions.

The numbers used in model are approximative and are based on the observations of real DB stats.

We examine the model against real DB collected stats and show the deviations.

In next Blog (III), we will make tests with different number of parallel sessions, and collect exact DB Latch and Mutex stats.


2.1. Mutex and Latch Formulas



Assume N: number of Sessions
       M: number of dequeued messages
       no timeout in dequeue (no repeated get)
       each message is only dequeued once and by one specified target session
        
       MGets: Number of Mutex Gets on Queue Object
       LGets: Number of Latch Gets on LATCH#=98 (channel operations parent latch)

For one dequeue (M=1) and no concurrent session (N=1):
       MGets = 12
       LGets =  6
       
For one dequeue (M=1) and N concurrent sessions:
       MGets = 12 * N
       LGets =  6 * N

For M dequeues and N concurrent sessions:
       MGets = 12 * N * M
       LGets =  6 * N * M


2.2. Concurrency Contentions


When one Message is enqueued, all sessions waiting with dbms_aq.listen are waked up simultaneously. They are concurrently contending for the DB-wide single Mutex (on Queue Object), and single Child Latch (CHILD#=144 of LATCH#=98, LATCH#=98 has 173 Children, but only 144 is used for dbms_aq.listen on any Queues).

Hence, dbms_aq.listen artificially "synchronize" the DB-wide single resource contentions.


2.3. Examination of Latch Formula on Real DB



2.3.1 Pick following data from AWR:



		  dequeue SQL Query Executions = 96,508	
		  Enq Msgs                     = 94,551
		  Deq Msgs                     = 94,551
		          
      Latch Name                       Get Requests   Misses     Sleeps   Spin Gets
      -------------------------------  -------------  ---------	 -------  ---------
      channel operations parent latch  43,999,351     3,234,815  130,568  3,107,867


2.3.2 Compute and Compare:



      Real      LGets (rLGets) = 43,999,351	
		  Estimated LGets (eLGets) = 6 * N * M
		                           = 6 * 64 * 96,508	
		                           = 37,059,072
Due to Latch Misses and Sleeps, Estimated LGets can be adjusted as:

		  Adjusted Estimated LGets = eLGets     + Misses
		                           = 37,059,072 + 3,234,815
		                           = 40,293,887
		                           
		  Estimation Error = 43,999,351 - 40,293,887 
		                   = 3,705,464 (8.42%)
Note that Latch has recurrent_sleeps:
		
		  recurrent_sleeps = sleeps  + spin_gets ñ misses
		                   = 130,568 + 3,107,867 - 3,234,815
		                   = 3,620
Because of recurrent_sleeps, Estimated LGets can be less than Real LGets.


3. dbms_aq.listen dequeue - child latch 144


For dequeue with dbms_aq.listen, following test shows that only child# 144 of Latch "channel operations parent latch" (latch#=98) is used. Each RUN needs 3 latch GETs.

For dequeue with subscriber, no such latch is needed.

-----===== enqueue 20 messages for KSUB_1 
begin
  for i in 1..20 loop
    enq(1);
  end loop;
end;
/

-----===== dequeue 10 messages by dbms_aq.listen =====
column latch_gets new_value latch_gets_1
column latch_children_gets new_value latch_children_gets_1

select 
  (select gets from v$latch where latch#=98) latch_gets,
  (select gets from v$latch_children where latch#=98 and child# = 144) latch_children_gets
from dual;

begin
  for i in 1..10 loop
    begin
      listen_deq(1, 10); 
    EXCEPTION WHEN others THEN
	    null;  --DBMS_OUTPUT.PUT_LINE ('---------------- No more messages for KSUB_'||1);
	  end;
  end loop;
end;
/

select 
  (select gets - &latch_gets_1 from v$latch where latch#=98) latch_gets_delta,
  (select gets - &latch_children_gets_1 from v$latch_children where latch#=98 and child# = 144) latch_children_gets_delta
from dual;

LATCH_GETS_DELTA LATCH_CHILDREN_GETS_DELTA
---------------- -------------------------
              31                        30

-----===== dequeue 10 messages by subscriber =====

select 
  (select gets from v$latch where latch#=98) latch_gets,
  (select gets from v$latch_children where latch#=98 and child# = 144) latch_children_gets
from dual;


begin
  for i in 1..10 loop
    begin
      sub_deq(1, 10); 
    EXCEPTION WHEN others THEN
	    null;  --DBMS_OUTPUT.PUT_LINE ('---------------- No more messages for KSUB_'||1);
	  end;
  end loop;
end;
/

select 
  (select gets - &latch_gets_1 from v$latch where latch#=98) latch_gets_delta,
  (select gets - &latch_children_gets_1 from v$latch_children where latch#=98 and child# = 144) latch_children_gets_delta
from dual;

LATCH_GETS_DELTA LATCH_CHILDREN_GETS_DELTA
---------------- -------------------------
               1                         0


4. dequeue waiting semtimedop


If we use strace to display dequeue waiting with dbms_aq.listen, we can see that the internal timeout of 1 second ("{1, 0}"):

SQL > exec listen_deq(32, 120);

$ > strace -ttp 19009
  strace: Process 19009 attached
  15:29:36.161830 semtimedop(9928704, [{63, -1, 0}], 1, {1, 0}) = -1 EAGAIN (Resource temporarily unavailable)
  15:29:37.162159 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={2, 747478}, ru_stime={0, 322189}, ...}) = 0
  15:29:37.162333 semtimedop(9928704, [{63, -1, 0}], 1, {1, 0}) = -1 EAGAIN (Resource temporarily unavailable)
  15:29:38.162152 semtimedop(9928704, [{63, -1, 0}], 1, {1, 0}) = -1 EAGAIN (Resource temporarily unavailable)
If we use strace to display dequeue waiting with subscriber, we can see that the internal timeout of 1 second ("{3, 0}"):

SQL > exec sub_deq(32, 120);

$ > strace -ttp 19009

  strace: Process 19009 attached
  15:38:37.236174 semtimedop(9928704, [{63, -1, 0}], 1, {4, 641000}) = -1 EAGAIN (Resource temporarily unavailable)
  15:38:41.237194 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={2, 764867}, ru_stime={0, 323688}, ...}) = 0
  15:38:41.237309 semtimedop(9928704, [{63, -1, 0}], 1, {3, 0}) = -1 EAGAIN (Resource temporarily unavailable)
  15:38:44.237145 semtimedop(9928704, [{63, -1, 0}], 1, {3, 0}) = -1 EAGAIN (Resource temporarily unavailable)
For all 4 test cases in Section 1, we can also use strace to momitor semtimedop. In particular for Case-3 (dequeue - enqueue other corr_id - enqueue), we can see the inter-process communication between different sessions. More info about semaphores can be showed by:

$ > ipcs -s
  ------ Semaphore Arrays --------
  key        semid      owner      perms      nsems
  0xe4774fb0 9928704    oracle     600        250
  0xe4774fb1 9961473    oracle     600        250
  0xe4774fb2 9994242    oracle     600        250

$ > ipcs -s -i 9928704
$ > ipcs -l


5. GDB Script


To run appended GDB script, we first need to find following three address.


5.1 Get Queue Object Handlr Addr: KGLLKHDL_ADDR



select addr, hash_value idn,  v.* from v$db_object_cache v where name = 'MSG_QUEUE_MULTIPLE'; 
-- 98417B58  198468696

select kgllkhdl, kglhdpar, v.* from sys.x_kgllk v where kglnaobj = 'MSG_QUEUE_MULTIPLE'; 
-- 98417B58  98417B58

select object_handle, mode_held, mode_requested, v.* from v$libcache_locks v where object_handle like '%98417B58';
-- 98417B58

Lock/pin mode held in v$libcache_locks:
    0 - No lock/pin held
    1 - Null mode
    2 - Share mode
    3 - Exclusive mode

Note: we use Blog:      
  Tracing Library Cache Locks 3: Object Names (Nenad Noveljic, March 1, 2021)
    (https://nenadnoveljic.com/blog/library-cache-lock-object-name/)
to printout library object name and schema: MSG_QUEUE_MULTIPLEK 


5.2 Get Queue Object Mutex Addr: KGLHDMTX_ADDR



select kglhdmtx, kglhdadr, v.* from sys.x_kglob v where kglnaobj = 'MSG_QUEUE_MULTIPLE'; 
--KGLHDMTX=98417CA8, KGLHDADR=98417B58

-- Note: each Mutex has a number of different Locations


5.3 Get Latch 'channel operations parent latch' (Latch#=144) Top Child# Addr: LATCHCHD_ADDR


We can get Top Child# Addr by query:

select * from (
  select addr addr_144, child# child#_144, v.* from v$latch_children v 
   where latch#=98 or name = 'channel operations parent latch' order by gets desc
) where rownum = 1;  
-- B4C33CF8  144
It can also be found by following test and query:

Open two Sqlplus sessions.

-- In Session_1, run 

SQL (SID-1) > exec listen_deq(1, 120);

-- Note: For dbms_aq.listen wait, the session is waiting on event: 'wait for unread message on broadcast channel'.
--       For dbms_aq.dequeue wait, the session is waiting on event: 'Streams AQ: waiting for messages in the queue'.

-- In Session_2, run query below to display CHILD#=144:

SQL (SID-1) > with sq as (select program, event, sid, p1text, p1 from v$session where event = 'wait for unread message on broadcast channel')
              select l.addr, child#, s.program, s.sid, s.p1 p1_num, (s.p1+72) p1_num_72, to_number(l.addr, 'XXXXXXXXXXXXXXXX') child_addr_dec, l.* 
               from v$latch_children l, sq s
               where (latch#=98 or name = 'channel operations parent latch')
                 and to_number(l.addr, 'XXXXXXXXXXXXXXXX') = s.p1 + 72;
                               
-- B4C33CF8 144

-- Note: Above query shows that Oracle background SVCB is also waiting on Event "unread message on broadcast channel"
--       Probably it also dbms_aq.listen (Latch#=98,  CHILD#=36) 
--       (SVCB is supposed only for Oracle RAC, but we observed it also in non-RAC DB (Oracle Docu error)) 


-- For ksl_get_shared_latch, see Blog: 
--   Oracle 12 and latches (https://fritshoogland.wordpress.com/2015/07/17/oracle-12-and-latches/):

The kslgetl and ksl_get_shared_latch functions take the following arguments:
    1-latch address
    2-immediate get (0 means yes, 1 means no)
    3-where (X$KSLLW.INDX)
    4-why (X$KSLWSC.INDX)
    5-mode (8=shared,16=exclusive; only for ksl_get_shared_latch function)


5.4 Stats Counter


For each Get operation (kgllkal, kglpin, kglGetMutex, ksl_get_shared_latch), we increase its respective Counter by 1. For each Release operation (kgllkdl, kglpndl, kglReleaseMutex, kslfre), the respective current Counter value is used.

After each operation, we printout all 4 Counter values.


5.5 GDB Script Code


With three above addresses, we can compose following GDB script:

------------------------- gdb -x gdb_cmd_lm_script1 -p 3214 -------------------------

set pagination off
set logging file mutex-latch_test1.log
set logging overwrite on
set logging on
set $KGLLKHDL_ADDR=0X98417B58
set $KGLHDMTX_ADDR=0X98417CA8
set $LATCHCHD_ADDR=0XB4C33CF8
set $s = 0
set $k = 0, $kr = 0
set $p = 0, $pr = 0
set $l = 0, $lr = 0
set $m = 0, $mr = 0

define PrtCont
  bt $arg0
  shell date
  printf "===== BreakPoint: %i-Counter: [ LOCK: %i, PIN: %i, LATCH: %i, MUTEX: %i ] =====\n", ++$s, $k, $p, $l, $m
  continue
end

# kgllkal kglLock kglget
break *kgllkal if $rdx==$KGLLKHDL_ADDR
command
printf "===== Lock Get Seq: %i -- kgllkal (kgllkhdl Addr(rdx)=>%X, Mode(rcx)=>%X), kglnaobj_and_schema: %s --\n", ++$k, $rdx, $rcx, ($rdx+0x1c0)
set $kr = 0
PrtCont 10
end

# kgllkdl kglUnLock
break *kgllkdl if $r8==$KGLLKHDL_ADDR
command
printf "===== Lock Rel Seq: %i-%i -- kgllkdl (kgllkhdl Addr(r8)=>%X) ---\n", $k, ++$kr, $r8
PrtCont 3
end

break *kglpin if $rcx==$KGLLKHDL_ADDR
command
printf "===== Pin Get Seq: %i -- kglpin (kgllkhdl Addr(rcx)=>%X, Mode(r8)=>%X, mutex Addr(r11)=>%X)--\n", ++$p, $rcx, $r8, $r11
set $pr = 0
PrtCont 3
end

#kglUnPin
break *kglpndl if $r8==$KGLLKHDL_ADDR
command
printf "===== Pin Rel Seq: %i-%i -- kglpndl(kgllkhdl Addr(r8)=>%X) --\n", $p, ++$pr, $r8
PrtCont 3
end

break *kglGetMutex if $rsi==$KGLHDMTX_ADDR
command
printf "===== Mutext Get Seq: %i -- kglGetMutex(kglhdmtx Addr(rsi)=>%X, Location(r8)=>%i) --\n", ++$m, $rsi, $r8
set $mr = 0
PrtCont 3
end

break *kglReleaseMutex if $rsi==$KGLHDMTX_ADDR
command
printf "===== Mutext Rel Seq: %i-%i -- kglReleaseMutex(kglhdmtx Addr(rsi)=>%X, Location(r8)=>%i) --\n", $m, ++$mr, $rsi, $r8
PrtCont 3
end

break *ksl_get_shared_latch if $rdi==$LATCHCHD_ADDR
command
printf "===== Latch Get Seq: %i -- ksl_get_shared_latch Addr(Latch-98 Child-144 Addr(rdi)=>%X, Mode(r8)=>%i) --\n", ++$l, $rdi, $r8
set $lr = 0
PrtCont 3
end

# kslgetl not used

break *kslfre if $rdi==$LATCHCHD_ADDR
command
printf "===== Latch Fre Seq: %i-%i -- kslfre (Latch-98 Child-144 Addr(rdi)=>%X) --\n", $l, ++$lr, $rdi
PrtCont 3
end


6. GDB Output of Case-1



Note: "kwqilisten" is Oracle subroutine for dbms_aq.LISTEN.


===== Mutext Get Seq: 1 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130ac47e in kglLock ()
#2  0x00000000130a70c5 in kglget ()
===== BreakPoint: 1-Counter: [ LOCK: 0, PIN: 0, LATCH: 0, MUTEX: 1 ] =====

===== Lock Get Seq: 1 -- kgllkal (kgllkhdl Addr(rdx)=>98417B58, Mode(rcx)=>2), kglnaobj_and_schema: MSG_QUEUE_MULTIPLEK --
#0  0x00000000130b0b40 in kgllkal ()
#1  0x00000000130ac291 in kglLock ()
#2  0x00000000130a70c5 in kglget ()
#3  0x0000000004c04809 in kglgob ()
#4  0x0000000001b1c3ed in kwqicgob ()
#5  0x0000000001b1b7d8 in kwqicrqo0 ()
#6  0x000000000f70f1df in kwqidlqo ()
#7  0x00000000080594f7 in kwqiccllst ()
#8  0x000000000807351a in kwqilintl ()
#9  0x0000000008073c0a in kwqilisten ()
===== BreakPoint: 2-Counter: [ LOCK: 1, PIN: 0, LATCH: 0, MUTEX: 1 ] =====

===== Mutext Rel Seq: 1-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b21e7 in kglLoadOnLock ()
#2  0x00000000130b0e90 in kgllkal ()
===== BreakPoint: 3-Counter: [ LOCK: 1, PIN: 0, LATCH: 0, MUTEX: 1 ] =====

===== Mutext Rel Seq: 1-2 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b0ea0 in kgllkal ()
#2  0x00000000130ac291 in kglLock ()
===== BreakPoint: 4-Counter: [ LOCK: 1, PIN: 0, LATCH: 0, MUTEX: 1 ] =====

===== Mutext Rel Seq: 1-3 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130ac35c in kglLock ()
#2  0x00000000130a70c5 in kglget ()
===== BreakPoint: 5-Counter: [ LOCK: 1, PIN: 0, LATCH: 0, MUTEX: 1 ] =====

===== Pin Get Seq: 1 -- kglpin (kgllkhdl Addr(rcx)=>98417B58, Mode(r8)=>2, mutex Addr(r11)=>98417CA8)--
#0  0x00000000130a7e20 in kglpin ()
#1  0x0000000004c048c8 in kglgob ()
#2  0x0000000001b1c3ed in kwqicgob ()
===== BreakPoint: 6-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 1 ] =====

===== Mutext Get Seq: 2 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130a80dd in kglpin ()
#2  0x0000000004c048c8 in kglgob ()
===== BreakPoint: 7-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 2 ] =====

===== Mutext Rel Seq: 2-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>0) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130a817e in kglpin ()
#2  0x0000000004c048c8 in kglgob ()
===== BreakPoint: 8-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 2 ] =====

===== Mutext Get Seq: 3 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>90) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b4c4f in kglpnal ()
#2  0x00000000130a82b9 in kglpin ()
===== BreakPoint: 9-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 3 ] =====

===== Mutext Get Seq: 4 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>71) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130afef5 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 10-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 4 ] =====

===== Mutext Rel Seq: 4-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b0200 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 11-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 4 ] =====

===== Mutext Rel Seq: 4-2 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130a858b in kglpin ()
#2  0x0000000004c048c8 in kglgob ()
===== BreakPoint: 12-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 4 ] =====

===== Pin Rel Seq: 1-1 -- kglpndl(kgllkhdl Addr(r8)=>98417B58) --
#0  0x00000000130b6100 in kglpndl ()
#1  0x00000000130a7756 in kglUnPin ()
#2  0x000000000f70f6bb in kwqidfqo ()
===== BreakPoint: 13-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 4 ] =====

===== Mutext Get Seq: 5 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>95) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b63c7 in kglpndl ()
#2  0x00000000130a7756 in kglUnPin ()
===== BreakPoint: 14-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 5 ] =====

===== Mutext Rel Seq: 5-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>-2022442648) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b66c9 in kglpndl ()
#2  0x00000000130a7756 in kglUnPin ()
===== BreakPoint: 15-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 5 ] =====

===== Lock Rel Seq: 1-1 -- kgllkdl (kgllkhdl Addr(r8)=>98417B58) ---
#0  0x00000000130b34d0 in kgllkdl ()
#1  0x00000000130ae53c in kglUnLock ()
#2  0x000000000f70f6c6 in kwqidfqo ()
===== BreakPoint: 16-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 5 ] =====

===== Mutext Get Seq: 6 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>85) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b36be in kgllkdl ()
#2  0x00000000130ae53c in kglUnLock ()
===== BreakPoint: 17-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 6 ] =====

===== Mutext Rel Seq: 6-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b39f1 in kgllkdl ()
#2  0x00000000130ae53c in kglUnLock ()
===== BreakPoint: 18-Counter: [ LOCK: 1, PIN: 1, LATCH: 0, MUTEX: 6 ] =====

===== Latch Get Seq: 1 -- ksl_get_shared_latch Addr(Latch-98 Child-144 Addr(rdi)=>B4C33CF8, Mode(r8)=>16) --
#0  0x00000000128a3a90 in ksl_get_shared_latch ()
#1  0x000000000128c4e3 in ksrsubscribe ()
#2  0x000000000f71e4fb in kwqidlisten ()
===== BreakPoint: 19-Counter: [ LOCK: 1, PIN: 1, LATCH: 1, MUTEX: 6 ] =====

===== Latch Fre Seq: 1-1 -- kslfre (Latch-98 Child-144 Addr(rdi)=>B4C33CF8) --
#0  0x00000000128aa290 in kslfre ()
#1  0x000000000128c5ae in ksrsubscribe ()
#2  0x000000000f71e4fb in kwqidlisten ()
===== BreakPoint: 20-Counter: [ LOCK: 1, PIN: 1, LATCH: 1, MUTEX: 6 ] =====

===== Mutext Get Seq: 7 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130ac47e in kglLock ()
#2  0x00000000130a70c5 in kglget ()
===== BreakPoint: 21-Counter: [ LOCK: 1, PIN: 1, LATCH: 1, MUTEX: 7 ] =====

===== Lock Get Seq: 2 -- kgllkal (kgllkhdl Addr(rdx)=>98417B58, Mode(rcx)=>2), kglnaobj_and_schema: MSG_QUEUE_MULTIPLEK --
#0  0x00000000130b0b40 in kgllkal ()
#1  0x00000000130ac291 in kglLock ()
#2  0x00000000130a70c5 in kglget ()
#3  0x0000000004c04809 in kglgob ()
#4  0x0000000001b1c3ed in kwqicgob ()
#5  0x0000000001b1b7d8 in kwqicrqo0 ()
#6  0x000000000f70f1df in kwqidlqo ()
#7  0x000000000f70f938 in kwqidlqowl ()
#8  0x0000000003f5b335 in kwqididqx ()
#9  0x000000000f705bba in kwqididq ()
===== BreakPoint: 22-Counter: [ LOCK: 2, PIN: 1, LATCH: 1, MUTEX: 7 ] =====

===== Mutext Rel Seq: 7-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b21e7 in kglLoadOnLock ()
#2  0x00000000130b0e90 in kgllkal ()
===== BreakPoint: 23-Counter: [ LOCK: 2, PIN: 1, LATCH: 1, MUTEX: 7 ] =====

===== Mutext Rel Seq: 7-2 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b0ea0 in kgllkal ()
#2  0x00000000130ac291 in kglLock ()
===== BreakPoint: 24-Counter: [ LOCK: 2, PIN: 1, LATCH: 1, MUTEX: 7 ] =====

===== Mutext Rel Seq: 7-3 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130ac35c in kglLock ()
#2  0x00000000130a70c5 in kglget ()
===== BreakPoint: 25-Counter: [ LOCK: 2, PIN: 1, LATCH: 1, MUTEX: 7 ] =====

===== Pin Get Seq: 2 -- kglpin (kgllkhdl Addr(rcx)=>98417B58, Mode(r8)=>2, mutex Addr(r11)=>98417CA8)--
#0  0x00000000130a7e20 in kglpin ()
#1  0x0000000004c048c8 in kglgob ()
#2  0x0000000001b1c3ed in kwqicgob ()
===== BreakPoint: 26-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 7 ] =====

===== Mutext Get Seq: 8 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130a80dd in kglpin ()
#2  0x0000000004c048c8 in kglgob ()
===== BreakPoint: 27-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 8 ] =====

===== Mutext Rel Seq: 8-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>0) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130a817e in kglpin ()
#2  0x0000000004c048c8 in kglgob ()
===== BreakPoint: 28-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 8 ] =====

===== Mutext Get Seq: 9 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>90) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b4c4f in kglpnal ()
#2  0x00000000130a82b9 in kglpin ()
===== BreakPoint: 29-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 9 ] =====

===== Mutext Get Seq: 10 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>71) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130afef5 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 30-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 10 ] =====

===== Mutext Rel Seq: 10-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b0200 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 31-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 10 ] =====

===== Mutext Rel Seq: 10-2 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130a858b in kglpin ()
#2  0x0000000004c048c8 in kglgob ()
===== BreakPoint: 32-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 10 ] =====

===== Pin Rel Seq: 2-1 -- kglpndl(kgllkhdl Addr(r8)=>98417B58) --
#0  0x00000000130b6100 in kglpndl ()
#1  0x00000000130a7756 in kglUnPin ()
#2  0x000000000f70f6bb in kwqidfqo ()
===== BreakPoint: 33-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 10 ] =====

===== Mutext Get Seq: 11 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>95) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b63c7 in kglpndl ()
#2  0x00000000130a7756 in kglUnPin ()
===== BreakPoint: 34-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 11 ] =====

===== Mutext Rel Seq: 11-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>-2022442648) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b66c9 in kglpndl ()
#2  0x00000000130a7756 in kglUnPin ()
===== BreakPoint: 35-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 11 ] =====

===== Lock Rel Seq: 2-1 -- kgllkdl (kgllkhdl Addr(r8)=>98417B58) ---
#0  0x00000000130b34d0 in kgllkdl ()
#1  0x00000000130ae53c in kglUnLock ()
#2  0x000000000f70f6c6 in kwqidfqo ()
===== BreakPoint: 36-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 11 ] =====

===== Mutext Get Seq: 12 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>85) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b36be in kgllkdl ()
#2  0x00000000130ae53c in kglUnLock ()
===== BreakPoint: 37-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 12 ] =====

===== Mutext Rel Seq: 12-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b39f1 in kgllkdl ()
#2  0x00000000130ae53c in kglUnLock ()
===== BreakPoint: 38-Counter: [ LOCK: 2, PIN: 2, LATCH: 1, MUTEX: 12 ] =====

===== Latch Get Seq: 2 -- ksl_get_shared_latch Addr(Latch-98 Child-144 Addr(rdi)=>B4C33CF8, Mode(r8)=>16) --
#0  0x00000000128a3a90 in ksl_get_shared_latch ()
#1  0x000000000128dc58 in ksrchreset ()
#2  0x000000000f71eae7 in kwqidlisten ()
===== BreakPoint: 39-Counter: [ LOCK: 2, PIN: 2, LATCH: 2, MUTEX: 12 ] =====

===== Latch Fre Seq: 2-1 -- kslfre (Latch-98 Child-144 Addr(rdi)=>B4C33CF8) --
#0  0x00000000128aa290 in kslfre ()
#1  0x000000000128dd92 in ksrchreset ()
#2  0x000000000f71eae7 in kwqidlisten ()
===== BreakPoint: 40-Counter: [ LOCK: 2, PIN: 2, LATCH: 2, MUTEX: 12 ] =====

===== Latch Get Seq: 3 -- ksl_get_shared_latch Addr(Latch-98 Child-144 Addr(rdi)=>B4C33CF8, Mode(r8)=>16) --
#0  0x00000000128a3a90 in ksl_get_shared_latch ()
#1  0x000000000128ddc5 in ksrchreset ()
#2  0x000000000f71eae7 in kwqidlisten ()
===== BreakPoint: 41-Counter: [ LOCK: 2, PIN: 2, LATCH: 3, MUTEX: 12 ] =====

===== Latch Fre Seq: 3-1 -- kslfre (Latch-98 Child-144 Addr(rdi)=>B4C33CF8) --
#0  0x00000000128aa290 in kslfre ()
#1  0x000000000128de19 in ksrchreset ()
#2  0x000000000f71eae7 in kwqidlisten ()
===== BreakPoint: 42-Counter: [ LOCK: 2, PIN: 2, LATCH: 3, MUTEX: 12 ] =====

===== Mutext Get Seq: 13 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>1) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130ac47e in kglLock ()
#2  0x00000000130a70c5 in kglget ()
===== BreakPoint: 43-Counter: [ LOCK: 2, PIN: 2, LATCH: 3, MUTEX: 13 ] =====

===== Mutext Rel Seq: 13-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>626987360) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130ac35c in kglLock ()
#2  0x00000000130a70c5 in kglget ()
===== BreakPoint: 44-Counter: [ LOCK: 2, PIN: 2, LATCH: 3, MUTEX: 13 ] =====

===== Pin Get Seq: 3 -- kglpin (kgllkhdl Addr(rcx)=>98417B58, Mode(r8)=>2, mutex Addr(r11)=>98417B58)--
#0  0x00000000130a7e20 in kglpin ()
#1  0x00000000130a7af7 in kglpnp ()
#2  0x0000000004c062fb in kglgpr ()
===== BreakPoint: 45-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 13 ] =====

===== Mutext Get Seq: 14 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>4) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130a80dd in kglpin ()
#2  0x00000000130a7af7 in kglpnp ()
===== BreakPoint: 46-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 14 ] =====

===== Mutext Rel Seq: 14-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>0) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130a817e in kglpin ()
#2  0x00000000130a7af7 in kglpnp ()
===== BreakPoint: 47-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 14 ] =====

===== Mutext Get Seq: 15 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>90) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b4c4f in kglpnal ()
#2  0x00000000130a82b9 in kglpin ()
===== BreakPoint: 48-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 15 ] =====

===== Mutext Get Seq: 16 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>71) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130afef5 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 49-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 16 ] =====

===== Mutext Get Seq: 17 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>72) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b0033 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 50-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 17 ] =====

===== Mutext Rel Seq: 17-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>17) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b0200 in kglobpn ()
#2  0x00000000130aab2a in kglpim ()
===== BreakPoint: 51-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 17 ] =====

===== Mutext Rel Seq: 17-2 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>17) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130a858b in kglpin ()
#2  0x00000000130a7af7 in kglpnp ()
===== BreakPoint: 52-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 17 ] =====

===== Pin Rel Seq: 3-1 -- kglpndl(kgllkhdl Addr(r8)=>98417B58) --
#0  0x00000000130b6100 in kglpndl ()
#1  0x00000000130a7756 in kglUnPin ()
#2  0x0000000008066b4d in kwqidxdeq0 ()
===== BreakPoint: 53-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 17 ] =====

===== Mutext Get Seq: 18 -- kglGetMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>95) --
#0  0x00000000130b9c20 in kglGetMutex ()
#1  0x00000000130b63c7 in kglpndl ()
#2  0x00000000130a7756 in kglUnPin ()
===== BreakPoint: 54-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 18 ] =====

===== Mutext Rel Seq: 18-1 -- kglReleaseMutex(kglhdmtx Addr(rsi)=>98417CA8, Location(r8)=>-2022442648) --
#0  0x00000000130b9430 in kglReleaseMutex ()
#1  0x00000000130b66c9 in kglpndl ()
#2  0x00000000130a7756 in kglUnPin ()
===== BreakPoint: 55-Counter: [ LOCK: 2, PIN: 3, LATCH: 3, MUTEX: 18 ] =====