Thursday, April 18, 2024

Oracle AQ Dequeue 'KTC latch subh' Memory Leak and Dequeue Long Running Test

Following previous Blog: "Oracle dbms_aq.dequeue_array Shared Pool "KTC Latch Subh" Memory Leak" (http://ksun-oracle.blogspot.com/2021/11/oracle-dbmsaqdequeuearray-shared-pool.html), we will reproduce another case of AQ Dequeue 'KTC latch subh' Memory Leak test in Orale 19.11 with Patch 31666684, and shows dequeue long (hours) running.

Note: Tested in 19.11 with following DB settings (also reproduced in 19.22. In 19.22, there is no Patch 31666684):

  Patch 31666684 (Patch Description: MEMORY LEAK KTC LATCH SUBH  ON AQ DEQUEUE)
 
  -- set small shared_pool to observe memory resize between shared pool and buffer cache due to "KTC latch subh"
  alter system set shared_pool_size=1008M scope=spfile;  
  
  -- set 3 subpools in shared_pool
  alter system set "_kghdsidx_count"=3 scope=spfile; 
  
  -- disable autosga to generate ORA-04031
  --alter system set "_memory_imm_mode_without_autosga"=false;


1. Test Setup



drop type k.t_test_obj force;
					      
create or replace noneditionable type k.t_test_obj is object (id integer, type_id integer);
/  					

begin sys.dbms_aqadm.drop_queue_table(queue_table => 'K.Q_TEST_TAB', force=> TRUE); end;
/

begin
  sys.dbms_aqadm.create_queue_table
  ( queue_table        => 'K.Q_TEST_TAB'
   ,queue_payload_type => 'K.T_TEST_OBJ'
   ,compatible         => '10.0.0'  --'8.1'
   ,storage_clause     => 'nocompress
                           tablespace u1'
   ,sort_list          => 'PRIORITY,ENQ_TIME'
   ,multiple_consumers =>  false
   ,message_grouping   =>  0
   ,comment            =>  'MEMORY LEAK KTC LATCH SUBH AQ test'
   ,secure             =>  false
   );
end;
/

begin
  sys.dbms_aqadm.stop_queue (queue_name => 'K.TEST_QUEUE');
  sys.dbms_aqadm.drop_queue (queue_name => 'K.TEST_QUEUE');
end;
/

begin
  sys.dbms_aqadm.create_queue
  ( queue_name      => 'K.TEST_QUEUE'
   ,queue_table     => 'K.Q_TEST_TAB'
   ,queue_type      => sys.dbms_aqadm.normal_queue
   ,max_retries     => 100
   ,retry_delay     => 2
   ,retention_time  => 604800
   ,comment         => 'AQ Queue Test'
   );
end;
/

begin sys.dbms_aqadm.start_queue(queue_name => 'K.TEST_QUEUE', enqueue => true, dequeue => true); end;
/

create or replace procedure test_enq(p_cnt number := 1, p_sleep number := 0) as
  l_enqueue_options     dbms_aq.enqueue_options_t;
  l_message_properties  dbms_aq.message_properties_t;
  l_message_handle      raw(16);
  l_msg                 t_test_obj;
begin 
  --visibility: 
  --   dbms_aq.immediate: no commit is required, message is enqueued and committed for dequeue immediately in an autonomous transaction.
  --   dbms_aq.on_commit (default): commit is required, message is visibile for dequeue after COMMIT. enqueue session has an open transaction before commit.
  --l_enqueue_options.visibility    := dbms_aq.immediate;
   
  for i in 1..p_cnt loop
    l_msg := t_test_obj(i, 2);
            
    dbms_aq.enqueue(queue_name         => 'K.TEST_QUEUE',
                    enqueue_options    => l_enqueue_options,
                    message_properties => l_message_properties,
                    payload            => l_msg,
                    msgid              => l_message_handle);
    --commit;  -- no commit
    
    if p_sleep > 0 then
      dbms_lock.sleep(p_sleep);
    end if;
  end loop;
end;
/
					
					
create or replace procedure test_deq(p_cnt number := 1, p_dur number := 0, p_sleep number := 0) as
  l_dequeue_options     dbms_aq.dequeue_options_t;
  l_message_properties  dbms_aq.message_properties_t;
  l_message_handle      raw(16);
  l_msg                 t_test_obj;
  l_cnt   number := 0;
begin
  for i in 1..p_cnt loop
    begin 
   	  l_dequeue_options.wait := p_dur;
   	  if p_sleep > 0 then
   	  	 dbms_lock.sleep(p_sleep);
   	  end if;
   	  
      dbms_aq.dequeue(queue_name         => 'K.TEST_QUEUE',
                      dequeue_options    => l_dequeue_options,
                      message_properties => l_message_properties,
                      payload            => l_msg,
                      msgid              => l_message_handle);
            
      --dbms_output.put_line ('id = ' || l_msg.id || ', type_id = ' || l_msg.type_id);
      
      --commit;  -- no commit
      l_cnt := l_cnt + 1;
    exception when others then null;
    end;
  end loop;
	dbms_output.put_line ('l_cnt = ' || l_cnt);
end;
/					        

-- purge queue for repeated test
declare
	po dbms_aqadm.aq$_purge_options_t;
begin
	dbms_aqadm.purge_queue_table('K.Q_TEST_TAB', null, po);
end;
/


2. Test Run


We open 3 Sqlplus sessions.
In SID-1, we enqueue 1e6 messages with commit.
In SID-2, dequeue all 1e6 messages without commit to observe 'KTC latch subh' Memory Leak.
Then in SID-3, dequeue one portion (1e3) of messages to show dequeue long running.


3.1. Queue Filling: SID-1 Enqueue with commit



SQL > exec test_enq(1e6);

    Elapsed: 00:01:55.93
    
SQL > commit;
		
SQL > select segment_name, segment_type, bytes from dba_segments where segment_name like '%Q_TEST_TAB%';

    SEGMENT_NAME         SEGMENT_TYPE            BYTES
    -------------------- ------------------ ----------
    Q_TEST_TAB           TABLE               125829120
    AQ$_Q_TEST_TAB_T     INDEX                   65536
    AQ$_Q_TEST_TAB_I     INDEX                51380224

SQL > select count(*) from q_test_tab;

      COUNT(*)
    ----------
       1000000


3.2. AQ Dequeue 'KTC latch subh' Memory Leak: SID-2 Dequeue without commit



SQL > select round(bytes/1024/1024, 2) mb, t.* from v$sgastat t where pool='shared pool' and name = 'KTC latch subh';

      MB  POOL         NAME             BYTES
    ----  -----------  --------------  ------
     .04  shared pool  KTC latch subh   46992

SQL > exec test_deq(1e6);

    l_cnt =1000000
    PL/SQL procedure successfully completed.
    Elapsed: 01:26:32.57

SQL > select round(bytes/1024/1024, 2) mb, t.* from v$sgastat t where pool='shared pool' and name = 'KTC latch subh';
                                 
      MB POOL         NAME                   BYTES
    ---- -----------  -------------- -------------
     992 shared pool  KTC latch subh 1,039,969,240
We can list all SGA memory resize operations:

SQL > select component, parameter, oper_type, initial_size, final_size, final_size-initial_size delta, start_time, end_time 
        from v$memory_resize_ops where parameter in ('db_cache_size', 'shared_pool_size') order by start_time, parameter;     

    COMPONENT              PARAMETER          OPER_TYPE    INITIAL_SIZE       FINAL_SIZE          DELTA  START_TI  END_TIME
    ---------------------  -----------------  ---------- -------------- ---------------- --------------  --------  --------
    DEFAULT buffer cache   db_cache_size      STATIC                  0    2,097,152,000  2,097,152,000  13:46:14  13:46:14
    shared pool            shared_pool_size   STATIC                  0    1,056,964,608  1,056,964,608  13:46:14  13:46:14
    DEFAULT buffer cache   db_cache_size      SHRINK      2,097,152,000    2,080,374,784    -16,777,216  14:31:36  14:31:36
    shared pool            shared_pool_size   GROW        1,056,964,608    1,073,741,824     16,777,216  14:31:36  14:31:36
    DEFAULT buffer cache   db_cache_size      SHRINK      2,080,374,784    2,063,597,568    -16,777,216  14:32:37  14:32:37
    shared pool            shared_pool_size   GROW        1,073,741,824    1,090,519,040     16,777,216  14:32:37  14:32:37
    DEFAULT buffer cache   db_cache_size      SHRINK      2,063,597,568    2,046,820,352    -16,777,216  14:33:41  14:33:41
    shared pool            shared_pool_size   GROW        1,090,519,040    1,107,296,256     16,777,216  14:33:41  14:33:41
    .....                                                                                                          
    shared pool            shared_pool_size   GROW        1,694,498,816    1,711,276,032     16,777,216  15:34:06  15:34:06
    DEFAULT buffer cache   db_cache_size      SHRINK      1,442,840,576    1,426,063,360    -16,777,216  15:36:18  15:36:18
    shared pool            shared_pool_size   GROW        1,711,276,032    1,728,053,248     16,777,216  15:36:18  15:36:18
    DEFAULT buffer cache   db_cache_size      SHRINK      1,426,063,360    1,409,286,144    -16,777,216  15:38:33  15:38:33
    shared pool            shared_pool_size   GROW        1,728,053,248    1,744,830,464     16,777,216  15:38:33  15:38:33
    DEFAULT buffer cache   db_cache_size      SHRINK      1,409,286,144    1,392,508,928    -16,777,216  15:41:14  15:41:14
    shared pool            shared_pool_size   GROW        1,744,830,464    1,761,607,680     16,777,216  15:41:14  15:41:14
    
    88 rows selected.
There are 42 resize operations from 14:31:36 to 15:41:14, each time shift 16MB from DEFAULT buffer cache to shared pool. The memory changes are:

	shared_pool_size  GRO 1,008M	-> 1,680M
	db_cache_size   	SHR 2,000M  -> 1,328M
	KTC latch subh	          0M  ->	 992M	
By the way, we can observe many ORA-04031 errors if disable SGA auto resize with:

  -- disable autosga to generate ORA-04031
  alter system set "_memory_imm_mode_without_autosga"=false;


3.3. Dequeue Long Running: SID-3 Dequeue


We can see 7 hours long dequeue by:

SQL > exec test_deq(1e3);

    l_cnt =0
    PL/SQL procedure successfully completed.
    Elapsed: 07:51:02.15
If we trace SID-3, they are all about 'db file sequential read' on file#=3 (UNDO file) to construct CR block of AQ Queue messages.

PARSING IN CURSOR #139862144662288 len=587 dep=1 uid=0 oct=3 lid=0 tim=25914746826315 hv=3930832701 ad='7f7cb148' sqlid='1y072amp4rgtx'
select  /*+ INDEX(TAB AQ$_Q_TEST_TAB_I) */   tab.rowid, tab.msgid, tab.corrid, tab.priority, tab.delay,   tab.expiration ,tab.retry_count, 
tab.exception_qschema,   tab.exception_queue, tab.chain_no, tab.local_order_no, tab.enq_time,   tab.time_manager_info, tab.state, 
tab.enq_tid, tab.step_no,   tab.sender_name, tab.sender_address, tab.sender_protocol,   tab.dequeue_msgid, tab.user_prop, tab.user_data   
from "K"."Q_TEST_TAB" tab  where q_name = :1 and (state = :2  )  order by q_name, state, priority, enq_time, step_no, chain_no, 
local_order_no for update skip locked
END OF STMT

EXEC #139862144662288:c=12383,e=39484,p=4,cr=55,cu=0,mis=1,r=0,dep=1,og=1,plh=1032419615,tim=25914746866093
WAIT #139862144662288: nam='db file sequential read' ela= 5849 file#=3 block#=621 blocks=1 obj#=0 tim=25914746872095
WAIT #139862144662288: nam='db file sequential read' ela= 5476 file#=3 block#=620 blocks=1 obj#=0 tim=25914746877756
WAIT #139862144662288: nam='db file sequential read' ela= 11281 file#=3 block#=619 blocks=1 obj#=0 tim=25914746889170
WAIT #139862144662288: nam='db file sequential read' ela= 5466 file#=3 block#=618 blocks=1 obj#=0 tim=25914746894797
WAIT #139862144662288: nam='db file sequential read' ela= 5655 file#=3 block#=617 blocks=1 obj#=0 tim=25914746900588
WAIT #139862144662288: nam='db file sequential read' ela= 5374 file#=3 block#=616 blocks=1 obj#=0 tim=25914746906117
WAIT #139862144662288: nam='db file sequential read' ela= 5522 file#=3 block#=615 blocks=1 obj#=0 tim=25914746911793
WAIT #139862144662288: nam='db file sequential read' ela= 5488 file#=3 block#=614 blocks=1 obj#=0 tim=25914746917451
WAIT #139862144662288: nam='db file sequential read' ela= 108815 file#=3 block#=613 blocks=1 obj#=0 tim=25914747026421