Thursday, July 10, 2025

Blog List

"library cache: bucket mutex X" on V$ Fixed Views: Case Test

Oracle DB experienced heavy "library cache: bucket mutex X" when V$ Fixed Views were queried frequently,
which impeded Oracle normal functions like background processes Mnnn and MZnn.

Note: Tested on Oracle 19c.

1. Test Setup

In the following code, we query V$LIBCACHE_LOCKS (similar behavior for DBA_KGLLOCK), which is a union of X$KGLLK and X$KGLPN.


create or replace procedure test_bucket_mutex(p_job_id number, p_loop_count number) as
begin
  for i in 1..p_loop_count loop
    for c in (select * from V$LIBCACHE_LOCKS where rownum <= p_job_id) loop
      null;
    end loop;
  end loop;
end;
/

-- exec test_bucket_mutex(1, 1);

create or replace procedure test_bucket_mutex_jobs(p_job_count number, p_loop_count number) as
begin
  for i in 1..p_job_count loop
    dbms_scheduler.create_job (
      job_name        => 'TEST_JOB_'||i,
      job_type        => 'PLSQL_BLOCK',
      job_action      => 'begin test_bucket_mutex('||i||', '||p_loop_count||'); end;',    
      start_date      => systimestamp,
      --repeat_interval => 'systimestamp',
      auto_drop       => true,
      enabled         => true);
  end loop;
end;
/

2. Test Run

Run a test with 16 Jobs:


exec test_bucket_mutex_jobs(16, 1e4);

AWR shows:

Top 10 Foreground Events by Total Wait Time

Event	Waits	Total Wait Time (sec)	Avg Wait	% DB time	Wait Class
library cache: bucket mutex X	939,402	17.1K	18.22ms	59.8	Concurrency
DB CPU		7062.2		24.7
library cache: mutex X	5,139	69.3	13.48ms	.2	Concurrency
control file sequential read	20,874	2.9	137.27us	.0	System I/O
db file sequential read	3,195	1.8	557.97us	.0	User I/O
Disk file operations I/O	2,602	.2	72.88us	.0	User I/O
cursor: pin S	8	.1	17.55ms	.0	Concurrency
log file sync	20	.1	6.52ms	.0	Commit
direct path write	59	0	217.10us	.0	User I/O
latch free	1	0	10.66ms	.0	Other

SQL ordered by Elapsed Time

Elapsed Time (s)	Executions	Elapsed Time per Exec (s)	%Total	%CPU	SQL Id	SQL Module	SQL Text
28,575.23	159,710	0.18	99.89	24.66	9sz3zkc69bpjh	DBMS_SCHEDULER	SELECT * FROM V$LIBCACHE_LOCKS...
1,790.62	1	1,790.62	6.26	24.82	0f1dxm6hd2c2u	DBMS_SCHEDULER	DECLARE job BINARY_INTEGER := ...
1,790.18	1	1,790.18	6.26	24.65	5vp17fw0hgrz1	DBMS_SCHEDULER	DECLARE job BINARY_INTEGER := ...

Mutex Sleep Summary

Mutex Type	Location	Sleeps	Wait Time (ms)
Library Cache	kglic1 49	1,696,792	15,671,698
Library Cache	kglic4 145	172,634	1,423,319
Library Cache	kglGetHandleReference 123	8,262	61,133
Library Cache	kglReleaseHandleReference 124	1,176	8,110
Library Cache	kglhdgn1 62	9	71
Cursor Pin	kksLockDelete [KKSCHLPIN6]	9	138
Cursor Pin	kksfbc [KKSCHLFSP2]	2	2
Row Cache	[14] kqrScan	1	0

Top SQL with Top Row Sources

SQL ID	Plan Hash	Executions	% Activity	Row Source	% Row Source	Top Event	% Event	SQL Text
9sz3zkc69bpjh	2131580607	2794	98.87	FIXED TABLE - FULL	49.95	library cache: bucket mutex X	30.36	SELECT * FROM V$LIBCACHE_LOCKS...
				FIXED TABLE - FULL	48.57	library cache: bucket mutex X	31.00

During the test, we can monitor "library cache: bucket mutex X" waits by:


select 'BLOCKING_SESSION'   sess, program, event, mod(s.p1, power(2, 17)) "buckt muext(child_latch)", s.p1, s.p2, s.p3, s.sql_id, q.sql_text, m.*, s.*, q.* 
from v$session s, v$mutex_sleep_history m, v$sqlarea q
 where s.sid = m.blocking_session and s.sql_id = q.sql_id and m.sleep_timestamp > sysdate-5/1440 and m.sleeps > 3
union all
select 'REQUESTING_SESSION' sess, program, event, mod(s.p1, power(2, 17)) "buckt muext(child_latch)", s.p1, s.p2, s.p3, s.sql_id, q.sql_text, m.*, s.*, q.* 
from v$session s, v$mutex_sleep_history m, v$sqlarea q
 where s.sid = m.requesting_session and s.sql_id = q.sql_id and m.sleep_timestamp > sysdate-5/1440 and m.sleeps > 3;

select bs.session_id, bs.session_serial#, bs.program, bs.event, bs.p1, bs.blocking_session, bs.blocking_session_serial#, bs.sql_id
      ,s.sample_time, s.session_id, s.session_serial#, s.program, s.event, s.p1, s.blocking_session, s.blocking_session_serial#, s.sql_id
  from v$active_session_history bs, v$active_session_history s
where s.event  = 'library cache: mutex X'
  and bs.event = 'library cache: bucket mutex X'
  and s.sample_time = bs.sample_time
  and mod(s.p1, power(2, 17)) = bs.p1
  and s.session_id != bs.session_id
  and s.sample_time > sysdate-5/1440
order by s.sample_time desc, bs.session_id, s.session_id;

select sql_id, last_active_time, executions, disk_reads, direct_writes, buffer_gets, rows_processed, sql_text, v.* 
from v$sqlarea v where sql_id in ('9sz3zkc69bpjh');

3. "library cache: bucket mutex X" Tracing

Open one Sqlplus window, execute bpftrace on its process (UNIX pid 293988):


bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2, 
             uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2 / pid==293988 /
   {@ustack_cnt[probe] = count();}'

Run query to fetch rows with rownum limit:


SQL > select * from V$LIBCACHE_LOCKS where rownum <= 10;
  10 rows selected.

Here bpftrace output:


	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 123462
	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       269774

Same bpftrace output for count query:


SQL > select count(*) from V$LIBCACHE_LOCKS where rownum <= 10;
    COUNT(*)
  ----------
          10

Run query without rownum limit, we get the similar output due to FIXED TABLE FULL on X$KGLLK and X$KGLPN.


SQL >  select * from V$LIBCACHE_LOCKS;
  1226 rows selected.
        
	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 123488
	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       269939

Same bpftrace output for count query:


SQL > select count(*) from V$LIBCACHE_LOCKS;
    COUNT(*)
  ----------
        1226

To get Parameter P1 and P3 of "library cache: bucket mutex X", we can use bpftrace script:


bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2 / pid==293988 /
   {@ustack_cnt["kglGetBucketMutex", reg("si"), reg("r8")] = count();}'

Pick a few output lines (first number is P1, second is P3):


  @ustack_cnt[kglGetBucketMutex, 6574, 145]: 2
  @ustack_cnt[kglGetBucketMutex, 5442, 49]:  2
  @ustack_cnt[kglGetBucketMutex, 3311, 49]:  2

Then we can find them in v$db_object_cache:


select child_latch, hash_value, mod(hash_value, power(2, 17)) bucket_p1, owner, substr(name, 1, 50) name, namespace, type 
from v$db_object_cache t where child_latch in (
6574,
5442,
3311
);  

CHILD_LATCH HASH_VALUE  BUCKET_P1 OWNER NAME                                          NAMESPACE            TYPE
----------- ---------- ---------- ----- --------------------------------------------- -------------------- ----------
       3311 1185156335       3311 SYS   java/util/function/DoubleBinaryOperator       TABLE/PROCEDURE      JAVA CLASS
       5442 1797395778       5442       UPDATE SYS.WRI$_ADV_                          SQL AREA             CURSOR
       6574  363993518       6574       WITH binds as           (select :dbid         SQL AREA             CURSOR

To get Parameter P3 of library cache: bucket mutex X, we can use bpftrace script:


bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2 / pid==293988 /
   {@ustack_cnt["kglGetBucketMutex", reg("r8")] = count();}'
   
  @ustack_cnt[kglGetBucketMutex, 64]:  1
  @ustack_cnt[kglGetBucketMutex, 62]:  9
  @ustack_cnt[kglGetBucketMutex, 49]:  57846
  @ustack_cnt[kglGetBucketMutex, 145]: 65558

We can see that bpftrace output [kglGetBucketMutex, 49] and [kglGetBucketMutex, 145] big numbers match "kglic1 49" and "kglic1 145" big stats in AWR - Mutex Sleep Summary.

If we query normal tables, there are much less kglGetBucketMutex and kglGetMutex:


SQL > select count(*) from dba_objects;

    COUNT(*)
  ----------
     2004127
   
bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2, 
             uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2 / pid==293988 /
   {@ustack_cnt[probe] = count();}'
   
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 1
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       15

4. GV$LIBCACHE_LOCKS and DBA_KGLLOCK DDL and Xplan

Both V$LIBCACHE_LOCKS and SYS.DBA_KGLLOCK are union of X$KGLLK and X$KGLPN. Any queries on them are FIXED TABLE FULL on X$KGLLK and X$KGLPN (rownum limit has no effect).


------ V$LIBCACHE_LOCKS ------
SELECT INST_ID, 'LOCK', KGLLKADR, KGLLKUSE, KGLLKSES, KGLLKHDL, KGLLKPNC, KGLLKCNT, KGLLKMOD, KGLLKREQ, KGLLKSPN, CON_ID
  FROM X$KGLLK
UNION
SELECT INST_ID, 'PIN',  KGLPNADR, KGLPNUSE, KGLPNSES, KGLPNHDL, KGLPNLCK, KGLPNCNT, KGLPNMOD, KGLPNREQ, KGLPNSPN, CON_ID
  FROM X$KGLPN
  
select * from V$LIBCACHE_LOCKS where rownum <= :B1;

Plan hash value: 2131580607
 
---------------------------------------------------------------------------------------------------
| Id  | Operation             | Name              | Rows  | Bytes |TempSpc| Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |                   | 20956 |  2414K|       |   868   (1)| 00:00:01 |
|*  1 |  COUNT STOPKEY        |                   |       |       |       |            |          |
|   2 |   VIEW                | GV$LIBCACHE_LOCKS | 20956 |  2414K|       |   868   (1)| 00:00:01 |
|*  3 |    SORT UNIQUE STOPKEY|                   | 20956 |  2361K|  3656K|   868   (1)| 00:00:01 |
|   4 |     UNION-ALL         |                   |       |       |       |            |          |
|*  5 |      FIXED TABLE FULL | X$KGLLK           | 18837 |  1048K|       |     0   (0)| 00:00:01 |
|*  6 |      FIXED TABLE FULL | X$KGLPN           |  2119 |   132K|       |     0   (0)| 00:00:01 |
---------------------------------------------------------------------------------------------------


------ DBA_KGLLOCK ------

CREATE OR REPLACE FORCE NONEDITIONABLE VIEW SYS.DBA_KGLLOCK
(KGLLKUSE, KGLLKHDL, KGLLKMOD, KGLLKREQ, KGLLKTYPE)
BEQUEATH DEFINER
AS 
  select kgllkuse, kgllkhdl, kgllkmod, kgllkreq, 'Lock' kgllktype from x$kgllk
    union all
  select kglpnuse, kglpnhdl, kglpnmod, kglpnreq, 'Pin'  kgllktype from x$kglpn;
  
  
select * from DBA_KGLLOCK where rownum <= :B1;

Plan hash value: 3293675002
 
-----------------------------------------------------------------------------------
| Id  | Operation           | Name        | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------
|   0 | SELECT STATEMENT    |             | 20956 |   982K|     0   (0)| 00:00:01 |
|*  1 |  COUNT STOPKEY      |             |       |       |            |          |
|   2 |   VIEW              | DBA_KGLLOCK | 20956 |   982K|     0   (0)| 00:00:01 |
|   3 |    UNION-ALL        |             |       |       |            |          |
|   4 |     FIXED TABLE FULL| X$KGLLK     | 18837 |   423K|     0   (0)| 00:00:01 |
|   5 |     FIXED TABLE FULL| X$KGLPN     |  2119 | 48737 |     0   (0)| 00:00:01 |
-----------------------------------------------------------------------------------

5. Related Work

(1). Dynamic_plan_table, x$kqlfxpl and extreme library cache latch contention (Posted by Riyaj Shamsudeen on March 13, 2009)
(https://orainternals.wordpress.com/tag/kglic/)

We traced following two queries with above bpftrace scripts,
and the output shows that kglGetBucketMutex requests are proptional to rownum limit (:B1).
(For V$LIBCACHE_LOCKS and DBA_KGLLOCK, kglGetBucketMutex requests are constant, irrelvant to rownum limit)


select count(*) from GV$SQL_PLAN where rownum <= :B1;       --FIXED TABLE FULL on X$KQLFXPL
select count(*) from GV$ALL_SQL_PLAN where rownum <= :B1;   --FIXED TABLE FULL on X$ALL_KQLFXPL

Here the test output:


bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2,
             uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2 / pid==299591 /
   {@ustack_cnt[probe] = count();}'
Attaching 2 probes...

select count(*) from GV$SQL_PLAN where rownum <= 1;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 47
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       111

select count(*) from GV$SQL_PLAN where rownum <= 10;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 117
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       287

select count(*) from GV$SQL_PLAN where rownum <= 100;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 436
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       974

select count(*) from GV$SQL_PLAN where rownum <= 1000;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 3523
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       7725

(2). Oracle PLITBLM "library cache: mutex X"
(https://ksun-oracle.blogspot.com/2021/04/oracle-plitblm-library-cache-mutex-x.html)

(3). Row Cache Object and Row Cache Mutex Case Study
(https://ksun-oracle.blogspot.com/2020/08/row-cache-object-and-row-cache-mutex.html)

(4). ORACLE MUTEXES, FRITS HOOGLAND (https://fritshoogland.wordpress.com/wp-content/uploads/2020/04/mutexes-2.pdf)

6. GDB script

We can also use following GDB script to trace "library cache: bucket mutex X":


---------------- bucket_mutex_1, gdb -x bucket_mutex_1 -p 293988 ----------------

set pagination off
set logging file bucket_mutex_1.log
set logging overwrite on
set logging on
set $kmutexget = 1
set $kbucketget = 1

break kglGetBucketMutex
command 
printf "------kglGetBucketMutex (%i) ---> Bucket (rsi): %d (%X), Location(r8d): %d (%X)\n", $kbucketget++, $rsi, $rsi, $r8d, $r8d
backtrace 4
continue
end

break kglGetMutex
command 
printf "------kglGetMutex (%i) ---> Mutex addr (rsi): %d (%X), Location(r8d): %d (%X)\n", $kmutexget++, $rsi, $rsi, $r8d, $r8d
continue
end

7. kglMutexLocations[] array

For all kglMutexLocations, we can try to list them with following command.
They often appear in AWR section "Mutex Sleep Summary" for Mutex Type: "Library Cache"
(or V$MUTEX_SLEEP / V$MUTEX_SLEEP_HISTORY.location).


define PrintkglMutexLocations
  set pagination off
  set $i = 0
  while $i < $arg0
    x /s *((uint64_t *)&kglMutexLocations + $i)
    set $i = $i + 1
  end
end

(gdb) PrintkglMutexLocations 150

0x15f42524:     "kglic1    49"
0x15f42a24:     "kglic2 127"
0x15f42b68:     "kglic3       144"
0x15f42b7c:     "kglic4       145"

(Only "kglic" Mutex are shown here)

Tuesday, July 1, 2025

Oracle dbms_hprof: Uncatchable Time in PL/SQL Nested Program Units

This Blog will demonstrate dbms_hprof uncatchable time in PL/SQL nested procedures and packages, which often caused misleading in elapsed time accounting, and in locating time consumption PL/SQL programs.

Note: Tested on Oracle 19c.

1. Test Code

In following code, we have one procedure which calls a nested procedure:


create or replace procedure hprof_plsql_proc_test(p_cnt_outer number, p_cnt_inner number)
is
  type t_rec is record(
             id      pls_integer
            ,name    varchar2(1000)
           );
  type t_rec_tab is table of t_rec index by pls_integer;

  l_rec       t_rec;
  l_rec_tab   t_rec_tab;
  l_row_cnt   number;

  procedure proc_nested(p_cnt_outer number, p_cnt_inner number)
  is
  begin
    for i in 1..p_cnt_outer loop    
      l_rec_tab := t_rec_tab();
      select /*+ Start */ count(*) into l_row_cnt from dual;
      for i in 1..p_cnt_inner loop
        l_rec := t_rec(
                id      => 1
               ,name    => lpad('A', 900, 'B')||i
              );
        l_rec_tab(i) := l_rec;
      end loop;
      select /*+ End */ count(*) into l_row_cnt from dual;
    end loop;
  end;

begin
  dbms_output.put_line('hprof_plsql_proc_test('||p_cnt_outer||', '||p_cnt_inner||')');

  proc_nested(p_cnt_outer, p_cnt_inner);
end;
/

2. Test Run with Unwrapped PL/SQL

Run above procedure with dbms_hprof, and create html hprofile:


create or replace directory TEST_DIR as '/testdb/oradata/hprof/';

set serveroutput on
declare
  l_test_dir        varchar2(100) := 'TEST_DIR';
  l_hprof_file_name varchar2(100) := 'hprof_plsql_proc_test_UnWrapped_1.hpf';
  l_runid           number;
begin
  dbms_hprof.start_profiling (
    location => l_test_dir,
    filename => l_hprof_file_name);

  hprof_plsql_proc_test(1e2, 1e5);

  dbms_hprof.stop_profiling;

  l_runid := dbms_hprof.analyze (
     location    => l_test_dir,
     filename    => l_hprof_file_name,
     run_comment => 'hprof_plsql_proc_test Test');

  dbms_output.put_line('l_runid=' || l_runid);
end;
/

HProf shows that HPROF_PLSQL_PROC_TEST elapsed time is sum of all its Children.

But for Subtreee PROC_NESTED, the sum of all its Children (6635+6253=12888) is much less than Subtreee time (12642594), 12629706 (12642594 - 12888) is unaccounted.

Parents and Children Elapsed Time (microsecs) Data


HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)



Subtree
Ind%
Function
Ind%
Descendants
Ind%
Calls
Ind%
Function Name
SQL ID
SQL TEXT

12652694
 100%
10088
 0.1%
12642606
 100%
1
 0.5%
HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Parents:
12652694
 100%
10088
 100%
12642606
 100%
1
 100%
ORACLE.root 
Children:
12642594
 100%
12629706
 100%
12888
 100%
1
 100%
HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
12
 0.0%
2
 100%
10
 100%
1
 100%
SYS.DBMS_OUTPUT.PUT_LINE (Line 109)



HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)



Subtree
Ind%
Function
Ind%
Descendants
Ind%
Calls
Ind%
Function Name
SQL ID
SQL TEXT

12642594
 100%
12629706
99.8%
12888
 0.1%
1
 0.5%
HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
Parents:
12642594
 100%
12629706
 100%
12888
 100%
1
 100%
HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Children:
6635
51.5%
6635
 100%
0
N/A
100
 100%
HPROF_PLSQL_PROC_TEST.__static_sql_exec_line26 (Line 26) 3cfwwbgj9ft44 SELECT /*+ End */ COUNT(*) FROM DUAL
6253
48.5%
6253
 100%
0
N/A
100
 100%
HPROF_PLSQL_PROC_TEST.__static_sql_exec_line18 (Line 18) 8tuspbw8fxyrn SELECT /*+ Start */ COUNT(*) FROM DUAL


Look HProf raw file, we can see that time "P#X 139321" is not attributed to any program (PL/SQL or SQL):


P#C PLSQL."K"."HPROF_PLSQL_PROC_TEST"::7."HPROF_PLSQL_PROC_TEST.PROC_NESTED"#3048d2af80817a01 #13
P#X 8
P#C SQL."K"."HPROF_PLSQL_PROC_TEST"::7."__static_sql_exec_line18" #18."8tuspbw8fxyrn"
P#! SELECT /*+ Start */ COUNT(*) FROM DUAL
P#X 46
P#R
P#X 139321
P#C SQL."K"."HPROF_PLSQL_PROC_TEST"::7."__static_sql_exec_line26" #26."3cfwwbgj9ft44"
P#! SELECT /*+ End */ COUNT(*) FROM DUAL
P#X 73
 



3. Test Run with Wrapped PL/SQL


Install wrapped PL/SQL (see Section 4. Wrapped PL/SQL), and run it with dbms_hprof, then create html hprofile:


set serveroutput on
declare
  l_test_dir        varchar2(100) := 'AAA_RAC_XCHNG';
  l_hprof_file_name varchar2(100) := 'hprof_plsql_proc_test_Wrapped_1.hpf';
  l_runid           number;
begin
  dbms_hprof.start_profiling (
    location => l_test_dir,
    filename => l_hprof_file_name);

  hprof_plsql_proc_test(1e2, 1e5);

  dbms_hprof.stop_profiling;

  l_runid := dbms_hprof.analyze (
     location    => l_test_dir,
     filename    => l_hprof_file_name,
     run_comment => 'hprof_plsql_proc_test Test');

  dbms_output.put_line('l_runid=' || l_runid);
end;
/
 

HProf shows that the top call: HPROF_PLSQL_PROC_TEST has time = 12865442, but all its Children has only 13408.

The unaccounted time amounts to 12852034 (12865442-13408), almost no time is profiled.

Parents and Children Elapsed Time (microsecs) Data

HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)



Subtree
Ind%
Function
Ind%
Descendants
Ind%
Calls
Ind%
Function Name
SQL ID
SQL TEXT

12865442
 100%
12852034
99.9%
13408
 0.1%
1
 0.5%
HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Parents:
12865442
 100%
12852034
 100%
13408
 100%
1
 100%
ORACLE.root 
Children:
6879
51.3%
6879
 100%
0
N/A
100
 100%
HPROF_PLSQL_PROC_TEST.__static_sql_exec_line26 (Line 26) 3cfwwbgj9ft44 SELECT /*+ End */ COUNT(*) FROM DUAL
6517
48.6%
6517
 100%
0
N/A
100
 100%
HPROF_PLSQL_PROC_TEST.__static_sql_exec_line18 (Line 18) 8tuspbw8fxyrn SELECT /*+ Start */ COUNT(*) FROM DUAL
12
 0.1%
1
 100%
11
 100%
1
 100%
SYS.DBMS_OUTPUT.PUT_LINE (Line 109)




4. Wrapped PL/SQL



testdb $ wrap iname=hprof_plsql_proc_test.sql

testdb $ cat hprof_plsql_proc_test.plb
create or replace procedure k.hprof_plsql_proc_test wrapped
a000000
1f
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
7
3ab 233
YPWOC06TiznHzO1ynQEtqQmVKQ4wg41eLq5qfHRDNA/uugXMeaR4dUB33xnjiWxc7OHa6eMo
UnVwftozxoagoNnUy3AmGAHaM04S3mtcStgl57sTzcETVdi/D/YMuQNVnbkzODjH3tjldCfI
PWWEpIT9hgkzLRHd2fupk5Tn1CdHN5xnByVWGlMXbg0XqPL8cLg3kk3KpynTkMdP/OTRPeru
FKHxhWT06ICM2KGCeErLNw6LpKb6pfPCjSTeew4TUaHEPIncmHDrvctMqtP8r80M9J+x6KGz
64t61S0hhflZb7OPfZe4rwEDMXHejzaIG4z3mqIWXFPkcpJbiXgDHUdLqNuT9OHHHYbubIn4
sM0cQAuFXGwuV19NmMwZe8c7p93lw/WcqgsRNdp0AHNiQlXStnkAQhfEfK0uMyUJCY4+mdYh
4+57oeDnwsrl9j/AjJrZBXpd9/2We+5ua5Gyl4Ihj8bZJFql/feu8r/TLX6Tey4Yl199A6wM
dNZOnVlpsUFAMi4MM2tACZ7pNXedjIJkwX0xaWMdxwiydIiGxw==

/
 


5. PL/SQL Package Test Cdoe


The similar behaviour can be demonstrated for nested procedure/function in PL/SQL package.


create or replace package hprof_plsql_pkg_test
is
  procedure proc_outer(p_cnt_outer number, p_cnt_inner number);
end;
/

create or replace package body hprof_plsql_pkg_test
is
  type t_rec is record(
             id      pls_integer
            ,name    varchar2(1000)
           );
  type t_rec_tab is table of t_rec index by pls_integer;

  l_rec       t_rec;
  l_rec_tab   t_rec_tab;
  l_row_cnt   number;

  procedure proc_nested(p_cnt_outer number, p_cnt_inner number)
  is
  begin
    for i in 1..p_cnt_outer loop    
      l_rec_tab := t_rec_tab();
      select /*+ Start */ count(*) into l_row_cnt from dual;
      for i in 1..p_cnt_inner loop
        l_rec := t_rec(
                id      => 1
               ,name    => lpad('A', 900, 'B')||i
              );
        l_rec_tab(i) := l_rec;
      end loop;
      select /*+ End */ count(*) into l_row_cnt from dual;
    end loop;
  end;
  
  procedure proc_outer(p_cnt_outer number, p_cnt_inner number)
  is
  begin
    proc_nested(p_cnt_outer, p_cnt_inner);
  end;
end;
/

Subtree	Ind%	Function	Ind%	Descendants	Ind%	Calls	Ind%	Function Name	SQL ID	SQL TEXT
12652694	100%	10088	0.1%	12642606	100%	1	0.5%	HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Parents:
12652694	100%	10088	100%	12642606	100%	1	100%	ORACLE.root
Children:
12642594	100%	12629706	100%	12888	100%	1	100%	HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
12	0.0%	2	100%	10	100%	1	100%	SYS.DBMS_OUTPUT.PUT_LINE (Line 109)

Subtree	Ind%	Function	Ind%	Descendants	Ind%	Calls	Ind%	Function Name	SQL ID	SQL TEXT
12642594	100%	12629706	99.8%	12888	0.1%	1	0.5%	HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
Parents:
12642594	100%	12629706	100%	12888	100%	1	100%	HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Children:
6635	51.5%	6635	100%	0	N/A	100	100%	HPROF_PLSQL_PROC_TEST.__static_sql_exec_line26 (Line 26)	3cfwwbgj9ft44	SELECT /+ End / COUNT(*) FROM DUAL
6253	48.5%	6253	100%	0	N/A	100	100%	HPROF_PLSQL_PROC_TEST.__static_sql_exec_line18 (Line 18)	8tuspbw8fxyrn	SELECT /+ Start / COUNT(*) FROM DUAL

Subtree	Ind%	Function	Ind%	Descendants	Ind%	Calls	Ind%	Function Name	SQL ID	SQL TEXT
12865442	100%	12852034	99.9%	13408	0.1%	1	0.5%	HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Parents:
12865442	100%	12852034	100%	13408	100%	1	100%	ORACLE.root
Children:
6879	51.3%	6879	100%	0	N/A	100	100%	HPROF_PLSQL_PROC_TEST.__static_sql_exec_line26 (Line 26)	3cfwwbgj9ft44	SELECT /+ End / COUNT(*) FROM DUAL
6517	48.6%	6517	100%	0	N/A	100	100%	HPROF_PLSQL_PROC_TEST.__static_sql_exec_line18 (Line 18)	8tuspbw8fxyrn	SELECT /+ Start / COUNT(*) FROM DUAL
12	0.1%	1	100%	11	100%	1	100%	SYS.DBMS_OUTPUT.PUT_LINE (Line 109)

Tuesday, June 17, 2025

ORA-01002: fetch out of sequence: One Case Test


drop table test_tab;
 
create table test_tab (x number, y varchar2(100)); 

declare
  l_cnt number := 0;
begin
  execute immediate 'truncate table test_tab';
  insert into test_tab select level x, lpad('A', 90, 'B')||level from dual connect by level <= 500;
  
  for c in (select x, y from test_tab) loop
    l_cnt := l_cnt + 1;
    dbms_output.put_line(c.x);
    if c.x > 300 then
      rollback;
    end if;
  end loop;
  
  exception when others then
    dbms_output.put_line('Row CNT = '||l_cnt);
    raise;
end;
/

--------------- Test Output -----------------
310
311
312
313
Row CNT = 100
declare
*
ERROR at line 1:
ORA-01002: fetch out of sequence
ORA-06512: at line 17
ORA-06512: at line 7
ORA-06512: at line 7

For Temporary Table ORA-01002, see Blog: One Test on the Different Errors of Oracle Global Temporary Tables vs. Private Temporary Tables
(https://ksun-oracle.blogspot.com/2024/01/one-test-on-different-errors-of-oracle.html)

Friday, May 23, 2025

Oracle ORA-00600 [pfrsfm: stack mismatch] and [pfrsfm: stack mismatch] Reproducing

This Blog will demonstrate how to generate Oracle ORA-00600 [pfrsfm: stack mismatch] and [pfrsfm: stack mismatch].

Note: Tested on Oracle 19c

1. ORA-00600: internal error code, arguments: [pfrsfm: stack mismatch]

Open one Sqlplus window, start 400 Jobs to test (see Section 3. Test Code):


  SQL > exec start_test(400, 1e4, 1e6, 0, 5);

During and after test (it takes about 15 minutes), check DB alert.log and incident files if they contain error text like:


  ORA-00600: internal error code, arguments: [pfrsfm: stack mismatch]

If with 400 Jobs, it is not reproduced, increase it and re-run the test.

Here the incident file:


ORA-00600: internal error code, arguments: [pfrsfm: stack mismatch], [0x7F1F08BB3BF8], [0xFFFFFFFFFFFFFFFF], [0x7F1F08506000], [0x7F1F08BB33F0], 
                                                                     [0xFFFFFFFFFFFFFFFF], [0xFFFFFFFFFFFFFFFF], [], [], [], [], []
ORA-27403: scheduler stop job event
ORA-27403: scheduler stop job event

========= Dump for incident 14217 (ORA 600 [pfrsfm: stack mismatch]) ========

----- Current SQL Statement for this session (sql_id=f44xkpcgqsatb) -----
SELECT * FROM TABLE(PIPELINE_TAB(:B1 , :B2 )) WHERE ROWNUM <= :B1 -5

----- Parser State -----
Parser state1: len1=0 len2=0 pos1=0 pos2=0
Parser state2: flg=0x0 xflg=0x0 xxflg=0x400
Parser state3: tty=0 tlen=0
Parser string: prx=0x7f1f0e8a9d60 base=(nil) cur=(nil)

----- PL/SQL Call Stack -----
  object      line  object
  handle    number  name
0x9def1680         5  procedure K.TEST_JOB_PROC
0x7e82e800         1  anonymous block

[TOC00006]
----- Call Stack Trace -----
calling              call     entry                argument values in hex      
location             type     point                (? means dubious value)     
-------------------- -------- -------------------- ----------------------------
dbgexProcessError()  call     dbgexPhaseII()       7F1F0E83B6D8 7F1F0E7ED1F0
+1872                                              7FFD1ED4C800 7FFD1ED450C8 ?
                                                   000000000 ? 000000000 ?
dbgePostErrorKGE()+  call     dbgexProcessError()  7F1F0E83B6D8 7F1F0E7ED1F0
1853                                               000000001 000000000
                                                   000000000 ? 000000000 ?
dbkePostKGE_kgsf()+  call     dbgePostErrorKGE()   7F1F0E87B9C0 7F1F08BB0050
71                                                 000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgeadse()+448        call     dbkePostKGE_kgsf()   7F1F0E87B9C0 7F1F08BB0050
                                                   000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgerinv_internal()+  call     kgeadse()            7F1F0E87B9C0 ? 7F1F08BB0050
44                                                 000000258 ? 016036C24
                                                   000000000 000000006
kgerinv()+40         call     kgerinv_internal()   7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000000 ? 000000006 ?
kgeasnmierr()+146    call     kgerinv()            7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000000 ? 000000006 ?
pfrsfm()+1233        call     kgeasnmierr()        7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000002 7F1F08BB3BF8
pfrspopstks()+103    call     pfrsfm()             7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000002 ? 7F1F08BB3BF8 ?
kksumc()+526         call     pfrspopstks()        7F1F0E87B9C0 ? 7F1F08507910
                                                   000000258 ? 016036C24 ?
                                                   000000002 ? 7F1F08BB3BF8 ?
opiodr()+4705        call     kksumc()             7F1F0864F5C8 000000005

2. ORA-00600: internal error code, arguments: [pfrsfm: Stack disordered]

Open one Sqlplus window, start test by:


SQL>  exec test_job_proc(1, 1e4, 1e6, 0, 5);

Get its session SPID (1055599) and its (sid, serial#) = (915.49648).

In UNIX Terminal, run kill command:


  kill -URG 1055599

or In another Sqlplus window, run cancel statement:


  alter system cancel sql '915,49648';

(see Blog:
How to CANCEL a query running in another session? (https://tanelpoder.com/2010/02/17/how-to-cancel-a-query-running-in-another-session/)
DBMS_SCHEDULER Job Not Running and Used Slaves (https://ksun-oracle.blogspot.com/2021/01/dbmsscheduler-job-not-running-and-used.html)
)

You will see either:


SQL>  exec test_job_proc(1, 1e4, 1e6, 0, 5);
BEGIN test_job_proc(1, 1e4, 1e6, 0, 5); END;

*
ERROR at line 1:
ORA-01013: user requested cancel of current operation


SQL>  exec test_job_proc(1, 1e4, 1e6, 0, 5);
BEGIN test_job_proc(1, 1e4, 1e6, 0, 5); END;

*
ERROR at line 1:
ORA-00603: ORACLE server session terminated by fatal error
ORA-24557: error 600 encountered while handling error 1013; exiting server
process
ORA-00600: internal error code, arguments: [pfrsfm: Stack disordered],
[0x7F0FE0BA14F8], [0x7F0FE0BA22F0], [0x7F0FE0BA1850], [0x7F0FE0BA34F8], [], [],
[], [], [], [], []
ORA-01013: user requested cancel of current operation
Process ID: 1055599
Session ID: 915 Serial number: 49648

If it is the first case, in the same Oracle session (ORA-01013 does not terminate session), re-run:


   exec test_job_proc(1, 1e4, 1e6, 0, 5);

re-run UNIX kill command or SQL cancel statement.

Repeat above steps till the session hit "ORA-00600: [pfrsfm: Stack disordered]".

Then the session incident file shows:


ORA-00600: internal error code, arguments: [pfrsfm: Stack disordered], [0x7F0FE0BA14F8], [0x7F0FE0BA22F0], [0x7F0FE0BA1850], [0x7F0FE0BA34F8], 
                                                                       [], [], [], [], [], [], []
ORA-01013: user requested cancel of current operation

========= Dump for incident 11485 (ORA 600 [pfrsfm: Stack disordered]) ========

*** 2025-05-20T09:54:22.541566+02:00
dbkedDefDump(): Starting incident default dumps (flags=0x2, level=3, mask=0x0)
[TOC00003]
----- Current SQL Statement for this session (sql_id=b7vzj4s3qw97g) -----
SELECT * FROM TABLE(GET_TAB_PTF(:B1 , :B2 )) WHERE ROWNUM <= :B1 -5
[TOC00004]
----- Parser State -----
Parser state1: len1=0 len2=36 pos1=0 pos2=5
Parser state2: flg=0x0 xflg=0x4300000 xxflg=0x400
Parser state3: tty=0 tlen=5
Parser string: prx=0x7f0fe6887d60 base=(nil) cur=0x7ffce65bdbed

----- PL/SQL Call Stack -----
  object      line  object
  handle    number  name
0xa0fb7940         5  procedure K.TEST_ORA_600_DISCONNET
0x95391e58         1  anonymous block

----- Call Stack Trace -----
calling              call     entry                argument values in hex      
location             type     point                (? means dubious value)     
-------------------- -------- -------------------- ----------------------------
dbgexProcessError()  call     dbgexPhaseII()       7F0FE68196D8 7F0FE67CB1F0
+1872                                              7FFCE65B7000 7FFCE65AF908 ?
                                                   000000000 ? 000000000 ?
dbgePostErrorKGE()+  call     dbgexProcessError()  7F0FE68196D8 7F0FE67CB1F0
1853                                               000000001 000000000
                                                   000000000 ? 000000000 ?
dbkePostKGE_kgsf()+  call     dbgePostErrorKGE()   7F0FE68599C0 7F0FE0B90050
71                                                 000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgeadse()+448        call     dbkePostKGE_kgsf()   7F0FE68599C0 7F0FE0B90050
                                                   000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgerinv_internal()+  call     kgeadse()            7F0FE68599C0 ? 7F0FE0B90050
44                                                 000000258 ? 016036BEC
                                                   000000000 000000004
kgerinv()+40         call     kgerinv_internal()   7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000000 ? 000000004 ?
kgeasnmierr()+146    call     kgerinv()            7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000000 ? 000000004 ?
pfrsfm()+1055        call     kgeasnmierr()        7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000002 7F0FE0BA14F8
pfrspopstks()+103    call     pfrsfm()             7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000002 ? 7F0FE0BA14F8 ?
kksumc()+526         call     pfrspopstks()        7F0FE68599C0 ? 7F0FE0BA22F0
                                                   000000258 ? 016036BEC ?
                                                   000000002 ? 7F0FE0BA14F8 ?
opiodr()+4705        call     kksumc()             7F0FE0298488 000000005
                                                   000000258 ? 7F0FE68599C0
                                                   000000002 ? 7F0FE0BA14F8 ?

If we compare incident file of [pfrsfm: stack mismatch] and [pfrsfm: Stack disordered], we can see that they hit ORA-00600 at different pfrsfm locations:


pfrsfm()+1233

pfrsfm()+1055

3. Test Code


create or replace procedure clearup_test as
begin
  for c in (select * from dba_scheduler_jobs where job_name like '%TEST_JOB%') loop
    begin
      --set DBA_SCHEDULER_JOBS.enabled=FALSE
	    dbms_scheduler.disable (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
	    --set DBA_SCHEDULER_JOBS.enabled=TRUE, so that it can be scheduled to run (state='RUNNING')
	    --  dbms_scheduler.enable (c.job_name, commit_semantics =>'ABSORB_ERRORS');
	  exception when others then null;
	  end;
	end loop;
	
  for c in (select * from dba_scheduler_running_jobs where job_name like '%TEST_JOB%') loop
    begin
      --If force=FALSE, gracefully stop the job, slave process can update the status of the job in the job queue.
      --If force= TRUE, the Scheduler immediately terminates the job slave.
      --For repeating job with attribute "start_date => systimestamp" and enabled=TRUE, 
      --re-start immediate (state changed from 'SCHEDULED' to 'RUNNING'), DBA_SCHEDULER_JOBS.run_count increases 1.
	    dbms_scheduler.stop_job (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
	  exception when others then null;
	  end;
	end loop;
	
  for c in (select * from dba_scheduler_jobs where job_name like '%TEST_JOB%') loop
    begin
      --If force=TRUE, the Scheduler first attempts to stop the running job instances 
      --(by issuing the STOP_JOB call with the force flag set to false), and then drops the jobs.
	    dbms_scheduler.drop_job (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
	  exception when others then null;
	  end;
	end loop;
end;
/

drop type t_tab;
drop type t_row;

create type t_row as object (id number, name varchar2(50));
/

create type t_tab is table of t_row;
/

create or replace function pipeline_tab (p_rows in number, p_seconds number) return t_tab pipelined as
begin
  for i in 1 .. p_rows loop
    dbms_output.put_line('row: ' || i);
    pipe row(t_row(i, 'Name_' || i));
    if p_seconds > 0 then 
      dbms_session.sleep(p_seconds);
    end if;
  end loop;
  return;
end;
/

-- not needed
drop table test_tab;
create table test_tab(id number, label varchar2(10));
insert into test_tab(id, label) values(1, 'label');
commit;

create or replace procedure test_pipeline_proc_recur(p_rows number, p_seconds number, p_depth number) as
begin
  if p_depth = 0 then
    for c in (select * from table(pipeline_tab(p_rows, p_seconds)) where  rownum <= p_rows-5) 
    loop
      dbms_output.put_line('id = '||c.id);
    end loop;
  else 
    test_pipeline_proc_recur(p_rows, p_seconds, p_depth - 1);
  end if;
  exception when others then 
    dbms_output.put_line('p_depth = '||p_depth||', ERROR: ' || sqlerrm);
end;
/

create or replace procedure test_job_proc(p_job_id number, p_loop_count number, p_rows number, p_seconds number, p_depth number) as
begin
  update test_tab set id = id+1;
  for i in 1..p_loop_count loop
    execute immediate 'begin test_pipeline_proc_recur('||p_rows||', '||p_seconds||', '||p_depth||'); end; ';
  end loop;
  commit;
end;
/

create or replace procedure start_jobs(p_job_count number, p_loop_count number, p_rows number, p_seconds number, p_depth number) as
begin
  for i in 1..p_job_count loop
    dbms_scheduler.create_job (
      job_name        => 'TEST_JOB_CRASHED_1'||i,
      job_type        => 'PLSQL_BLOCK',
      job_action      => 'begin test_job_proc('||i||', '||p_loop_count||', '||p_rows||', '||p_seconds||', '||p_depth||'); end;',    
      start_date      => systimestamp,
      --repeat_interval => 'systimestamp',
      auto_drop       => true,
      enabled         => true);
  end loop;
end;
/

create or replace procedure start_test(p_job_count number, p_loop_count number, p_rows number, p_seconds number, p_depth number) as
  l_cnt number := 0;
begin
  dbms_output.put_line('Start '||p_job_count||' Jobs at '||sysdate);
  start_jobs(p_job_count, p_loop_count, p_rows, p_seconds, p_depth);
  
  -- wait for all jobs running
  dbms_output.put_line('Wait all Jobs running at '||sysdate);
  loop
    dbms_session.sleep(1);
    select count(*) into l_cnt from dba_scheduler_running_jobs where job_name like 'TEST_JOB_CRASHED%';
    exit when l_cnt >= p_job_count;
  end loop;
  
  dbms_output.put_line('Start clearup at '||sysdate);
  clearup_test;
 
  -- wait for all jobs stopped
  dbms_output.put_line('Wait clearup at '||sysdate);
  loop
    dbms_session.sleep(1);
    select count(*) into l_cnt from dba_scheduler_running_jobs where job_name like 'TEST_JOB_CRASHED%';
    exit when l_cnt = 0;
  end loop;
  dbms_output.put_line('Test End at '||sysdate);
end;
/

-- exec start_jobs(1e2, 1e4, 1e6, 0, 5);
-- exec clearup_test;
-- exec test_job_proc(1, 1e4, 1e6, 0, 5);

--================== Test Run ==================--
--  exec start_test(400, 1e4, 1e6, 0, 5);