Thursday, July 10, 2025

Blog List

"library cache: bucket mutex X" on V$ Fixed Views: Case Test

Oracle DB experienced heavy "library cache: bucket mutex X" when V$ Fixed Views were queried frequently,
which impeded Oracle normal functions like background processes Mnnn and MZnn.

Note: Tested on Oracle 19c.


1. Test Setup


In the following code, we query V$LIBCACHE_LOCKS (similar behavior for DBA_KGLLOCK), which is a union of X$KGLLK and X$KGLPN.

create or replace procedure test_bucket_mutex(p_job_id number, p_loop_count number) as
  -- Runs the V$LIBCACHE_LOCKS query p_loop_count times and discards the rows.
  --   p_job_id     : used as the rownum limit of the query
  --   p_loop_count : number of query executions
begin
  for l_iter in 1..p_loop_count loop
    for l_row in (select * from V$LIBCACHE_LOCKS where rownum <= p_job_id) loop
      null;  -- rows are fetched only, nothing is done with them
    end loop;
  end loop;
end test_bucket_mutex;
/

-- exec test_bucket_mutex(1, 1);

create or replace procedure test_bucket_mutex_jobs(p_job_count number, p_loop_count number) as
  -- Creates p_job_count scheduler jobs named TEST_JOB_1 .. TEST_JOB_<p_job_count>.
  -- Job number n executes test_bucket_mutex(n, p_loop_count); each job is enabled
  -- at creation, starts at the current timestamp and is created with auto_drop.
  l_job_action varchar2(4000);
begin
  for l_job_no in 1..p_job_count loop
    l_job_action := 'begin test_bucket_mutex('||l_job_no||', '||p_loop_count||'); end;';

    dbms_scheduler.create_job (
      job_name   => 'TEST_JOB_'||l_job_no,
      job_type   => 'PLSQL_BLOCK',
      job_action => l_job_action,
      start_date => systimestamp,
      auto_drop  => true,
      enabled    => true);
  end loop;
end test_bucket_mutex_jobs;
/


2. Test Run


Run a test with 16 Jobs:

exec test_bucket_mutex_jobs(16, 1e4);
AWR shows:

Top 10 Foreground Events by Total Wait Time

EventWaitsTotal Wait Time (sec)Avg Wait% DB timeWait Class
library cache: bucket mutex X939,40217.1K18.22ms59.8Concurrency
DB CPU 7062.2 24.7 
library cache: mutex X5,13969.313.48ms.2Concurrency
control file sequential read20,8742.9137.27us.0System I/O
db file sequential read3,1951.8557.97us.0User I/O
Disk file operations I/O2,602.272.88us.0User I/O
cursor: pin S8.117.55ms.0Concurrency
log file sync20.16.52ms.0Commit
direct path write590217.10us.0User I/O
latch free1010.66ms.0Other

SQL ordered by Elapsed Time

Elapsed Time (s)Executions Elapsed Time per Exec (s) %Total%CPU%IO SQL IdSQL ModuleSQL Text
28,575.23159,7100.1899.8924.660.009sz3zkc69bpjh DBMS_SCHEDULER SELECT * FROM V$LIBCACHE_LOCKS...
1,790.6211,790.626.2624.820.000f1dxm6hd2c2u DBMS_SCHEDULER DECLARE job BINARY_INTEGER := ...
1,790.1811,790.186.2624.650.005vp17fw0hgrz1 DBMS_SCHEDULER DECLARE job BINARY_INTEGER := ...

Mutex Sleep Summary

Mutex TypeLocationSleepsWait Time (ms)
Library Cachekglic1 491,696,79215,671,698
Library Cachekglic4 145172,6341,423,319
Library CachekglGetHandleReference 1238,26261,133
Library CachekglReleaseHandleReference 1241,1768,110
Library Cachekglhdgn1 62971
Cursor PinkksLockDelete [KKSCHLPIN6]9138
Cursor Pinkksfbc [KKSCHLFSP2]22
Row Cache[14] kqrScan10

Top SQL with Top Row Sources

SQL IDPlan HashExecutions% ActivityRow Source% Row SourceTop Event% EventSQL Text
9sz3zkc69bpjh2131580607279498.87 FIXED TABLE - FULL49.95library cache: bucket mutex X30.36 SELECT * FROM V$LIBCACHE_LOCKS...
FIXED TABLE - FULL48.57library cache: bucket mutex X31.00

During the test, we can monitor "library cache: bucket mutex X" waits by:


-- Sessions recorded in v$mutex_sleep_history during the last 5 minutes (more than 3 sleeps),
-- once as blocking session and once as requesting session, together with their current SQL.
-- mod(p1, 2^17) is shown as the bucket mutex number (same formula as bucket_p1 on v$db_object_cache).
select 'BLOCKING_SESSION'   sess, s.program, s.event, mod(s.p1, power(2, 17)) "bucket mutex(child_latch)", s.p1, s.p2, s.p3, s.sql_id, q.sql_text, m.*, s.*, q.*
from v$session s, v$mutex_sleep_history m, v$sqlarea q
 where s.sid = m.blocking_session and s.sql_id = q.sql_id and m.sleep_timestamp > sysdate-5/1440 and m.sleeps > 3
union all
select 'REQUESTING_SESSION' sess, s.program, s.event, mod(s.p1, power(2, 17)) "bucket mutex(child_latch)", s.p1, s.p2, s.p3, s.sql_id, q.sql_text, m.*, s.*, q.*
from v$session s, v$mutex_sleep_history m, v$sqlarea q
 where s.sid = m.requesting_session and s.sql_id = q.sql_id and m.sleep_timestamp > sysdate-5/1440 and m.sleeps > 3;

-- Pairs ASH samples taken at the same sample_time within the last 5 minutes:
--   bs : session waiting on 'library cache: bucket mutex X'
--   s  : a different session waiting on 'library cache: mutex X' whose mod(p1, 2^17) equals bs.p1
select bs.session_id, bs.session_serial#, bs.program, bs.event, bs.p1, bs.blocking_session, bs.blocking_session_serial#, bs.sql_id
      ,s.sample_time, s.session_id, s.session_serial#, s.program, s.event, s.p1, s.blocking_session, s.blocking_session_serial#, s.sql_id
  from v$active_session_history bs
 inner join v$active_session_history s
    on s.sample_time = bs.sample_time
   and mod(s.p1, power(2, 17)) = bs.p1
   and s.session_id != bs.session_id
 where bs.event = 'library cache: bucket mutex X'
   and s.event  = 'library cache: mutex X'
   and s.sample_time > sysdate-5/1440
 order by s.sample_time desc, bs.session_id, s.session_id;

-- v$sqlarea statistics of the test query (SQL_ID taken from the AWR report above)
select v.sql_id, v.last_active_time, v.executions, v.disk_reads, v.direct_writes, v.buffer_gets, v.rows_processed, v.sql_text, v.*
  from v$sqlarea v
 where v.sql_id = '9sz3zkc69bpjh';


3. "library cache: bucket mutex X" Tracing


Open one Sqlplus window, execute bpftrace on its process (UNIX pid 293988):

bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2, 
             uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2 / pid==293988 /
   {@ustack_cnt[probe] = count();}' 
Run query to fetch rows with rownum limit:

SQL > select * from V$LIBCACHE_LOCKS where rownum <= 10;
  10 rows selected.
Here bpftrace output:

	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 123462
	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       269774
Same bpftrace output for count query:

SQL > select count(*) from V$LIBCACHE_LOCKS where rownum <= 10;
    COUNT(*)
  ----------
          10
Run query without rownum limit, we get the similar output due to FIXED TABLE FULL on X$KGLLK and X$KGLPN.

SQL >  select * from V$LIBCACHE_LOCKS;
  1226 rows selected.
        
	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 123488
	@ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       269939
Same bpftrace output for count query:

SQL > select count(*) from V$LIBCACHE_LOCKS;
    COUNT(*)
  ----------
        1226
To get Parameter P1 and P3 of "library cache: bucket mutex X", we can use bpftrace script:

bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2 / pid==293988 /
   {@ustack_cnt["kglGetBucketMutex", reg("si"), reg("r8")] = count();}'
Pick a few output lines (first number is P1, second is P3):

  @ustack_cnt[kglGetBucketMutex, 6574, 145]: 2
  @ustack_cnt[kglGetBucketMutex, 5442, 49]:  2
  @ustack_cnt[kglGetBucketMutex, 3311, 49]:  2
Then we can find them in v$db_object_cache:

-- Library cache objects whose child_latch is one of the P1 values picked from the bpftrace output
select t.child_latch,
       t.hash_value,
       mod(t.hash_value, power(2, 17)) bucket_p1,
       t.owner,
       substr(t.name, 1, 50) name,
       t.namespace,
       t.type
  from v$db_object_cache t
 where t.child_latch in (6574, 5442, 3311);

CHILD_LATCH HASH_VALUE  BUCKET_P1 OWNER NAME                                          NAMESPACE            TYPE
----------- ---------- ---------- ----- --------------------------------------------- -------------------- ----------
       3311 1185156335       3311 SYS   java/util/function/DoubleBinaryOperator       TABLE/PROCEDURE      JAVA CLASS
       5442 1797395778       5442       UPDATE SYS.WRI$_ADV_                          SQL AREA             CURSOR
       6574  363993518       6574       WITH binds as           (select :dbid         SQL AREA             CURSOR
To get Parameter P3 of library cache: bucket mutex X, we can use bpftrace script:

bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2 / pid==293988 /
   {@ustack_cnt["kglGetBucketMutex", reg("r8")] = count();}'
   
  @ustack_cnt[kglGetBucketMutex, 64]:  1
  @ustack_cnt[kglGetBucketMutex, 62]:  9
  @ustack_cnt[kglGetBucketMutex, 49]:  57846
  @ustack_cnt[kglGetBucketMutex, 145]: 65558
We can see that bpftrace output [kglGetBucketMutex, 49] and [kglGetBucketMutex, 145] big numbers match "kglic1 49" and "kglic1 145" big stats in AWR - Mutex Sleep Summary.

If we query normal tables, there are far fewer kglGetBucketMutex and kglGetMutex calls:

SQL > select count(*) from dba_objects;

    COUNT(*)
  ----------
     2004127
   
bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2, 
             uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2 / pid==293988 /
   {@ustack_cnt[probe] = count();}'
   
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 1
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       15


4. GV$LIBCACHE_LOCKS and DBA_KGLLOCK DDL and Xplan


Both V$LIBCACHE_LOCKS and SYS.DBA_KGLLOCK are union of X$KGLLK and X$KGLPN. Any queries on them are FIXED TABLE FULL on X$KGLLK and X$KGLPN (rownum limit has no effect).

------ V$LIBCACHE_LOCKS ------
SELECT INST_ID, 'LOCK', KGLLKADR, KGLLKUSE, KGLLKSES, KGLLKHDL, KGLLKPNC, KGLLKCNT, KGLLKMOD, KGLLKREQ, KGLLKSPN, CON_ID
  FROM X$KGLLK
UNION
SELECT INST_ID, 'PIN',  KGLPNADR, KGLPNUSE, KGLPNSES, KGLPNHDL, KGLPNLCK, KGLPNCNT, KGLPNMOD, KGLPNREQ, KGLPNSPN, CON_ID
  FROM X$KGLPN
  
select * from V$LIBCACHE_LOCKS where rownum <= :B1;

Plan hash value: 2131580607
 
---------------------------------------------------------------------------------------------------
| Id  | Operation             | Name              | Rows  | Bytes |TempSpc| Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |                   | 20956 |  2414K|       |   868   (1)| 00:00:01 |
|*  1 |  COUNT STOPKEY        |                   |       |       |       |            |          |
|   2 |   VIEW                | GV$LIBCACHE_LOCKS | 20956 |  2414K|       |   868   (1)| 00:00:01 |
|*  3 |    SORT UNIQUE STOPKEY|                   | 20956 |  2361K|  3656K|   868   (1)| 00:00:01 |
|   4 |     UNION-ALL         |                   |       |       |       |            |          |
|*  5 |      FIXED TABLE FULL | X$KGLLK           | 18837 |  1048K|       |     0   (0)| 00:00:01 |
|*  6 |      FIXED TABLE FULL | X$KGLPN           |  2119 |   132K|       |     0   (0)| 00:00:01 |
---------------------------------------------------------------------------------------------------

------ DBA_KGLLOCK ------

CREATE OR REPLACE FORCE NONEDITIONABLE VIEW SYS.DBA_KGLLOCK
(KGLLKUSE, KGLLKHDL, KGLLKMOD, KGLLKREQ, KGLLKTYPE)
BEQUEATH DEFINER
AS 
  select kgllkuse, kgllkhdl, kgllkmod, kgllkreq, 'Lock' kgllktype from x$kgllk
    union all
  select kglpnuse, kglpnhdl, kglpnmod, kglpnreq, 'Pin'  kgllktype from x$kglpn;
  
  
select * from DBA_KGLLOCK where rownum <= :B1;

Plan hash value: 3293675002
 
-----------------------------------------------------------------------------------
| Id  | Operation           | Name        | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------
|   0 | SELECT STATEMENT    |             | 20956 |   982K|     0   (0)| 00:00:01 |
|*  1 |  COUNT STOPKEY      |             |       |       |            |          |
|   2 |   VIEW              | DBA_KGLLOCK | 20956 |   982K|     0   (0)| 00:00:01 |
|   3 |    UNION-ALL        |             |       |       |            |          |
|   4 |     FIXED TABLE FULL| X$KGLLK     | 18837 |   423K|     0   (0)| 00:00:01 |
|   5 |     FIXED TABLE FULL| X$KGLPN     |  2119 | 48737 |     0   (0)| 00:00:01 |
-----------------------------------------------------------------------------------


5. Related Work


(1). Dynamic_plan_table, x$kqlfxpl and extreme library cache latch contention (Posted by Riyaj Shamsudeen on March 13, 2009)
(https://orainternals.wordpress.com/tag/kglic/)

We traced the following two queries with the above bpftrace scripts,
and the output shows that kglGetBucketMutex requests are proportional to the rownum limit (:B1).
(For V$LIBCACHE_LOCKS and DBA_KGLLOCK, kglGetBucketMutex requests are constant, irrespective of the rownum limit.)

select count(*) from GV$SQL_PLAN where rownum <= :B1;       --FIXED TABLE FULL on X$KQLFXPL
select count(*) from GV$ALL_SQL_PLAN where rownum <= :B1;   --FIXED TABLE FULL on X$ALL_KQLFXPL
Here the test output:

bpftrace -e 'uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2,
             uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2 / pid==299591 /
   {@ustack_cnt[probe] = count();}'
Attaching 2 probes...

select count(*) from GV$SQL_PLAN where rownum <= 1;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 47
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       111

select count(*) from GV$SQL_PLAN where rownum <= 10;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 117
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       287

select count(*) from GV$SQL_PLAN where rownum <= 100;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 436
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       974

select count(*) from GV$SQL_PLAN where rownum <= 1000;
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetBucketMutex+2]: 3523
  @ustack_cnt[uprobe:/orabin/app/oracle/product/19.27.0.0.250415-212/bin/oracle:kglGetMutex+2]:       7725
(2). Oracle PLITBLM "library cache: mutex X"
(https://ksun-oracle.blogspot.com/2021/04/oracle-plitblm-library-cache-mutex-x.html)

(3). Row Cache Object and Row Cache Mutex Case Study
(https://ksun-oracle.blogspot.com/2020/08/row-cache-object-and-row-cache-mutex.html)

(4). ORACLE MUTEXES, FRITS HOOGLAND (https://fritshoogland.wordpress.com/wp-content/uploads/2020/04/mutexes-2.pdf)


6. GDB script


We can also use following GDB script to trace "library cache: bucket mutex X":

---------------- bucket_mutex_1, gdb -x bucket_mutex_1 -p 293988 ----------------

# Trace script: stops at every kglGetBucketMutex / kglGetMutex call of the attached
# process, prints one line per call and continues.
# All output is also written to bucket_mutex_1.log (overwritten on each run).
set pagination off
set logging file bucket_mutex_1.log
set logging overwrite on
set logging on
# running call counters, printed in each trace line and incremented per hit
set $kmutexget = 1
set $kbucketget = 1

# kglGetBucketMutex: print bucket (rsi) and location (r8d), plus the 4 innermost stack frames
break kglGetBucketMutex
commands
printf "------kglGetBucketMutex (%i) ---> Bucket (rsi): %d (%X), Location(r8d): %d (%X)\n", $kbucketget++, $rsi, $rsi, $r8d, $r8d
backtrace 4
continue
end

# kglGetMutex: rsi is labelled as an address, so it is printed with 64-bit conversions
# (%lu / %lX); gdb's printf converts %d / %X arguments to int, which would truncate it.
break kglGetMutex
commands
printf "------kglGetMutex (%i) ---> Mutex addr (rsi): %lu (%lX), Location(r8d): %d (%X)\n", $kmutexget++, $rsi, $rsi, $r8d, $r8d
continue
end


7. kglMutexLocations[] array


For all kglMutexLocations, we can try to list them with following command.
They often appear in AWR section "Mutex Sleep Summary" for Mutex Type: "Library Cache"
(or V$MUTEX_SLEEP / V$MUTEX_SLEEP_HISTORY.location).

# PrintkglMutexLocations <count>
# Reads kglMutexLocations as an array of 64-bit pointers and prints, for each of the
# first <count> entries, the string the entry points to (x /s).
# Usage: PrintkglMutexLocations 150
define PrintkglMutexLocations
  set pagination off
  set $i = 0
  while $i < $arg0
    x /s *((uint64_t *)&kglMutexLocations + $i)
    set $i = $i + 1
  end
end

(gdb) PrintkglMutexLocations 150

0x15f42524:     "kglic1    49"
0x15f42a24:     "kglic2 127"
0x15f42b68:     "kglic3       144"
0x15f42b7c:     "kglic4       145"

(Only "kglic" Mutex are shown here)

Tuesday, July 1, 2025

Oracle dbms_hprof: Uncatchable Time in PL/SQL Nested Program Units

This Blog will demonstrate dbms_hprof uncatchable time in PL/SQL nested procedures and packages, which often leads to misleading elapsed time accounting and makes it hard to locate the time-consuming PL/SQL programs.

Note: Tested on Oracle 19c.


1. Test Code


In following code, we have one procedure which calls a nested procedure:

create or replace procedure hprof_plsql_proc_test(p_cnt_outer number, p_cnt_inner number) -- test procedure: prints its arguments, then calls the nested proc_nested
is -- NOTE: comments are end-of-line only so that source line numbers stay unchanged
  type t_rec is record(
             id      pls_integer
            ,name    varchar2(1000)
           );
  type t_rec_tab is table of t_rec index by pls_integer; -- associative array of t_rec

  l_rec       t_rec;      -- scratch record, overwritten in every inner iteration
  l_rec_tab   t_rec_tab;  -- collection filled by proc_nested
  l_row_cnt   number;     -- target of the two marker queries

  procedure proc_nested(p_cnt_outer number, p_cnt_inner number) -- nested procedure: p_cnt_outer rounds of filling l_rec_tab with p_cnt_inner records
  is
  begin
    for i in 1..p_cnt_outer loop    
      l_rec_tab := t_rec_tab();                              -- start each round with an empty collection
      select /*+ Start */ count(*) into l_row_cnt from dual; -- marker SQL before the inner loop
      for i in 1..p_cnt_inner loop                           -- this i shadows the outer loop index
        l_rec := t_rec(
                id      => 1
               ,name    => lpad('A', 900, 'B')||i            -- 900-character padded string followed by the inner index
              );
        l_rec_tab(i) := l_rec;
      end loop;
      select /*+ End */ count(*) into l_row_cnt from dual;   -- marker SQL after the inner loop
    end loop;
  end;

begin
  dbms_output.put_line('hprof_plsql_proc_test('||p_cnt_outer||', '||p_cnt_inner||')');

  proc_nested(p_cnt_outer, p_cnt_inner);
end;
/


2. Test Run with Unwrapped PL/SQL


Run above procedure with dbms_hprof, and create html hprofile:

create or replace directory TEST_DIR as '/testdb/oradata/hprof/';

set serveroutput on
declare
  -- directory object and raw profiler file used for this run
  c_dir_name    constant varchar2(100) := 'TEST_DIR';
  c_trace_file  constant varchar2(100) := 'hprof_plsql_proc_test_UnWrapped_1.hpf';
  l_run_id      number;
begin
  -- profile one execution of the test procedure
  dbms_hprof.start_profiling(location => c_dir_name, filename => c_trace_file);
  hprof_plsql_proc_test(1e2, 1e5);
  dbms_hprof.stop_profiling;

  -- analyze the raw profiler file and print the returned run id
  l_run_id := dbms_hprof.analyze(
                location    => c_dir_name,
                filename    => c_trace_file,
                run_comment => 'hprof_plsql_proc_test Test');

  dbms_output.put_line('l_runid=' || l_run_id);
end;
/
HProf shows that HPROF_PLSQL_PROC_TEST elapsed time is sum of all its Children.

But for Subtree PROC_NESTED, the sum of all its Children (6635+6253=12888) is much less than the Subtree time (12642594); 12629706 (12642594 - 12888) is unaccounted.

Parents and Children Elapsed Time (microsecs) Data


HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)


Subtree Ind% Function Ind% Descendants Ind% Calls Ind% Function Name SQL ID SQL TEXT
12652694 100% 10088 0.1% 12642606 100% 1 0.5% HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Parents:
12652694 100% 10088 100% 12642606 100% 1 100% ORACLE.root
Children:
12642594 100% 12629706 100% 12888 100% 1 100% HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
12 0.0% 2 100% 10 100% 1 100% SYS.DBMS_OUTPUT.PUT_LINE (Line 109)
HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
Subtree Ind% Function Ind% Descendants Ind% Calls Ind% Function Name SQL ID SQL TEXT
12642594 100% 12629706 99.8% 12888 0.1% 1 0.5% HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST.PROC_NESTED (Line 13)
Parents:
12642594 100% 12629706 100% 12888 100% 1 100% HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Children:
6635 51.5% 6635 100% 0 N/A 100 100% HPROF_PLSQL_PROC_TEST.__static_sql_exec_line26 (Line 26)3cfwwbgj9ft44SELECT /*+ End */ COUNT(*) FROM DUAL
6253 48.5% 6253 100% 0 N/A 100 100% HPROF_PLSQL_PROC_TEST.__static_sql_exec_line18 (Line 18)8tuspbw8fxyrnSELECT /*+ Start */ COUNT(*) FROM DUAL
Looking at the HProf raw file, we can see that the time "P#X 139321" is not attributed to any program (PL/SQL or SQL):

P#C PLSQL."K"."HPROF_PLSQL_PROC_TEST"::7."HPROF_PLSQL_PROC_TEST.PROC_NESTED"#3048d2af80817a01 #13
P#X 8
P#C SQL."K"."HPROF_PLSQL_PROC_TEST"::7."__static_sql_exec_line18" #18."8tuspbw8fxyrn"
P#! SELECT /*+ Start */ COUNT(*) FROM DUAL
P#X 46
P#R
P#X 139321
P#C SQL."K"."HPROF_PLSQL_PROC_TEST"::7."__static_sql_exec_line26" #26."3cfwwbgj9ft44"
P#! SELECT /*+ End */ COUNT(*) FROM DUAL
P#X 73

3. Test Run with Wrapped PL/SQL

Install wrapped PL/SQL (see Section 4. Wrapped PL/SQL), and run it with dbms_hprof, then create html hprofile:

set serveroutput on
declare
  -- Same driver as for the unwrapped run; only the raw profiler file name differs.
  -- Uses the TEST_DIR directory object created in Section 2, so the script is self-contained.
  l_test_dir        varchar2(100) := 'TEST_DIR';
  l_hprof_file_name varchar2(100) := 'hprof_plsql_proc_test_Wrapped_1.hpf';
  l_runid           number;
begin
  -- profile one execution of the (now wrapped) test procedure
  dbms_hprof.start_profiling (
    location => l_test_dir,
    filename => l_hprof_file_name);

  hprof_plsql_proc_test(1e2, 1e5);

  dbms_hprof.stop_profiling;

  -- analyze the raw profiler file and print the returned run id
  l_runid := dbms_hprof.analyze (
     location    => l_test_dir,
     filename    => l_hprof_file_name,
     run_comment => 'hprof_plsql_proc_test Test');

  dbms_output.put_line('l_runid=' || l_runid);
end;
/
HProf shows that the top call HPROF_PLSQL_PROC_TEST has time = 12865442, but all its Children together have only 13408.
The unaccounted time amounts to 12852034 (12865442-13408); almost no time is profiled.

Parents and Children Elapsed Time (microsecs) Data


HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)


Subtree Ind% Function Ind% Descendants Ind% Calls Ind% Function Name SQL ID SQL TEXT
12865442 100% 12852034 99.9% 13408 0.1% 1 0.5% HPROF_PLSQL_PROC_TEST.HPROF_PLSQL_PROC_TEST (Line 1)
Parents:
12865442 100% 12852034 100% 13408 100% 1 100% ORACLE.root
Children:
6879 51.3% 6879 100% 0 N/A 100 100% HPROF_PLSQL_PROC_TEST.__static_sql_exec_line26 (Line 26)3cfwwbgj9ft44SELECT /*+ End */ COUNT(*) FROM DUAL
6517 48.6% 6517 100% 0 N/A 100 100% HPROF_PLSQL_PROC_TEST.__static_sql_exec_line18 (Line 18)8tuspbw8fxyrnSELECT /*+ Start */ COUNT(*) FROM DUAL
12 0.1% 1 100% 11 100% 1 100% SYS.DBMS_OUTPUT.PUT_LINE (Line 109)

4. Wrapped PL/SQL


testdb $ wrap iname=hprof_plsql_proc_test.sql

testdb $ cat hprof_plsql_proc_test.plb
create or replace procedure k.hprof_plsql_proc_test wrapped
a000000
1f
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
abcd
7
3ab 233
YPWOC06TiznHzO1ynQEtqQmVKQ4wg41eLq5qfHRDNA/uugXMeaR4dUB33xnjiWxc7OHa6eMo
UnVwftozxoagoNnUy3AmGAHaM04S3mtcStgl57sTzcETVdi/D/YMuQNVnbkzODjH3tjldCfI
PWWEpIT9hgkzLRHd2fupk5Tn1CdHN5xnByVWGlMXbg0XqPL8cLg3kk3KpynTkMdP/OTRPeru
FKHxhWT06ICM2KGCeErLNw6LpKb6pfPCjSTeew4TUaHEPIncmHDrvctMqtP8r80M9J+x6KGz
64t61S0hhflZb7OPfZe4rwEDMXHejzaIG4z3mqIWXFPkcpJbiXgDHUdLqNuT9OHHHYbubIn4
sM0cQAuFXGwuV19NmMwZe8c7p93lw/WcqgsRNdp0AHNiQlXStnkAQhfEfK0uMyUJCY4+mdYh
4+57oeDnwsrl9j/AjJrZBXpd9/2We+5ua5Gyl4Ihj8bZJFql/feu8r/TLX6Tey4Yl199A6wM
dNZOnVlpsUFAMi4MM2tACZ7pNXedjIJkwX0xaWMdxwiydIiGxw==

/

5. PL/SQL Package Test Code

Similar behaviour can be demonstrated for a nested procedure/function in a PL/SQL package.

create or replace package hprof_plsql_pkg_test
as
  -- Public entry point of the test package; forwards both counts to the
  -- package-private procedure proc_nested defined in the package body.
  procedure proc_outer(p_cnt_outer number, p_cnt_inner number);
end hprof_plsql_pkg_test;
/

create or replace package body hprof_plsql_pkg_test
as
  -- record type and associative array type used by the test
  type t_rec is record(
    id    pls_integer,
    name  varchar2(1000)
  );
  type t_rec_tab is table of t_rec index by pls_integer;

  -- package-level variables written by proc_nested
  l_rec       t_rec;
  l_rec_tab   t_rec_tab;
  l_row_cnt   number;

  -- Private procedure: p_cnt_outer rounds; each round empties l_rec_tab, runs the
  -- "Start" marker query, stores p_cnt_inner records, then runs the "End" marker query.
  procedure proc_nested(p_cnt_outer number, p_cnt_inner number)
  is
  begin
    for l_round in 1..p_cnt_outer loop
      l_rec_tab := t_rec_tab();
      select /*+ Start */ count(*) into l_row_cnt from dual;

      for l_idx in 1..p_cnt_inner loop
        l_rec := t_rec(id => 1, name => lpad('A', 900, 'B')||l_idx);
        l_rec_tab(l_idx) := l_rec;
      end loop;

      select /*+ End */ count(*) into l_row_cnt from dual;
    end loop;
  end proc_nested;

  -- Public wrapper declared in the package specification.
  procedure proc_outer(p_cnt_outer number, p_cnt_inner number)
  is
  begin
    proc_nested(p_cnt_outer, p_cnt_inner);
  end proc_outer;
end hprof_plsql_pkg_test;
/

Tuesday, June 17, 2025

ORA-01002: fetch out of sequence: One Case Test


drop table test_tab;
 
create table test_tab (x number, y varchar2(100)); 

declare -- ORA-01002 test; comments are end-of-line only so that the line numbers in the error stack stay valid
  l_cnt number := 0; -- number of rows processed by the cursor loop
begin
  execute immediate 'truncate table test_tab'; -- start from an empty table
  insert into test_tab select level x, lpad('A', 90, 'B')||level from dual connect by level <= 500; -- 500 rows, not committed in this block
  
  for c in (select x, y from test_tab) loop -- cursor loop over the rows just inserted
    l_cnt := l_cnt + 1;
    dbms_output.put_line(c.x);
    if c.x > 300 then
      rollback; -- issued while the cursor loop is still fetching; the test output shows ORA-01002 raised from the loop
    end if;
  end loop;
  
  exception when others then
    dbms_output.put_line('Row CNT = '||l_cnt); -- report how many rows were processed before the error
    raise; -- re-raise so the original error is still reported
end;
/

--------------- Test Output -----------------
310
311
312
313
Row CNT = 100
declare
*
ERROR at line 1:
ORA-01002: fetch out of sequence
ORA-06512: at line 17
ORA-06512: at line 7
ORA-06512: at line 7
For Temporary Table ORA-01002, see Blog: One Test on the Different Errors of Oracle Global Temporary Tables vs. Private Temporary Tables
(https://ksun-oracle.blogspot.com/2024/01/one-test-on-different-errors-of-oracle.html)

Friday, May 23, 2025

Oracle ORA-00600 [pfrsfm: stack mismatch] and [pfrsfm: Stack disordered] Reproducing

This Blog will demonstrate how to generate Oracle ORA-00600 [pfrsfm: stack mismatch] and [pfrsfm: Stack disordered].

Note: Tested on Oracle 19c


1. ORA-00600: internal error code, arguments: [pfrsfm: stack mismatch]


Open one Sqlplus window, start 400 Jobs to test (see Section 3. Test Code):

  SQL > exec start_test(400, 1e4, 1e6, 0, 5);
During and after test (it takes about 15 minutes), check DB alert.log and incident files if they contain error text like:

  ORA-00600: internal error code, arguments: [pfrsfm: stack mismatch]
If it is not reproduced with 400 Jobs, increase the number of Jobs and re-run the test.

Here the incident file:

ORA-00600: internal error code, arguments: [pfrsfm: stack mismatch], [0x7F1F08BB3BF8], [0xFFFFFFFFFFFFFFFF], [0x7F1F08506000], [0x7F1F08BB33F0], 
                                                                     [0xFFFFFFFFFFFFFFFF], [0xFFFFFFFFFFFFFFFF], [], [], [], [], []
ORA-27403: scheduler stop job event
ORA-27403: scheduler stop job event

========= Dump for incident 14217 (ORA 600 [pfrsfm: stack mismatch]) ========

----- Current SQL Statement for this session (sql_id=f44xkpcgqsatb) -----
SELECT * FROM TABLE(PIPELINE_TAB(:B1 , :B2 )) WHERE ROWNUM <= :B1 -5

----- Parser State -----
Parser state1: len1=0 len2=0 pos1=0 pos2=0
Parser state2: flg=0x0 xflg=0x0 xxflg=0x400
Parser state3: tty=0 tlen=0
Parser string: prx=0x7f1f0e8a9d60 base=(nil) cur=(nil)

----- PL/SQL Call Stack -----
  object      line  object
  handle    number  name
0x9def1680         5  procedure K.TEST_JOB_PROC
0x7e82e800         1  anonymous block

[TOC00006]
----- Call Stack Trace -----
calling              call     entry                argument values in hex      
location             type     point                (? means dubious value)     
-------------------- -------- -------------------- ----------------------------
dbgexProcessError()  call     dbgexPhaseII()       7F1F0E83B6D8 7F1F0E7ED1F0
+1872                                              7FFD1ED4C800 7FFD1ED450C8 ?
                                                   000000000 ? 000000000 ?
dbgePostErrorKGE()+  call     dbgexProcessError()  7F1F0E83B6D8 7F1F0E7ED1F0
1853                                               000000001 000000000
                                                   000000000 ? 000000000 ?
dbkePostKGE_kgsf()+  call     dbgePostErrorKGE()   7F1F0E87B9C0 7F1F08BB0050
71                                                 000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgeadse()+448        call     dbkePostKGE_kgsf()   7F1F0E87B9C0 7F1F08BB0050
                                                   000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgerinv_internal()+  call     kgeadse()            7F1F0E87B9C0 ? 7F1F08BB0050
44                                                 000000258 ? 016036C24
                                                   000000000 000000006
kgerinv()+40         call     kgerinv_internal()   7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000000 ? 000000006 ?
kgeasnmierr()+146    call     kgerinv()            7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000000 ? 000000006 ?
pfrsfm()+1233        call     kgeasnmierr()        7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000002 7F1F08BB3BF8
pfrspopstks()+103    call     pfrsfm()             7F1F0E87B9C0 ? 7F1F08BB0050 ?
                                                   000000258 ? 016036C24 ?
                                                   000000002 ? 7F1F08BB3BF8 ?
kksumc()+526         call     pfrspopstks()        7F1F0E87B9C0 ? 7F1F08507910
                                                   000000258 ? 016036C24 ?
                                                   000000002 ? 7F1F08BB3BF8 ?
opiodr()+4705        call     kksumc()             7F1F0864F5C8 000000005


2. ORA-00600: internal error code, arguments: [pfrsfm: Stack disordered]


Open one Sqlplus window, start test by:

SQL>  exec test_job_proc(1, 1e4, 1e6, 0, 5);
Get its session SPID (1055599) and its (sid, serial#) = (915, 49648).

In UNIX Terminal, run kill command:

  kill -URG 1055599
or In another Sqlplus window, run cancel statement:

  alter system cancel sql '915,49648';
(see Blog:
   How to CANCEL a query running in another session? (https://tanelpoder.com/2010/02/17/how-to-cancel-a-query-running-in-another-session/)
   DBMS_SCHEDULER Job Not Running and Used Slaves (https://ksun-oracle.blogspot.com/2021/01/dbmsscheduler-job-not-running-and-used.html)
)

You will see either:

SQL>  exec test_job_proc(1, 1e4, 1e6, 0, 5);
BEGIN test_job_proc(1, 1e4, 1e6, 0, 5); END;

*
ERROR at line 1:
ORA-01013: user requested cancel of current operation
or

SQL>  exec test_job_proc(1, 1e4, 1e6, 0, 5);
BEGIN test_job_proc(1, 1e4, 1e6, 0, 5); END;

*
ERROR at line 1:
ORA-00603: ORACLE server session terminated by fatal error
ORA-24557: error 600 encountered while handling error 1013; exiting server
process
ORA-00600: internal error code, arguments: [pfrsfm: Stack disordered],
[0x7F0FE0BA14F8], [0x7F0FE0BA22F0], [0x7F0FE0BA1850], [0x7F0FE0BA34F8], [], [],
[], [], [], [], []
ORA-01013: user requested cancel of current operation
Process ID: 1055599
Session ID: 915 Serial number: 49648
If it is the first case, in the same Oracle session (ORA-01013 does not terminate session), re-run:

   exec test_job_proc(1, 1e4, 1e6, 0, 5); 
re-run UNIX kill command or SQL cancel statement.

Repeat above steps till the session hit "ORA-00600: [pfrsfm: Stack disordered]".

Then the session incident file shows:

ORA-00600: internal error code, arguments: [pfrsfm: Stack disordered], [0x7F0FE0BA14F8], [0x7F0FE0BA22F0], [0x7F0FE0BA1850], [0x7F0FE0BA34F8], 
                                                                       [], [], [], [], [], [], []
ORA-01013: user requested cancel of current operation

========= Dump for incident 11485 (ORA 600 [pfrsfm: Stack disordered]) ========

*** 2025-05-20T09:54:22.541566+02:00
dbkedDefDump(): Starting incident default dumps (flags=0x2, level=3, mask=0x0)
[TOC00003]
----- Current SQL Statement for this session (sql_id=b7vzj4s3qw97g) -----
SELECT * FROM TABLE(GET_TAB_PTF(:B1 , :B2 )) WHERE ROWNUM <= :B1 -5
[TOC00004]
----- Parser State -----
Parser state1: len1=0 len2=36 pos1=0 pos2=5
Parser state2: flg=0x0 xflg=0x4300000 xxflg=0x400
Parser state3: tty=0 tlen=5
Parser string: prx=0x7f0fe6887d60 base=(nil) cur=0x7ffce65bdbed

----- PL/SQL Call Stack -----
  object      line  object
  handle    number  name
0xa0fb7940         5  procedure K.TEST_ORA_600_DISCONNET
0x95391e58         1  anonymous block

----- Call Stack Trace -----
calling              call     entry                argument values in hex      
location             type     point                (? means dubious value)     
-------------------- -------- -------------------- ----------------------------
dbgexProcessError()  call     dbgexPhaseII()       7F0FE68196D8 7F0FE67CB1F0
+1872                                              7FFCE65B7000 7FFCE65AF908 ?
                                                   000000000 ? 000000000 ?
dbgePostErrorKGE()+  call     dbgexProcessError()  7F0FE68196D8 7F0FE67CB1F0
1853                                               000000001 000000000
                                                   000000000 ? 000000000 ?
dbkePostKGE_kgsf()+  call     dbgePostErrorKGE()   7F0FE68599C0 7F0FE0B90050
71                                                 000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgeadse()+448        call     dbkePostKGE_kgsf()   7F0FE68599C0 7F0FE0B90050
                                                   000000258 000000000 ?
                                                   000000000 ? 000000000 ?
kgerinv_internal()+  call     kgeadse()            7F0FE68599C0 ? 7F0FE0B90050
44                                                 000000258 ? 016036BEC
                                                   000000000 000000004
kgerinv()+40         call     kgerinv_internal()   7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000000 ? 000000004 ?
kgeasnmierr()+146    call     kgerinv()            7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000000 ? 000000004 ?
pfrsfm()+1055        call     kgeasnmierr()        7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000002 7F0FE0BA14F8
pfrspopstks()+103    call     pfrsfm()             7F0FE68599C0 ? 7F0FE0B90050 ?
                                                   000000258 ? 016036BEC ?
                                                   000000002 ? 7F0FE0BA14F8 ?
kksumc()+526         call     pfrspopstks()        7F0FE68599C0 ? 7F0FE0BA22F0
                                                   000000258 ? 016036BEC ?
                                                   000000002 ? 7F0FE0BA14F8 ?
opiodr()+4705        call     kksumc()             7F0FE0298488 000000005
                                                   000000258 ? 7F0FE68599C0
                                                   000000002 ? 7F0FE0BA14F8 ?
Comparing the incident files of [pfrsfm: stack mismatch] and [pfrsfm: Stack disordered] shows that ORA-00600 is raised at different offsets inside pfrsfm:

pfrsfm()+1233   ([pfrsfm: stack mismatch])

pfrsfm()+1055   ([pfrsfm: Stack disordered])


3. Test Code



create or replace procedure clearup_test as
  -- Best-effort teardown of the scheduler jobs created by the test.
  -- Every job whose name matches '%TEST_JOB%' is, in three separate passes,
  --   1. disabled,
  --   2. force-stopped if it is currently running,
  --   3. dropped.
  -- A failure on one job never aborts the teardown: the error is reported through
  -- dbms_output (visible only when server output is enabled) and the next job is
  -- processed.

  -- Reports one absorbed error without interrupting the teardown.
  procedure log_absorbed(p_step varchar2, p_job_name varchar2, p_error varchar2) is
  begin
    dbms_output.put_line('clearup_test: '||p_step||' of '||p_job_name||' failed: '||p_error);
  end;
begin
  -- Pass 1: disable the jobs.
  for c in (select job_name from dba_scheduler_jobs where job_name like '%TEST_JOB%') loop
    begin
      --set DBA_SCHEDULER_JOBS.enabled=FALSE
      dbms_scheduler.disable (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
      --set DBA_SCHEDULER_JOBS.enabled=TRUE, so that it can be scheduled to run (state='RUNNING')
      --  dbms_scheduler.enable (c.job_name, commit_semantics =>'ABSORB_ERRORS');
    exception when others then
      log_absorbed('disable', c.job_name, sqlerrm);
    end;
  end loop;

  -- Pass 2: stop whatever is still running.
  for c in (select job_name from dba_scheduler_running_jobs where job_name like '%TEST_JOB%') loop
    begin
      --If force=FALSE, gracefully stop the job, slave process can update the status of the job in the job queue.
      --If force= TRUE, the Scheduler immediately terminates the job slave.
      --For repeating job with attribute "start_date => systimestamp" and enabled=TRUE,
      --re-start immediate (state changed from 'SCHEDULED' to 'RUNNING'), DBA_SCHEDULER_JOBS.run_count increases 1.
      dbms_scheduler.stop_job (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
    exception when others then
      log_absorbed('stop', c.job_name, sqlerrm);
    end;
  end loop;

  -- Pass 3: drop the job definitions.
  for c in (select job_name from dba_scheduler_jobs where job_name like '%TEST_JOB%') loop
    begin
      --If force=TRUE, the Scheduler first attempts to stop the running job instances
      --(by issuing the STOP_JOB call with the force flag set to false), and then drops the jobs.
      dbms_scheduler.drop_job (c.job_name, force => true, commit_semantics =>'ABSORB_ERRORS');
    exception when others then
      log_absorbed('drop', c.job_name, sqlerrm);
    end;
  end loop;
end;
/

-- Recreate the object types used as the return type of the pipelined function.
-- t_tab is dropped first because its definition references t_row.
drop type t_tab;
drop type t_row;

-- One pipelined row: a numeric id and a name of up to 50 characters.
create type t_row as object (id number, name varchar2(50));
/

-- Collection type: a nested table of t_row.
create type t_tab is table of t_row;
/

create or replace function pipeline_tab (p_rows in number, p_seconds number) return t_tab pipelined as
  -- Pipes rows numbered 1 .. p_rows, each as t_row(<n>, 'Name_<n>').
  -- Before a row is piped its number is written to dbms_output; after it is piped
  -- the function sleeps p_seconds, but only when p_seconds is greater than 0.
  l_pause boolean := p_seconds > 0;
begin
  <<emit_rows>>
  for l_id in 1 .. p_rows loop
    dbms_output.put_line('row: ' || l_id);
    pipe row(t_row(l_id, 'Name_' || l_id));
    if l_pause then
      dbms_session.sleep(p_seconds);
    end if;
  end loop emit_rows;
  return;
end pipeline_tab;
/

-- Scratch table holding a single row; test_job_proc updates it before entering its loop.
-- The original note here read "not needed" -- presumably the table is not required to
-- reproduce the error (TODO confirm).
drop table test_tab;
create table test_tab(id number, label varchar2(10));
insert into test_tab(id, label) values(1, 'label');
commit;

create or replace procedure test_pipeline_proc_recur(p_rows number, p_seconds number, p_depth number) as
  -- Calls itself with p_depth - 1 until p_depth is 0; at depth 0 it reads from
  -- pipeline_tab(p_rows, p_seconds), limited to p_rows - 5 rows, and prints each id.
  -- Any error is printed together with the p_depth of the level that caught it and
  -- is not re-raised.
  cursor c_piped is
    select * from table(pipeline_tab(p_rows, p_seconds)) where  rownum <= p_rows-5;
begin
  if p_depth = 0 then
    -- Innermost level: consume the pipelined rows.
    for r in c_piped loop
      dbms_output.put_line('id = '||r.id);
    end loop;
    return;
  end if;

  -- Not yet at depth 0: go one level deeper.
  test_pipeline_proc_recur(p_rows, p_seconds, p_depth - 1);
exception
  when others then
    dbms_output.put_line('p_depth = '||p_depth||', ERROR: ' || sqlerrm);
end test_pipeline_proc_recur;
/

create or replace procedure test_job_proc(p_job_id number, p_loop_count number, p_rows number, p_seconds number, p_depth number) as
  -- Body executed by each scheduler job.
  --   p_job_id     : accepted for the caller's benefit; not used in the body
  --   p_loop_count : number of times test_pipeline_proc_recur is invoked
  --   p_rows, p_seconds, p_depth : passed through unchanged to test_pipeline_proc_recur
  -- The update below is committed only after all iterations have finished.
begin
  -- No WHERE clause: every row of test_tab is updated, and the transaction stays
  -- open for the whole duration of the loop.
  update test_tab set id = id+1;
  for i in 1..p_loop_count loop
    -- Bind variables instead of concatenated literals: the numeric arguments reach
    -- the dynamic block unchanged, independent of the session's
    -- NLS_NUMERIC_CHARACTERS (a concatenated fractional p_seconds would otherwise
    -- be rendered with the session decimal separator and could break the block).
    execute immediate 'begin test_pipeline_proc_recur(:p_rows, :p_seconds, :p_depth); end;'
      using in p_rows, in p_seconds, in p_depth;
  end loop;
  commit;
end;
/

create or replace procedure start_jobs(p_job_count number, p_loop_count number, p_rows number, p_seconds number, p_depth number) as
  -- Creates p_job_count scheduler jobs, numbered 1 .. p_job_count.
  -- Job n is named 'TEST_JOB_CRASHED_1' followed by n and runs an anonymous block
  -- calling test_job_proc(n, p_loop_count, p_rows, p_seconds, p_depth).
  -- Each job is created enabled with start_date = systimestamp, auto_drop = true
  -- and no repeat_interval.
  l_job_name   varchar2(128);
  l_job_action varchar2(4000);
begin
  for l_job_no in 1..p_job_count loop
    l_job_name   := 'TEST_JOB_CRASHED_1'||l_job_no;
    l_job_action := 'begin test_job_proc('||l_job_no||', '||p_loop_count||', '||p_rows||', '||p_seconds||', '||p_depth||'); end;';

    -- A repeat_interval of 'systimestamp' was tried in the original and left disabled.
    dbms_scheduler.create_job (
      job_name   => l_job_name,
      job_type   => 'PLSQL_BLOCK',
      job_action => l_job_action,
      start_date => systimestamp,
      auto_drop  => true,
      enabled    => true);
  end loop;
end start_jobs;
/

create or replace procedure start_test(p_job_count number, p_loop_count number, p_rows number, p_seconds number, p_depth number,
                                       p_timeout_seconds number default 600) as
  -- Runs one complete test cycle:
  --   1. creates p_job_count jobs via start_jobs,
  --   2. polls once per second until at least p_job_count matching jobs are running,
  --   3. calls clearup_test to disable, stop and drop them,
  --   4. polls once per second until no matching job is running any more.
  -- Progress messages with timestamps are written to dbms_output.
  --
  --   p_job_count, p_loop_count, p_rows, p_seconds, p_depth : passed to start_jobs
  --   p_timeout_seconds : upper bound, in seconds, for EACH of the two polling
  --                       phases. When it expires a message is printed and the
  --                       procedure moves on instead of polling forever.
  --                       NULL disables the bound (wait indefinitely).
  c_job_pattern constant varchar2(30) := 'TEST_JOB_CRASHED%';
  -- Explicit format so the time of day is printed whatever NLS_DATE_FORMAT is.
  c_ts_format   constant varchar2(30) := 'YYYY-MM-DD HH24:MI:SS';
  l_cnt      number := 0;
  l_deadline date;

  -- Current time as text for the progress messages.
  function now_text return varchar2 is
  begin
    return to_char(sysdate, c_ts_format);
  end;

  -- Number of test jobs currently listed as running.
  function running_jobs return number is
    l_running number;
  begin
    select count(*) into l_running from dba_scheduler_running_jobs where job_name like c_job_pattern;
    return l_running;
  end;
begin
  dbms_output.put_line('Start '||p_job_count||' Jobs at '||now_text);
  start_jobs(p_job_count, p_loop_count, p_rows, p_seconds, p_depth);

  -- wait for all jobs running
  dbms_output.put_line('Wait all Jobs running at '||now_text);
  l_deadline := sysdate + p_timeout_seconds / 86400;   -- NULL timeout => NULL deadline => never expires
  loop
    dbms_session.sleep(1);
    l_cnt := running_jobs;
    exit when l_cnt >= p_job_count;
    if sysdate > l_deadline then
      dbms_output.put_line('Timeout: only '||l_cnt||' of '||p_job_count||' Jobs running at '||now_text);
      exit;
    end if;
  end loop;

  dbms_output.put_line('Start clearup at '||now_text);
  clearup_test;

  -- wait for all jobs stopped
  dbms_output.put_line('Wait clearup at '||now_text);
  l_deadline := sysdate + p_timeout_seconds / 86400;
  loop
    dbms_session.sleep(1);
    l_cnt := running_jobs;
    exit when l_cnt = 0;
    if sysdate > l_deadline then
      dbms_output.put_line('Timeout: '||l_cnt||' Jobs still running at '||now_text);
      exit;
    end if;
  end loop;
  dbms_output.put_line('Test End at '||now_text);
end;
/

-- exec start_jobs(1e2, 1e4, 1e6, 0, 5);
-- exec clearup_test;
-- exec test_job_proc(1, 1e4, 1e6, 0, 5);

--================== Test Run ==================--
--  exec start_test(400, 1e4, 1e6, 0, 5);