Tuesday, March 6, 2018

'enq: RO - fast object reuse' and 'enq: KO - fast object checkpoint' Final Blocker

In Wait Event: 'enq: RO - fast object reuse' and 'enq: KO - fast object checkpoint', CKPT is shown as final_blocking_session when one DBWR is suspended.

V$ENQUEUE_STATISTICS gives detailed information (several rows for same enqueues with different reasons) and statistics.

select * from v$enqueue_statistics where eq_type in ('RO', 'KO');

  EQ_NAME                    EQ REQ_REASON             TOTAL_REQ# TOTAL_WAIT# SUCC_REQ# FAILED_REQ# CUM_WAIT_TIME REQ_DESCRIPTION                          
  -------------------------- -- ---------------------- ---------- ----------- --------- ----------- ------------- ---------------------------------------- 
  Multiple Object Reuse      RO fast object reuse           1,039          82     1,038           0     1,040,219 Coordinates fast object reuse            
  Multiple Object Reuse      RO contention                      0           0         0           0             0 Coordinates flushing of multiple objects 
  Multiple Object Checkpoint KO fast object checkpoint         60           6        60           0       870,104 Coordinates fast object checkpoint       
  Reuse Block Range          CR block range reuse ckpt      1,848          15     1,848           0            20 Coordinates fast block range reuse ckpt  
If (TOTAL_REQ# - SUCC_REQ# > 0 and FAILED_REQ# = 0), there exists on-going EQ requests.

Note: All tests are done in Oracle 12.1.0.2 on Solaris.


1. Test Setup



--Test DB is configured with:
--  db_writer_processes=2
   
drop table tab1;

create table tab1 as select level x, rpad('ABC', 100, 'X') y from dual connect by level < 1000000; 


2. 'enq: KO - fast object checkpoint' Test


Suspend one of DB WRITERs, for example, DBW1:

SQL> oradebug setorapid 12
Oracle pid: 12, Unix process pid: 19980, image: oracle@s5d00003 (DBW1)
SQL> oradebug suspend
Statement processed.
Run query:

SQL (543) > select /*+ parallel(t 4) */ count(*) from tab1 t where x < 100000;
Both blocker_sid and final_blocking_session are same, that is, CKPT (SID: 361):

SQL (123) > select c.chain_id, c.chain_signature, c.sid, c.blocker_sid bsid, final_blocking_session final_bsid, s.program, s.sql_id, s.event
            from v$wait_chains c, v$session s
            where c.sid = s.sid order by c.chain_id, s.program;

CHAIN_ID CHAIN_SIGNATURE                                         SID BSID FINAL_BSID PROGRAM     SQL_ID        EVENT
-------- ------------------------------------------------------- --- ---- ---------- ----------- ------------- -------------------------------
       1 'rdbms ipc message'<='enq: KO - fast object checkpoint' 361                 (CKPT)                    rdbms ipc message
       1 'rdbms ipc message'<='enq: KO - fast object checkpoint' 543 361         361 sqlplus.exe 4wwzmhypuvnc6 enq: KO - fast object checkpoint

SQL (123) > select sql_id, executions, substr(sql_text, 1, 60) sql_text from v$sql v where sql_id = '4wwzmhypuvnc6';

SQL_ID        EXECUTIONS SQL_TEXT
------------- ---------- ------------------------------------------------------------
4wwzmhypuvnc6          0 select /*+ parallel(t 4) */ count(*) from tab1 t where x < 1
After test, resume DBW1:

SQL> oradebug resume
By the way, to make above behavior repeatable, each time tab1 has to be dropped and recreated.


3. 'enq: RO - fast object reuse' Test


Again suspend DBW1 (same behaviour if suspend LGWR):

SQL> oradebug setorapid 12
Oracle pid: 12, Unix process pid: 19980, image: oracle@s5d00003 (DBW1)
SQL> oradebug suspend
Statement processed.
Run statement:

SQL (543) > truncate table tab1;
Both blocker_sid and final_blocking_session are same, that is, CKPT (SID: 361):

SQL (123) > select c.chain_id, c.chain_signature, c.sid, c.blocker_sid bsid, final_blocking_session final_bsid, s.program, s.sql_id, s.event
            from v$wait_chains c, v$session s
            where c.sid = s.sid order by c.chain_id, s.program;
            
CHAIN_ID CHAIN_SIGNATURE                                    SID BSID FINAL_BSID PROGRAM     SQL_ID        EVENT
-------- -------------------------------------------------- --- ---- ---------- ----------- ------------- ---------------------------
       1 'rdbms ipc message'<='enq: RO - fast object reuse' 361                 (CKPT)                    rdbms ipc message
       1 'rdbms ipc message'<='enq: RO - fast object reuse' 543  361        361 sqlplus.exe 83w7b0h5ug6ph enq: RO - fast object reuse  
       
SQL (123) > select sql_id, executions, substr(sql_text, 1, 60) sql_text from v$sql v where sql_id = '83w7b0h5ug6ph';         

SQL_ID        EXECUTIONS SQL_TEXT
------------- ---------- -------------------
83w7b0h5ug6ph          0 truncate table tab1     
After test, resume DBW1:

SQL> oradebug resume  
By the way, we observed: "enq: CR - block range reuse ckpt", also blocked by CKPT, when running PL/SQL bulk SQL "forall insert into values".


4. FINAL_BLOCKING_SESSION


In both above tests, CKPT is shown as final_blocking_session, but in reality, that is DBW1 suspended. The reason is probably because Foreground process only communicates with CKPT, and has no direct IPC call of DB WRITER processes.

Oracle MOS documented this behavior:
    MOS Resolving Issues Where 'enq: RO - fast object reuse' Contention Seen During Drop or Truncate Operations (Doc ID 1475659.1)
The RO enqueue known as "Multiple object reuse" enqueue, is used to synchronise operations between foreground process 
and a background process such as DBWR or CKPT. It is typically used when dropping objects or truncating tables.
Following is the sequence of events When a truncate/drop occurs:
  1.Foreground process acquires the "RO" enqueue in exclusive mode
  2.Cross instance calls (or one call if it is a single object) are issued ("CI" enqueue is acquired)
  3.CKPT Processes on each of instances requests the DBWR to write the dirty buffers to the disk and invalidate all the clean buffers.
  4.After DBWR completes writing all blocks, the foreground process releases the RO enqueue.


5. Discussions


KO enqueue is triggered by SELECT table FULL scan, which requires writing all dirty buffer blocks into disk datafiles and updating Extent Bitmap in Segment Header Extent.

RO enqueue is triggered by TRUNCATE table, which requires cleaning up (write) segment header (bitmap info), that is the 2nd Block (Extent Bitmap) in Segment Header Extent for small table.

When both enqueues appear in top Wait Events, following Events can be also observed:
  free buffer waits
  buffer busy waits
  local write wait
  write complete waits
For "free buffer waits", P3 (set-id#) reveals the responsible DBWR.

Blog: ASSM Truncate demonstrated RO enqueue when truncating table with big "INITIAL", and explained that the blocks written were the space management bitmap blocks for the extent(s) that remained after the truncate (storage_clause "INITIAL" specifies the size of the first extent of the object. Oracle allocates space for this extent when you create the schema object).


6. row cache lock


In 'enq: RO - fast object reuse' Test, if we open one more Sqlplus Session (SID=726) and perform the same tab1 truncate, we observe "row cache lock" with "cache id = 8" (dc_objects/dc_object_grants).

drop table tab1;
create table tab1 as select level x, rpad('ABC', 100, 'X') y from dual connect by level < 1000000; 

SQL> oradebug setorapid 12
Oracle pid: 12, Unix process pid: 19980, image: oracle@s5d00003 (DBW1)
SQL> oradebug suspend
Statement processed.

SQL (543) > truncate table tab1;

SQL (726) > truncate table tab1;

select c.wait_event_text, c.sid, c.blocker_sid bsid, s.final_blocking_session final_bsid, 
       s.program, s.sql_id, s.p1text, s.p1, s.p2text, s.p2, s.p3text, s.p3
from v$wait_chains c, v$session s
where c.sid = s.sid order by c.chain_id, s.program;

WAIT_EVENT_TEXT             SID BSID FINAL_BSID PROGRAM      SQL_ID        P1TEXT               P1  P2TEXT      P2  P3TEXT  P3
--------------------------- --- ---- ---------- ------------ ------------- --------- -------------  ------  ------  ------  --
rdbms ipc message           361                  (CKPT)                    timeout             300              0            0
row cache lock              726  543        361  sqlplus.exe 83w7b0h5ug6ph cache id              8  mode        0   request  3
enq: RO - fast object reuse 543  361        361  sqlplus.exe 83w7b0h5ug6ph name|mode 1,380,909,062  2       65,563  0        1
If we create one second table (tab2), and let new Sqlplus Session (SID=726) truncate tab2, both sessions are on "enq: RO - fast object reuse", no more "row cache lock" observed.

drop table tab1;
create table tab1 as select level x, rpad('ABC', 100, 'X') y from dual connect by level < 1000000; 

drop table tab2;
create table tab2 as select level x, rpad('ABC', 100, 'X') y from dual connect by level < 1000000; 

SQL> oradebug setorapid 12
Oracle pid: 12, Unix process pid: 19980, image: oracle@s5d00003 (DBW1)
SQL> oradebug suspend
Statement processed.

SQL (543) > truncate table tab1;

SQL (726) > truncate table tab2;

select c.wait_event_text, c.sid, c.blocker_sid bsid, s.final_blocking_session final_bsid, 
       s.program, s.sql_id, s.p1text, s.p1, s.p2text, s.p2, s.p3text, s.p3
from v$wait_chains c, v$session s
where c.sid = s.sid order by c.chain_id, s.program;

WAIT_EVENT_TEXT             SID BSID FINAL_BSID PROGRAM      SQL_ID        P1TEXT               P1  P2TEXT      P2  P3TEXT  P3
--------------------------- --- ---- ---------- ------------ ------------- --------- -------------  ------  ------  ------  --
rdbms ipc message           361                  (CKPT)                    timeout             300              0            0
enq: RO - fast object reuse 543  361        361  sqlplus.exe 83w7b0h5ug6ph name|mode 1,380,909,062  2       65,563  0        1
enq: RO - fast object reuse 726  361        361  sqlplus.exe 2wdshw3gfd81s name|mode 1,380,909,062  2       65,564  0        1

select sql_id, executions, substr(sql_text, 1, 60) sql_text 
from v$sql v where sql_id in ('83w7b0h5ug6ph', '2wdshw3gfd81s');

SQL_ID        EXECUTIONS SQL_TEXT
------------- ---------- -------------------
2wdshw3gfd81s          0 truncate table tab2
83w7b0h5ug6ph          0 truncate table tab1

Oracle 12.1 Wait Event: 'log file sync: SCN ordering' Demo

Oracle MOS Document:
         What is the Difference Between 'log File Sync: SCN Ordering' and 'log File Sync' (Doc ID 2363231.1)
    Log file sync: SCN ordering is a new wait event introduced in 12.1.
    
    This wait event is triggered when an operation such as a direct path load compares the current SCN against the last on-disk commit SCN.
      
    Before an SCN can be written to a datafile, a check is performed to ensure that an SCN larger than 
    the SCN needed to maintain "crash delta" has already been written to redo.
      
    If the check determines that the SCN to be written to the datafile is greater than that currently needed in the redo logs, 
    LGWR will be posted to force some redo to disk, independent of a commit.
      
    If no actual redo with an appropriate SCN is available in the log buffer, it is possible that an empty redo record may be written 
    to the redo logs to satisfy the need to have the higher SCN.
      
    Prior to 12.1, the time spent waiting for redo being written by LGWR for this purpose was included in the main "log file sync" wait.
    
    Starting in 12.1, it is now tracked separately as "log file sync: SCN ordering". It was separated out 
    because it is not related to transaction commit wait for log file write.
In short, 'log File Sync: SCN Ordering' is 'log File Sync' not triggered by transaction commit.
There exists one non-commit SCN to be written to a datafile, which is bigger than last LGWR SCN.
LGWR is in charge of sequentially ("ordering") writing all SCN between last LGWR SCN and non-commit SCN to Redo Log.

In this Blog, we will try to demonstrate Wait Event: 'log file sync: SCN ordering' without any COMMIT (ROLLBACK) statements.

Note: All tests are done in Oracle 12.1.0.2 on Solaris with 6 CPUs.


1. Test


Test DB is configured as:
   shared_pool_size=1408M
   log_buffer=128M       (note: real allocated is 140M)
   db_cache_size=2000M
   db_writer_processes=2
   cpu_count=6
Launch Test (see appended Test Code):

exec test_upd_no_commit_job(4);
exec test_load_as_select_job(2);
When all 6 Jobs are visible in dba_jobs_running, we can see 6 transactions, 2 of which are select statements with materialized subquery factoring clause (WITH clause).

SQL > select s.sid, s.program, s.sql_id, t.addr, t.status, t.start_time, t.start_scn 
      from v$transaction t, v$session s where t.addr = s.taddr order by s.program;

SID  PROGRAM               SQL_ID         ADDR              STATUS  START_TIME                   START_SCN
---  --------------------  -------------  ----------------  ------  -------------------  -----------------
 15  oracle@testdb (J000)  5h3nqwh6fdw66  0000000184EBA498  ACTIVE  2018-03-06 07:17:16  9,332,237,391,411
727  oracle@testdb (J001)  a3376g221j75v  0000000184FA3B38  ACTIVE  2018-03-06 07:31:22  9,332,237,491,530
370  oracle@testdb (J002)  5h3nqwh6fdw66  0000000184EBBBD8  ACTIVE  2018-03-06 07:17:16  9,332,237,391,419
549  oracle@testdb (J003)  5h3nqwh6fdw66  0000000186A2C818  ACTIVE  2018-03-06 07:17:16  9,332,237,391,757
192  oracle@testdb (J004)  a3376g221j75v  0000000184FA46D8  ACTIVE  2018-03-06 07:31:22  9,332,237,491,538
545  oracle@testdb (J005)  5h3nqwh6fdw66  0000000186BB3778  ACTIVE  2018-03-06 07:17:16  9,332,237,392,203
  
SQL > select sql_id, executions, substr(sql_text, 1, 50) sql_text 
      from v$sql where sql_id in ('a3376g221j75v', '5h3nqwh6fdw66');

SQL_ID        EXECUTIONS  SQL_TEXT
------------- ----------- --------------------------------------------------
a3376g221j75v      11,254 WITH SQ1 AS (SELECT /*+ materialize */ LEVEL X FRO
5h3nqwh6fdw66     392,362 UPDATE TEST_TAB SET VAL = 'Upd_'||:B2 WHERE ID = :  
Suspend LGWR:

SQL> oradebug setorapid 13
Oracle pid: 13, Unix process pid: 18482, image: oracle@testdb (LGWR)
SQL> oradebug suspend
after about 10 minutes, we can see blocking chains:

select c.chain_id, c.chain_signature, c.sid, c.blocker_sid bsid, final_blocking_session final_bsid, s.program, s.sql_id, s.event
from v$wait_chains c, v$session s
where c.sid = s.sid order by c.chain_id, s.program;

CHAIN_ID CHAIN_SIGNATURE                                                         SID  BSID  FINAL_BSID PROGRAM SQL_ID        EVENT
-------- ----------------------------------------------------------------------- ---- ----- ---------- ------- ------------ ---------------------------
       1 'rdbms ipc message'<='log file sync'                                       8 181          181 (J010)               log file sync
       1 'rdbms ipc message'<='log file sync'                                     181                  (LGWR)               rdbms ipc message
       2 'rdbms ipc message'<='log file sync: SCN ordering'<='buffer busy waits'   15 545          181 (J000) 5h3nqwh6fdw66 buffer busy waits
       2 'rdbms ipc message'<='log file sync: SCN ordering'<='buffer busy waits'  545 181          181 (J005) 5h3nqwh6fdw66 log file sync: SCN ordering
       3 'rdbms ipc message'<='log file sync'                                     189 181          181 (J011)               log file sync
       4 'rdbms ipc message'<='log file sync: SCN ordering'                       192 181          181 (J004) a3376g221j75v log file sync: SCN ordering
       5 'rdbms ipc message'<='log file sync: SCN ordering'<='buffer busy waits'  370 545          181 (J002) 5h3nqwh6fdw66 buffer busy waits
       6 'rdbms ipc message'<='log file sync'                                     548 181          181 (J007)               log file sync
       7 'rdbms ipc message'<='log file sync: SCN ordering'<='buffer busy waits'  549 545          181 (J003) 5h3nqwh6fdw66 buffer busy waits
       8 'rdbms ipc message'<='log file sync: SCN ordering'<='buffer busy waits'  719 545          181 (SMON)               buffer busy waits
       9 'rdbms ipc message'<='log file sync'                                     726 181          181 (J008)               log file sync
      10 'rdbms ipc message'<='log file sync: SCN ordering'                       727 181          181 (J001) a3376g221j75v log file sync: SCN ordering
      11 'rdbms ipc message'<='log file sync'                                     899 181          181 (J006)               log file sync
      12 'rdbms ipc message'<='log file sync'                                     903 181          181 (J009)               log file sync
Above output shows that only two non-commit sessions (SID; 192, 727 with SQL_ID: a3376g221j75v) hit Wait Event "log file sync: SCN ordering".

Look 'sync scn':
                                                                                                           
select distinct * from (
  select event, min(p2) over (partition by event) min_scn, max(p2) over (partition by event) max_scn
  from v$session 
  where p2text = 'sync scn' and event in ('log file sync',  'log file sync: SCN ordering')
) order by min_scn;

EVENT                             MIN_SCN        MAX_SCN
--------------------------- -------------  -------------
log file sync               3,568,537,824  3,568,549,488
log file sync: SCN ordering 3,568,549,406  3,568,549,406
which shows that SCN of 'log file sync: SCN ordering' falls between min and max SCN of 'log file sync' (3,568,537,824 < 3,568,549,406 < 3,568,549,488), in other words, there exists one SCN of 'log file sync' which is smaller than that of 'log file sync: SCN ordering' and one SCN of 'log file sync' which is bigger than that of 'log file sync: SCN ordering'. Probably that is the reason and result of 'SCN ordering'.

After the test, resume LGWR and clean up all Jobs:

SQL> oradebug resume
SQL> exec clean_jobs;
Collect AWR and ASH reports for the test interval, we can see:

Top 10 Foreground Events by Total Wait Time

Event Waits Total Wait Time (sec) Avg wait (ms) % DB time Wait Class
log file sync 14 7986.1 570437.20 29.5 Commit
buffer busy waits 23,544 4142.6 175.95 15.3 Concurrency
log file sync: SCN ordering 3 4139.8 1.4E+06 15.3 Concurrency
DB CPU 2184.4 8.1

Top SQL with Top Events

SQL ID %Activity Event %Event Top Row Source SQL Text
5h3nqwh6fdw66 32.83 buffer busy waits 19.35 UPDATE UPDATE TEST_TAB SET VAL = 'Upd..
CPU + Wait for CPU 7.03 TABLE ACCESS - FULL
log file sync: SCN ordering 6.45 UPDATE
a3376g221j75v 16.40 log file sync: SCN ordering 12.90 TEMP TABLE TRANSFORMATION WITH SQ1 AS (SELECT /*+ materi...
CPU + Wait for CPU 3.45 CONNECT BY - WITHOUT FILTERING

Occasionally we also observed statement:

  SQL ID:   g4gp07gt2z920 
  SQL Text: update sys.scheduler$_job set last_start_date = :1, running_instance = :2, running_slave = :3, job_status = :4 where obj# = :5
is blocked by 'log file sync: SCN ordering'.

In the above test, log_buffer is purposely set as a large value (128M). If we reduce it to a small value, for example, 2M (note: real allocated is 12M)

SQL> alter system set log_buffer=2M scope=spfile; 
SQL> startup force;
Repeat the same test by suspending LGWR. The blocking chains will be:

select c.chain_id, c.chain_signature, c.sid, c.blocker_sid bsid, final_blocking_session final_bsid, s.program, s.sql_id, s.event
from v$wait_chains c, v$session s
where c.sid = s.sid order by c.chain_id, s.program;

CHAIN_ID CHAIN_SIGNATURE                                              SID  BSID FINAL_BSID PROGRAM SQL_ID        EVENT
-------- ------------------------------------------------------------ ---- ---- ---------- ------- ------------- -----------------
       1 'rdbms ipc message'<='log buffer space'                      186  181         181 (J001)  a3376g221j75v log buffer space 
       1 'rdbms ipc message'<='log buffer space'                      181                  (LGWR)                rdbms ipc message
       2 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 367  547         181 (J002)  5h3nqwh6fdw66 buffer busy waits
       2 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 547  181         181 (J003)  5h3nqwh6fdw66 log buffer space 
       3 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 724  181         181 (J004)  a3376g221j75v log buffer space 
       3 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 542  724         181 (MMON)                buffer busy waits
       4 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 545  547         181 (J000)  5h3nqwh6fdw66 buffer busy waits
       5 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 719  724         181 (SMON)                buffer busy waits
       6 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 902    8         181 (CJQ0)  c96x1j3nt4suf buffer busy waits
       6 'rdbms ipc message'<='log buffer space'<='buffer busy waits'   8  181         181 (J006)  g4gp07gt2z920 log buffer space 
       7 'rdbms ipc message'<='log buffer space'<='buffer busy waits' 905  547         181 (J005)  5h3nqwh6fdw66 buffer busy waits
No 'log file sync: SCN ordering' is visible because sessions are blocked by 'log buffer space' at first, and no more 'log file sync: SCN ordering' occurs.

From above blocking chains, we can see SMON, MMON and CJQ0 are also blocked by Job sessions, but all final_blocking_session are LGWR.

As a tradition, in Oracle, all popular terms are named twice, log_buffer is not an exception:
  V$SGAINFO.name='Redo Buffers'
  V$SGASTAT.name='log_buffer'


2. MMON ORA_12751_DUMP


During test, we also observed MMON ORA-12751 errors dump:

Unix process pid: 19998, image: oracle@testdb (MMON)
----- START DDE Action: 'ORA_12751_DUMP' (Sync) -----
Runtime exceeded 300 seconds
Time limit violation detected at:
ksedsts()+439<-kspol_12751_dump()+81<-dbgdaExecuteAction()+384<-dbgerRunAction()+105<-dbgerRunActions()+2117<-dbgexProcessError()+2271
<-dbgePostErrorKGE()+2169<-dbkePostKGE_kgsf()+58<-kgeade()+369<-kgeselv()+94<-ksesecl0()+181<-kcbzwb()+8821<-kcbgtcr()+84390
<-ktucloUsMinScn()+527<-ktucloUsegScan()+1017<-ksb_run_managed_action()+456<-ksbcti()+2029<-ksbabs()+2231<-ksbrdp()+1734<-opirip()+844
<-opidrv()+651<-sou2o()+122<-opimai_real()+317<-ssthrdmain()+558<-main()+164<-_start()+123Current Wait Stack:
 0: waiting for 'buffer busy waits'
    file#=0x3, block#=0x23c0, class#=0x11
    wait_id=1873 seq_num=1874 snap_id=1
    wait times: snap=5 min 6 sec, exc=5 min 6 sec, total=5 min 6 sec
    wait times: max=infinite, heur=5 min 6 sec
    wait counts: calls=306 os=306
    in_wait=1 iflags=0x15a2
There is at least one session blocking this session.
  Dumping 1 direct blocker(s):
    inst: 1, sid: 724, ser: 1289
  Dumping final blocker:
    inst: 1, sid: 181, ser: 52654
...
sample interval: 1 sec, max history 120 sec
---------------------------------------------------
  [121 samples,                                            09:07:49 - 09:09:49]
    waited for 'buffer busy waits', seq_num: 1874
      p1: 'file#'=0x3
      p2: 'block#'=0x23c0
      p3: 'class#'=0x11
      time_waited: >= 120 sec (still in wait)
---------------------------------------------------
Sampled Session History Summary:
  longest_non_idle_wait: 'buffer busy waits'
  [121 samples, 09:07:49 - 09:09:49]
      time_waited: >= 120 sec (still in wait)
---------------------------------------------------
----- END DDE Action: 'ORA_12751_DUMP' (SUCCESS, 3 csec) -----
----- END DDE Actions Dump (total 3 csec) -----
KEBM: MMON action policy violation. 'Block Cleanout Optim, Undo Segment Scan' viol=1; err=12751                                                                  
where 'class#'=0x11 (Decimal 17) indicates that MMON is 'buffer busy waits' on 'undo header'.

'ORA_12751_DUMP' is mentioned in MOS Docu:
    AWR Snapshots Fail to Generate With Associated "Suspending MMON action '%s' for 82800 seconds" alert log messages and Underlying ORA-12751 Errors (Doc ID 2043531.1)

As we know MMON (MMNL) is responsible to flush 1 out of 10 V$ACTIVE_SESSION_HISTORY (memory Circular Buffer in SGA) into disk DBA_HIST_ACTIVE_SESS_HISTORY each Snapshot interval or out-of-space by a direct path load (insert), which matches precisely what Doc ID 2363231.1 mentioned.


3. Test Code



drop table test_tab;
              
create table test_tab 
   INITRANS   26           -- prevent Segments ITL Waits and Deadlocks. 
as select level id, rpad('ABC', 100, 'X') val from dual connect by level <= 100;

create or replace procedure test_upd_no_commit(p_id number) is
begin
 for i in 1..1000000 loop
   update test_tab set val = 'Upd_'||i where id = p_id;
 end loop;
end;
/

create or replace procedure test_upd_no_commit_job(p_job_cnt number) as
  l_job_id pls_integer;
begin
  for i in 1..p_job_cnt loop
    dbms_job.submit(l_job_id, 'begin while true loop test_upd_no_commit('||i||'); end loop; end;');
  end loop;
  commit;
end;    
/

create or replace procedure test_load_as_select is
  type  num_tab is table of number;
 l_tab num_tab;
begin
  for i in 1..1000000 loop
   with sq1 as (select /*+ materialize */ level x from dual connect by level <= 1000000)
       ,sq2 as (select /*+ materialize */ x from sq1 where x <= 999999)
       ,sq3 as (select /*+ materialize */ x from sq1 where x <= 888888)
   select x bulk collect into l_tab from sq3
   where x <= 3; 
 end loop;
end;
/

create or replace procedure test_load_as_select_job(p_job_cnt number) as
  l_job_id pls_integer;
begin
  for i in 1..p_job_cnt loop
    dbms_job.submit(l_job_id, 'begin while true loop test_load_as_select; end loop; end;');
  end loop;
  commit;
end;    
/

create or replace procedure clean_jobs as
begin
  for c in (select job from dba_jobs) loop
    begin
       dbms_job.remove (c.job);
    exception when others then null;
    end;
    commit;
  end loop;

  for c in (select d.job, d.sid, (select serial# from v$session where sid = d.sid) ser 
              from dba_jobs_running d) loop
    begin
      execute immediate
             'alter system kill session '''|| c.sid|| ',' || c.ser|| ''' immediate';
      dbms_job.remove (c.job);
    exception when others then null;
    end;
    commit;
  end loop;
  
  -- select * from dba_jobs;
  -- select * from dba_jobs_running;
end;
/