In this Blog, we will try to construct such a case of CBC latch contention that one index leaf block split occurs when inserting a single row.
Note: Tested in Oracle 12c, 18c, 19c.
1. Test Setup
First we create a table with one reverse primary key index, and get object_id of that index:
drop table test_tab purge;
create table test_tab (id number, val number);
create index test_tab#r on test_tab(id) reverse;
-- CREATE TABLE test_tab (id NUMBER PRIMARY KEY USING INDEX (CREATE INDEX test_tab#r ON test_tab(id) REVERSE));
select object_id, to_char(object_id, 'XXXXXXXX') id_hex, data_object_id, to_char(data_object_id, 'XXXXXXXX') did_hex
from dba_objects where object_name = 'TEST_TAB#R';
OBJECT_ID ID_HEX DATA_OBJECT_ID DID_HEX
--------- ------ -------------- -------
2459815 2588A7 2459815 2588A7
2. Leaf Block Split Point Finding
With appended Plsql procedure: leaf_split_finder, we can find one exact point of leaf block split.
SQL > exec leaf_split_finder(5000);
--- Leaf Split at row count: 4708, (id between 1 and 11) in 2 Blocks
block: 2661260, cnt: 5, min_id: 1, max_id: 5
block: 3665613, cnt: 6, min_id: 6, max_id: 11
Above output shows that after inserting 4707 rows into table test_tab,
all the index entries for row from 1 to 11 are in the same block (2661260).
Now insert of one more row (4708) will cause a leaf block split.
The index entries for row from 1 to 5 remain in block (2661260),
but index entries for row from 6 to 11 are moved to a new block (3665613).
3. CBC Latch Contention Producing
Open a Plsql Window, truncate table, insert 4707 rows at first.
truncate table test_tab;
insert into test_tab select level, -level from dual connect by level <= 4707;
commit;
-- using above index object_id to substitute for 2458815
select blk, count(*), min(id) min_id, max(id) max_id from (
select id
,dbms_rowid.rowid_block_number(sys_op_lbid (2459815, 'L', rowid)) blk
from test_tab)
where id between 1 and 11
group by blk order by min_id, blk;
BLK COUNT(*) MIN_ID MAX_ID
------- -------- ------ ------
2661260 11 1 11
We can see all rows from 1 to 11 are in the same leaf block (2661260).Following query lists all 14 index blocks and number of entries in each of them, something like index entry distribution. The first BLK (2661260) has 534 entries.
select blk, count(*), min(id) min_id, max(id) max_id from (
select id
,dbms_rowid.rowid_block_number(sys_op_lbid (2461548, 'L', rowid)) blk
from test_tab)
group by blk order by min_id, blk;
BLK COUNT(*) MIN_ID MAX_ID
-------- --------- ------- -------
2661260 534 1 4707
3665593 527 12 4622
2661262 274 22 4627
3665612 269 28 4633
3665595 284 34 4639
3665611 279 40 4645
3665614 314 46 4700
3665598 311 52 4658
3665599 327 59 4665
3665596 319 66 4672
2661261 312 73 4678
3665594 311 79 4685
3665592 327 86 4692
3665597 319 93 4699
14 rows selected.
Now we start two groups of jobs, each group having 6 jobs in a query loop (see appended test_tab_select_job).
All 6 jobs in the first group access row 1 to 5;
whereas 6 jobs in the second group access row 6 to 10;
exec test_tab_select_job(6, 1e9, 1, 0);
exec test_tab_select_job(6, 1e9, 6, 0);
-- stop all jobs by:
-- exec clean_jobs;
Each job in the first group executes the query with id from 1 to 5 in a loop,
whereas job in the second group executes it with id from 6 to 10.
Both are with the same Xplan.
select val from test_tab where id = :B1
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 1 (100)| |
| 1 | TABLE ACCESS BY INDEX ROWID BATCHED| TEST_TAB | 1 | 26 | 1 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | TEST_TAB#R | 1 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------
A query on v$session shows that all 12 jobs are running without any waiting event.
And repeated query on v$latch shows that 'cache buffers chains' SLEEPS is not increasing.
select s.client_identifier, s.sid, s.serial#, p.pid, p.spid, s.event, p1, p2, p3, latchwait, latchspin
from v$session s, v$process p
where s.paddr=p.addr and lower(s.program) like '%j0%'
order by s.client_identifier;
CLIENT_IDENTIFIER SID SERIAL# PID SPID EVENT P1 P2 P3 LATCHWAIT LATCHSPIN
-------------------- --- ------- --- ---- ----------------------- -- ----- -- --------- ---------
test_tab_select_id_1 900 65141 23 4653 db file sequential read 1 85437 1
test_tab_select_id_1 7 8997 24 4655 null event 0 0 0
test_tab_select_id_1 186 50273 25 4657 null event 0 0 0
test_tab_select_id_1 369 57072 26 4659 null event 0 0 0
test_tab_select_id_1 547 53721 27 4661 null event 0 0 0
test_tab_select_id_1 903 13140 35 4663 null event 0 0 0
test_tab_select_id_6 366 17587 38 4665 null event 0 0 0
test_tab_select_id_6 9 26667 42 4667 null event 0 0 0
test_tab_select_id_6 189 13145 43 4669 null event 0 0 0
test_tab_select_id_6 549 36479 45 4671 null event 0 0 0
test_tab_select_id_6 726 40173 46 4673 null event 0 0 0
test_tab_select_id_6 906 16667 47 4675 null event 0 0 0
select addr, gets, misses, sleeps, spin_gets, wait_time, round(wait_time/sleeps) us_per_sleep
from v$latch where name = 'cache buffers chains';
ADDR GETS MISSES SLEEPS SPIN_GETS WAIT_TIME US_PER_SLEEP
---------------- --------- ------ ------ --------- --------- ------------
0000000060027E88 181375987 795727 1 795699 25 25
Now insert a single new row:
insert into test_tab values(4708, -4708);
select blk, count(*), min(id) min_id, max(id) max_id from (
select id
,dbms_rowid.rowid_block_number(sys_op_lbid (2459815, 'L', rowid)) blk
from test_tab)
where id between 1 and 11
group by blk order by min_id, blk;
BLK COUNT(*) MIN_ID MAX_ID
------- -------- ------ ------
2661260 5 1 5
3665613 6 6 11
One leaf block split occurs. Row 6 to 11 are moved to new block (3665613).Look again index entry distribution, now it has 15 index blocks. The previous 534 entries in BLK (2661260) are split into two parts, 272 in original block, 262 in new created BLK (3665613), which also contains the new inserted row (id: 4708). So 534 is probably the split point in such case. If we look the third BLK (3665593), which has already 527 index entries, only 7 rows away from next leaf block split. It means that in certain case, the distance of two split points can be as short of 7 rows for 4715 (4708+7) row inserts (average should be 4715/15 = 314), an uneven split point distribution in case of reverse index. In later section, we will give further look of index block split distributions.
select blk, count(*), min(id) min_id, max(id) max_id from (
select id
,dbms_rowid.rowid_block_number(sys_op_lbid (2461548, 'L', rowid)) blk
from test_tab)
group by blk order by min_id, blk;
BLK COUNT(*) MIN_ID MAX_ID
-------- --------- ------- -------
2661260 272 1 4705
3665613 263 6 4708
3665593 527 12 4622
2661262 274 22 4627
3665612 269 28 4633
3665595 284 34 4639
3665611 279 40 4645
3665614 314 46 4700
3665598 311 52 4658
3665599 327 59 4665
3665596 319 66 4672
2661261 312 73 4678
3665594 311 79 4685
3665592 327 86 4692
3665597 319 93 4699
15 rows selected.
Now if we look again all 12 job sessions:
select s.client_identifier, s.sid, s.serial#, p.pid, p.spid, s.event, p1, p2, p3, latchwait, latchspin
from v$session s, v$process p
where s.paddr=p.addr and lower(s.program) like '%j0%'
order by s.client_identifier;
CLIENT_IDENTIFIER SID SERIAL# PID SPID EVENT P1 P2 P3 LATCHWAIT LATCHSPIN
-------------------- --- ------- --- ---- --------------------------- ---------- ------- -- ---------------- ---------
test_tab_select_id_1 900 65141 23 4653 null event 0 0 0
test_tab_select_id_1 7 8997 24 4655 null event 0 0 0
test_tab_select_id_1 186 50273 25 4657 null event 0 0 0
test_tab_select_id_1 369 57072 26 4659 null event 0 0 0
test_tab_select_id_1 547 53721 27 4661 latch: cache buffers chains 6630918056 228 0
test_tab_select_id_1 903 13140 35 4663 latch: cache buffers chains 6630918056 228 0
test_tab_select_id_6 366 17587 38 4665 latch: cache buffers chains 6632747048 228 0
test_tab_select_id_6 9 26667 42 4667 buffer busy waits 1548 3665613 1
test_tab_select_id_6 189 13145 43 4669 latch: cache buffers chains 6632747048 228 0 000000018B57B028
test_tab_select_id_6 549 36479 45 4671 latch: cache buffers chains 6632747048 228 0 000000018B57B028
test_tab_select_id_6 726 40173 46 4673 buffer busy waits 1548 3665613 1
test_tab_select_id_6 906 16667 47 4675 latch: cache buffers chains 6632747048 228 0 000000018B57B028
There are two wait events: "buffer busy waits" (p1text=file#, p2text=block#, p3text=class#)
or "latch: cache buffers chains" (p1text=address, p2text=number, p3text=tries.
From Oracle 18c, p3text renamed as "why").For "buffer busy waits", they are waiting on new spli index block (file#=1548, block#=3665613).
For "latch: cache buffers chains", first group are waiting in address 6630918056 (0x18B3BC7A8), whereas second group in 6632747048 (0x18B57B028).
The second group also shows column v$process.LATCHWAIT (000000018B57B028), which is the child latch address the process is waiting for (probably only filled where there exists latch sleep).
Imagine that a production system was running smoothly all morning, Suddenly, a single new row insert slowed it down with overwhelming CBC latch concurrency.
Repeated query on v$latch shows the continously increasing of latch SLEEPS (1460647) and WAIT_TIME (905492637).
select addr, gets, misses, sleeps, spin_gets, wait_time, round(wait_time/sleeps) us_per_sleep
from v$latch where name = 'cache buffers chains';
ADDR GETS MISSES SLEEPS SPIN_GETS WAIT_TIME US_PER_SLEEP
---------------- ---------- -------- ------- --------- --------- ------------
0000000060027E88 1853839207 96374121 1460647 95066763 905492637 620
A further query on v$latch_children shows majority of CBC SLEEPS (1460317) is on child latch: 000000018B57B028
of second group. Even though the child latch: 000000018B3BC7A8 of first group has more GETS (428020322),
it has much less MISSES, almost no SLEEPS and WAIT_TIME.
select latch#, child#, addr, gets, misses, sleeps, spin_gets, wait_time, round(wait_time/sleeps) us_per_sleep
from v$latch_children
where addr = hextoraw('000000018B3BC7A8') or addr = hextoraw('000000018B57B028');
LATCH# CHILD# ADDR GETS MISSES SLEEPS SPIN_GETS WAIT_TIME US_PER_SLEEP
------ ------ ---------------- --------- -------- ------- --------- --------- ------------
228 3156 000000018B3BC7A8 428020322 1027200 4 1027196 108 27
228 12682 000000018B57B028 380009579 90239103 1460317 88932209 905458824 620
With one more query on v$latch_misses, we can display CBC latch contention locations.
select * from v$latch_misses
where parent_name = 'cache buffers chains' and sleep_count > 0
order by sleep_count desc;
PARENT_NAME WHERE NWFAIL_COUNT SLEEP_COUNT WTR_SLP_COUNT LONGHOLD_COUNT LOCATION
-------------------- ------------------------ ------------ ----------- ------------- -------------- -----------------------
cache buffers chains kcbgtcr: fast path pin 0 1341199 2297098 197030 kcbgtcr: fast path pin
cache buffers chains kcbchg: cr pin cleanout 0 694146 34319 86549 kcbchg: cr pin cleanout
cache buffers chains kcbchg: cr pin change 0 182979 13069 23671 kcbchg: cr pin change
cache buffers chains kcbgtcr: slow path excl 0 124511 5269 5873 kcbgtcr: slow path excl
cache buffers chains kcbchg: change complete 0 30195 8539 2881 kcbchg: change complete
cache buffers chains kcbcge 0 23214 30005 6855 kcbcge
cache buffers chains kcbzwb 0 15599 9668 2013 kcbzwb
cache buffers chains kcbrls: pin release 0 10319 18689 2147 kcbrls: pin release
cache buffers chains kcb_trim_hash_chain 0 3527 109 712 kcb_trim_hash_chain
cache buffers chains kcbrls: fast pin release 0 1069 10173 503 kcbrls: fast pin release
cache buffers chains kcbbxsv 0 2 0 2 kcbbxsv
cache buffers chains kcbgcur: slow path 0 1 0 0 kcbgcur: slow path
The top Location (SLEEP_COUNT=1341199, LONGHOLD_COUNT=197030) is "kcbgtcr: fast path pin".With one further query on x$bh, we can link the CBC_LATCH to protected block.
select * from (
with sq as (select object_name, data_object_id
from dba_objects where object_name like 'TEST_TAB%')
,bh as (select hladdr, obj, file#, dbablk, sum(tch) tch
from x$bh group by hladdr, obj, file#, dbablk)
select hladdr cbc_latch_addr
,sum(tch) tch
,listagg(tch || '-' || obj || '(' || object_name || ')/' || file# || '/' ||dbablk, ';')
within group (order by tch desc) tch_list -- "tch-obj(name)/file/blk_list"
,count(*) blk_cnt
from bh, sq
where bh.obj = sq.data_object_id
and tch > 0
--and (hladdr like '%18B3DB828%' or hladdr like '%18B3BACA8')
group by hladdr
order by tch desc)
where 1=1
and (tch_list like '%TEST_TAB%');
CBC_LATCH_ADDR TCH TCH_LIST BLK_CNT
---------------- --- ----------------------------------- -------
000000018B851DA8 3 3-2459816(TEST_TAB#R)/1548/2661258 1 <== root block (kdxcolev 1)
000000018B3BC7A8 213 213-2459816(TEST_TAB#R)/1548/2661260 1 <== leaf block for row 1 to 5 (kdxcolev 0)
000000018B57B028 5 5-2459816(TEST_TAB#R)/1548/3665613 1 <== leaf block for row 6 to 11 (kdxcolev 0)
It shows that the contention latch (000000018B57B028) has much lower TCH than that of non contention latch (000000018B3BC7A8).
This observation also raises an immediate question: can we use TCH to determine CBC latch contention or Hot Block ?
4. Latch State Transition Diagram By Dtrace
Pick one CBC latch contention session (SID: 549, Oracle PID: 45 (0x2D), spid: 4671) from above v$session and v$process query.
Run following Dtrace Script on contention latch address: 0x18B57B028 (For Latch State Transition Diagram, See Blog: PL/SQL Function Result Cache Invalidation (I) ).
sudo dtrace -n \
'
typedef unsigned int ub4;
typedef struct latchstate /* latch state */ {
ub4 nproc_pid; /* Nproc or pid */
ub4 flag; /* blocking flag */
ub4 gets; /* number of gets */
ub4 latch; /* latch number */
ub4 level; /* latch level */
ub4 where; } latchstate;
BEGIN {laddr = $1; step = 1; begin_gets = 0, end_gets = 0; prev_ts = timestamp;
self->evt = "begin"; self->loc = 0x0;}
pid$target::ksl_get_shared_latch:entry /arg0 == laddr && (self->evt != probefunc || self->loc != arg3)/ {
printf("\n---------Step %d at %Y (Elapsed-ns=%d)---------", step, walltimestamp, (timestamp-prev_ts));
ls = ((latchstate *) copyin(arg0, sizeof(latchstate)));
begin_gets = (begin_gets == 0 ? ls->gets: begin_gets); end_gets = ls->gets;
printf("\n***State Entry>>>(nproc_pid=>%d, flag=>%d, gets=>%d, latch=>%d, level=>%d, where=>%d)"
,ls->nproc_pid, ls->flag, ls->gets, ls->latch, ls->level, ls->where);
printf("\n>>>Transition>>>%s:%s\n (Addr=>0x%X, Immed_Get=>%d, Why=>%d, Where=>%d(0x%X), Mode=>%d, PID=>0x%X)",
probefunc, probename, arg0, arg1, arg2, arg3, arg3, arg4, arg5);
self->evt = probefunc; self->loc = arg3;
ustack(10, 0);
/* stop(); exit(0); */ /* no latch blocking, failed get, try again */
step = step + 1; self->entry = 1; prev_ts = timestamp;}
pid$target::ksl_get_shared_latch:return /self->entry == 1/ {
ls = ((latchstate *) copyin(laddr, sizeof(latchstate)));
printf("\n>>>State Return***(nproc_pid=>%d, flag=>%d, gets=>%d, latch=>%d, level=>%d, where=>%d)"
,ls->nproc_pid, ls->flag, ls->gets, ls->latch, ls->level, ls->where);
/* stop(); exit(0); */ /* simulate long latch holder, run by: dtrace -w -n */
self->entry = 2;}
pid$target::kslfre:entry /self->entry == 2 && arg0 == laddr && self->evt != probefunc/ {
printf("\n---------Step %d at %Y (Elapsed-ns=%d)---------", step, walltimestamp, (timestamp-prev_ts));
ls = ((latchstate *) copyin(arg0, sizeof(latchstate)));
end_gets = ls->gets;
printf("\n>>>Transition>>>%s:%s(Addr=>0x%-X)", probefunc, probename, arg0);
self->evt = probefunc; self->loc = 0x0;
/* stop(); exit(0); */ /* simulate long latch holder, run by: dtrace -w -n */
printf("\n===Latch freed===");
ustack(10, 0); step = step + 1; prev_ts = timestamp;}
END {
printf("\n---------Step %d at %Y (Elapsed-ns=%d)---------", step, walltimestamp, (timestamp-prev_ts));
printf("\nLatch Gets = %d (%d - %d)", (end_gets - begin_gets), (unsigned long)begin_gets, (unsigned long)end_gets); }
' -p 4671 0x18B57B028
The output is some repeated occurrences of following pattern:
ksl_get_shared_latch:entry
---------Step 599 at 2020 Feb 20 14:10:20 (Elapsed-ns=2112100)---------
***State Entry>>>(nproc_pid=>0, flag=>0, gets=>632751866, latch=>228, level=>1, where=>2195)
>>>Transition>>>ksl_get_shared_latch:entry
(Addr=>0x18B57B028, Immed_Get=>1, Why=>0, Where=>2298(0x8FA), Mode=>16, PID=>0x200000000000002D)
oracle`ksl_get_shared_latch
a.out`kcbzwb+0xa33
a.out`kcbgtcr+0x14a77
a.out`ktrget2+0x292
a.out`qeilbk1+0x3cf
a.out`qeilsr+0xbd
a.out`qerixFetch+0x1f9
a.out`qerstFetch+0x543
a.out`qertbFetchByRowID+0x39e
a.out`qerstFetch+0x4ca
ksl_get_shared_latch:return
>>>State Return***(nproc_pid=>45, flag=>536870912, gets=>632751872, latch=>228, level=>1, where=>2298)
kslfre:entry
---------Step 600 at 2020 Feb 20 14:10:20 (Elapsed-ns=63625)---------
>>>Transition>>>kslfre:entry(Addr=>0x18B57B028)
===Latch freed===
oracle`kslfre
a.out`kcbgtcr+0x540f
a.out`ktrget2+0x292
a.out`qeilbk1+0x3cf
a.out`qeilsr+0xbd
a.out`qerixFetch+0x1f9
a.out`qerstFetch+0x543
a.out`qertbFetchByRowID+0x39e
a.out`qerstFetch+0x4ca
a.out`opifch2+0x188b
It shows the CBC Latch State Transition.
At first, C subroutine ksl_get_shared_latch (shared Latch (in S Mode or X Mode)) was called
to get exclusive latch (Mode=>16) in entry location (where=>2195) and next location (where=>2298).
Then freed by kslfre.
CallStack shows that each row is accessed by qertbFetchByRowID via qeilsr
(TABLE ACCESS BY INDEX via INDEX RANGE SCAN).
For the where location, we can use the query below to find them.
CREATE OR REPLACE FORCE VIEW X_GV$LATCH_MISSES as
SELECT t1.indx INDX,
t1.inst_id INST_ID,
t1.ksllasnam PARENT_NAME,
t2.ksllwnam "WHERE",
t1.kslnowtf NWFAIL_COUNT,
t1.kslsleep SLEEP_COUNT,
t1.kslwscwsl WTR_SLP_COUNT,
t1.kslwsclthg LONGHOLD_COUNT,
t2.ksllwnam LOCATION,
t1.con_id CON_ID
FROM x$ksllw t2, x$kslwsc t1
WHERE t2.indx = t1.indx;
select * from sys.X_GV$LATCH_MISSES
where parent_name in ('cache buffers chains')
and indx in (2195, 2298, 2162);
INDX INST_ID PARENT_NAME WHERE NWFAIL_COUNT SLEEP_COUNT WTR_SLP_COUNT LONGHOLD_COUNT LOCATION
---- ------- -------------------- ---------------------- ------------ ----------- ------------- -------------- ---------------------
2118 1 cache buffers chains kcbchg: cr pin cleanout 0 1946366 110291 243655 kcbchg: cr pin cleanout
2195 1 cache buffers chains kcbgtcr: fast path pin 0 2594671 5314421 307276 kcbgtcr: fast path pin
2298 1 cache buffers chains kcbzwb 0 59480 48922 7353 kcbzwb
In the above Dtrace script, if we uncomment the line inside "kslfre:entry":
/* stop(); exit(0); */ /* simulate long latch holder, run by: dtrace -w -n */
the process was stopped (process STATE: stop) and latch will not be freed, hence to simulate long latch holder.
(v$session.state: "WAITED SHORT TIME", v$active_session_history.session_state: "ON CPU").Repeated query on v$latchholder will show GETS is no more changed.
select * from v$latchholder;
PID SID LADDR NAME GETS
--- --- ---------------- -------------------- ----------
45 549 000000018B57B028 cache buffers chains 761549981
Note that if we "stop()" inside "ksl_get_shared_latch:entry",
there will be no latch blocking since latch child (000000018B57B028) Get failed
and the latch was not acquired.
(v$session.state: "WAITING", v$active_session_history.session_state: "WAITING").However, if we "stop()" inside "ksl_get_shared_latch:return", there will be latch blocking since latch was already acquired, it should be released before any other sessions can get it.
By the way,
v$active_session_history.session_state
is defined as
DECODE (a.wait_time, 0, 'WAITING', 'ON CPU')
That means if "wait_time" is not 0, the session_state is denoted as 'ON CPU'.
In this example, UNIX process STATE: "stop" is conveyed to v$active_session_history.session_state: 'ON CPU'.
In this case, if v$active_session_history.session_state is aggregated (in an interval) to assess UNIX CPU utilization,
the outcome from Oracle will differ from real UNIX CPU utilization.As of 12c, v$session.WAIT_TIME has been deprecated in favor of the columns WAIT_TIME_MICRO and STATE (Strangely, derived v$active_session_history.WAIT_TIME and SESSION_STATE are not yet deprecated in Oracle 12c Documentation).
5. Buffer Cache Activity Internals
In Oracle, each CBC latch protects a set of hash buckets (32). Each hash bucket is a linked list of buffer headers. Each data (disk) block is cloned as a few buffer blocks with different status (CR, XCUR) into buffer cache (Maximum Allowed Number of CR buffers per dba is controlled by "_db_block_max_cr_dba"). All buffer blocks of same data (disk) block are hashed to the same hash bucket, hence protected by the same CBC latch. If we query the new block (3665613) protected by the CBC contention latch (000000018B57B028) twice, two example outputs are:
select hladdr, decode(state,0,'free',1,'xcur',2,'scur',3,'cr', state) status,
decode (bitand (flag, 1),0,'N','Y') dirty, ba, addr, nxt_hash, prv_hash, tch --, v.*
from x$bh v
where dbablk in (3665613)
order by v.state, indx;
HLADDR STATUS DIRTY BA ADDR NXT_HASH PRV_HASH TCH
---------------- ------ ----- ---------------- ---------------- ---------------- ---------------- ---
000000018B57B028 xcur Y 00000000CA3BA000 FFFF80FFBC132E60 000000018AE0DAC8 00000000CAFD2450 0
000000018B57B028 cr N 00000000CA8E2000 FFFF80FFBC1341E8 00000000CAFED610 000000018AE0DAC8 0
000000018B57B028 cr N 00000000CAE24000 FFFF80FFBC134090 00000000CAF9EDD0 00000000CAFB8CD0 1
000000018B57B028 cr N 00000000CA64A000 FFFF80FFBC133F38 00000000CAF6CA10 00000000CAFED610 1
000000018B57B028 cr N 00000000CA144000 FFFF80FFBC133DE0 00000000CAF85290 00000000CAF9EDD0 1
000000018B57B028 cr N 00000000CA3B8000 FFFF80FFBC133110 00000000CAFD2450 00000000CAF6CA10 1
000000018B57B028 cr N 00000000CAB6E000 FFFF80FFBC132FB8 00000000CAF853D0 00000000CAF85290 1
HLADDR STATUS DIRTY BA ADDR NXT_HASH PRV_HASH TCH
---------------- ------ ----- ---------------- ---------------- ---------------- ---------------- ---
000000018B57B028 xcur Y 00000000BE3E6000 FFFF80FFBC18FE08 00000000BEFD76D0 00000000BEF70250 0
000000018B57B028 cr N 00000000BEE80000 FFFF80FFBC115898 00000000BEFBC650 000000018AE0DAC8 0
000000018B57B028 cr N 00000000BE93E000 FFFF80FFBC115740 00000000C2F68690 00000000BEFF0F90 1
000000018B57B028 cr N 00000000C20D8000 FFFF80FFBC1155E8 00000000BEF70610 00000000BEFBC650 1
000000018B57B028 cr N 00000000BE1A4000 FFFF80FFBC115490 00000000BEFA2D90 00000000C2F68690 1
000000018B57B028 cr N 00000000BE6B0000 FFFF80FFBC1900B8 00000000BEF70250 00000000BEF70610 1
000000018B57B028 cr N 00000000BE19E000 FFFF80FFBC18FF60 00000000BEF86F50 00000000BEFA2D90 1
000000018B57B028 cr N 00000000BEBF2000 FFFF80FFBC18FCB0 00000000BEFBFE90 00000000BEF86F50 0
000000018B57B028 cr N 00000000BE998000 FFFF80FFBC18FB58 000000018AE0DAC8 00000000BEFD76D0 0
We can see that there exists one single DIRTY buffer block ('Y') with STATUS ('xcur').
The number of rows returned is changing in each execution.
All other memory address fields (BA, ADDR, NXT_HASH, PRV_HASH) are different in each execution of the query.
That means this buffer chain is constantly changed. For any buffer chain modification, an exclusive latch get
is requested. It results in CBC latch contention.
However, if we query the original block (2661260) protected by the CBC contention latch (000000018B3BC7A8) a few times,
the output are the same and the 'xcur' buffer block is not DIRTY ('N').:
select hladdr, decode(state,0,'free',1,'xcur',2,'scur',3,'cr', state) status,
decode (bitand (flag, 1),0,'N','Y') dirty, ba, addr, nxt_hash, prv_hash, tch --, v.*
from x$bh v
where dbablk in (2661260)
order by v.state, indx;
HLADDR STATUS DIRTY BA ADDR NXT_HASH PRV_HASH TCH
---------------- ------ ----- ---------------- ---------------- ---------------- ---------------- ---
000000018B3BC7A8 xcur N 00000000CA14E000 FFFF80FFBC133110 00000000CAF85A10 00000000CAF9EF10 45
000000018B3BC7A8 cr N 00000000CAB76000 FFFF80FFBC133F38 00000000CAF9EF10 000000018A7115B8 1
000000018B3BC7A8 cr N 00000000CA64C000 FFFF80FFBC133DE0 00000000CAF6D050 00000000CAFD2950 1
000000018B3BC7A8 cr N 00000000CA3C4000 FFFF80FFBC132FB8 00000000CAFB8E10 00000000CAF6D050 1
000000018B3BC7A8 cr N 00000000CA8E4000 FFFF80FFBC132E60 000000018A7115B8 00000000CAF85A10 1
If we dump the contention buffer block (3665613), the buffer head section looks like:
-- alter system dump datafile 1548 block 3665613;
BH (0xcafd2618) file#: 1548 rdba: 0x0037eecd (1024/3665613) class: 1 ba: 0xcab72000
hash: [0xcaf9ec90,0x18ae0dac8] lru: [0xcafd2200,0x1897cc750]
st: CR md: NULL tch: 1
flags: only_sequential_access
BH (0xcaf9ebd8) file#: 1548 rdba: 0x0037eecd (1024/3665613) class: 1 ba: 0xca648000
hash: [0xcaf6c8d0,0xcafd26d0] lru: [0xcaf9ea40,0x1897cb860]
st: CR md: NULL tch: 1
flags: only_sequential_access
BH (0xcaf6c818) file#: 1548 rdba: 0x0037eecd (1024/3665613) class: 1 ba: 0xca142000
hash: [0xcafb8f50,0xcaf9ec90] lru: [0xcaf6ccc0,0x1897cd640]
st: CR md: NULL tch: 1
flags: only_sequential_access
BH (0xcafb8e98) file#: 1548 rdba: 0x0037eecd (1024/3665613) class: 1 ba: 0xca8e6000
hash: [0xcaf84ed0,0xcaf6c8d0] lru: [0xcafb8800,0x1897cbfd8]
st: CR md: NULL tch: 1
flags: only_sequential_access
BH (0xcaf84e18) file#: 1548 rdba: 0x0037eecd (1024/3665613) class: 1 ba: 0xca3b2000
hash: [0xcaf853d0,0xcafb8f50] lru: [0xcaf85400,0x1897cb0e8]
st: CR md: NULL tch: 1
flags: only_sequential_access
BH (0xcaf85318) file#: 1548 rdba: 0x0037eecd (1024/3665613) class: 1 ba: 0xca3ba000
set: 13 pool: 3 bsz: 8192 bsi: 0 sflg: 2 pwc: 14,19
dbwrid: 0 obj: 2459815 objn: 2459815 tsn: [0/1999] afn: 1548 hint: f
hash: [0x18ae0dac8,0xcaf84ed0] lru: [0xcaf85180,0xcaf84f00]
obj-flags: object_ckpt_list
ckptq: [0x18be25400,0xfffab158] fileq: [0x18be31580,0x18be31580]
objq: [0x1750dac40,0x1750dac40] objaq: [0xcaf8af78,0x1750dac20]
use: [0x18bab8240,0x18bab8240] wait: [0x189465eb8,0x18bab9860]
st: XCURRENT md: EXCL fpin: 'ktspbwh2: ktspfmdb' fscn: 0x896.f43f474c tch: 0
flags: buffer_dirty mod_started block_written_once
change state: NOT_ACTIVE_YET
change count: 1
LRBA: [0x1a9a.d1c5d.0] LSCN: [0x896.f4ef66c5] HSCN: [0x896.f50b9757] HSUB: [1]
There are 6 buffer blocks, one is with "st: XCURRENT md: EXCL",
all other with "st: CR md: NULL".
If we pick all the hash fields, we can build a linked list:
(see Blog:
[Oracle] Database cache internals and why one data block can use multiple cache buffers)
NXT_HASH PRV_HASH Link_List
----------- ----------- ---------
1. BH (0xcafd2618) hash: [0xcaf9ec90,0x18ae0dac8] 1 -> 3
2. BH (0xcaf9ebd8) hash: [0xcaf6c8d0, 0xcafd26d0] head -> 4
3. BH (0xcaf6c818) hash: [0xcafb8f50, 0xcaf9ec90] 3 -> 5
4. BH (0xcafb8e98) hash: [0xcaf84ed0, 0xcaf6c8d0] 4 -> 6
5. BH (0xcaf84e18) hash: [0xcaf853d0, 0xcafb8f50] 5 -> tail
6. BH (0xcaf85318) hash: [0x18ae0dac8,0xcaf84ed0] 6 -> 1
If we look the index row section of above buffer dump, we can see the new inserted row (id: 4708)
are in the same block as split id from 6 to 10 (all ID are reversed),
and row (id: 4708) is locked ("lock: 2") by Itl (0x02).
seg/obj: 0x2588a7 csc: 0x896.f4ef5c84 itc: 2 flg: E typ: 2 - INDEX
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0x0002.004.00dbbdfb 0x00c022d5.b648.0a C--- 0 scn 0x0896.f43f4854
0x02 0x0004.004.00769215 0x00c021d1.39b9.27 ---- 1 fsc 0x0000.00000000
Leaf block dump
===============
kdxcolev 0
kdxconro 263
...
row#20[4884] flag: -------, lock: 0, len=12
col 0; len 2; (2): 07 c1 REVERSE=> c1 07 => 6
col 1; len 6; (6): 00 28 2b 85 00 05
row#69[5519] flag: -------, lock: 0, len=12
col 0; len 2; (2): 08 c1 REVERSE=> c1 08 => 7
col 1; len 6; (6): 00 28 2b 85 00 06
row#117[4611] flag: -------, lock: 2, len=13
col 0; len 3; (3): 09 30 c2 REVERSE=> c2 30 09 => 4708
col 1; len 6; (6): 00 37 ee c2 00 16
row#118[6154] flag: -------, lock: 0, len=12
col 0; len 2; (2): 09 c1 REVERSE=> c1 09 => 8
col 1; len 6; (6): 00 28 2b 85 00 07
row#166[6776] flag: -------, lock: 0, len=12
col 0; len 2; (2): 0a c1 REVERSE=> c1 0a => 9
col 1; len 6; (6): 00 28 2b 85 00 08
row#214[7398] flag: -------, lock: 0, len=12
col 0; len 2; (2): 0b c1 REVERSE=> c1 0b => 10
col 1; len 6; (6): 00 28 2b 85 00 09
If we make 10200 and 10046 trace for 100 loops for id from 6 to 10,
we can see 500 "ktrgcm()" calls, and a few long 'latch: cache buffers chains' WAIT
(see Blog:
Oracle Unique Index Non-Consistency Read with RowCR).
'xcur' (state 1) buffer block is finally returned after 5 ktrexf() showing buffer of 'cr' (state 3).
alter session set events='10046 trace name context forever, level 12:
10200 trace name context forever, level 10'
tracefile_identifier='10046_10200_index_r_2';
exec test_tab_select(6, 100, 6, 0.00);
alter session set events='10046 trace name context off : 10200 trace name context off ';
ktrget2(): started for block <0x07cf : 0x0037eecd> objd: 0x002588a7
showing buffer 0x9ffeda58 rdba 0x37eecd state 3
ktrexf(): returning 9 on: 9ffedad0 cr-scn: 0x0897.0ed85359 xid: 0x0000.000.00000000
showing buffer 0x9ffd3a18 rdba 0x37eecd state 3
ktrexf(): returning 9 on: 9ffd3a90 cr-scn: 0x0897.0ed85358 xid: 0x0004.004.00769215
showing buffer 0x9ffb5298 rdba 0x37eecd state 3
ktrexf(): returning 1 on: 9ffb5310 cr-scn: 0x0897.0ed85358 xid: 0x0004.004.00769215
showing buffer 0x9ffed558 rdba 0x37eecd state 3
ktrexf(): returning 9 on: 9ffed5d0 cr-scn: 0x0897.0ed85357 xid: 0x0004.004.00769215
showing buffer 0x9ff9bb18 rdba 0x37eecd state 3
ktrexf(): returning 1 on: 9ff9bb90 cr-scn: 0x0897.0ed85356 xid: 0x0004.004.00769215
showing buffer 0x9ff94f98 rdba 0x37eecd state 1
ktrexf(): returning 1 on: ffff80ffbccf4e70 cr-scn: 0xffff.ffffffff xid: 0x0000.000.00000000
ktrgcm(): completed for block <0x07cf : 0x0037eecd> objd: 0x002588a7
ktrget2(): completed for block <0x07cf : 0x0037eecd> objd: 0x002588a7
......
WAIT #18446604434568798512: nam='latch: cache buffers chains' ela= 151231 address=6635150120 number=228 tries=0
-- Legend
-- DATA_OBJECT_ID: 2459815 = 0x002588a7
-- new index block 3665613 = 0x0037eecd
-- tablespace ts# 1999 = 0x07cf
-- state 1 = 'xcur'
-- state 3 = 'cr'
Above 10200 trace shows that we have to traverse 5 'cr' (state 3) buffer blocks in the CBC chain
before reaching 'xcur' (state 1) buffer block, and construct the required before-image buffer block.
Then require CBC latch in exclusive mode to link the new buffer block into the CBC chain.A query on v$sesstat reveals heavy CR buffer clones (by applying undo records) in the sessions of second group (for id from 6 to 10), but not the case of first group (for id from 1 to 5).
select s.client_identifier, st.sid, n.name, st.value,
value-lag(value) over(partition by s.sid order by n.name) delta
from v$sesstat st, v$statname n, v$session s
where st.statistic# = n.statistic#
and n.name in ('CR blocks created', 'data blocks consistent reads - undo records applied')
and st.sid = s.sid
and lower(s.program) like '%j0%'
order by s.client_identifier, s.sid, n.name;
CLIENT_IDENTIFIER SID NAME VALUE DELTA
-------------------- --- --------------------------------------------------- -------- ------
test_tab_select_id_1 900 CR blocks created 1
test_tab_select_id_1 900 data blocks consistent reads - undo records applied 0 -1
test_tab_select_id_1 7 CR blocks created 4
test_tab_select_id_1 7 data blocks consistent reads - undo records applied 3 -1
test_tab_select_id_1 186 CR blocks created 1
test_tab_select_id_1 186 data blocks consistent reads - undo records applied 2 1
test_tab_select_id_1 369 CR blocks created 1
test_tab_select_id_1 369 data blocks consistent reads - undo records applied 1 0
test_tab_select_id_1 547 CR blocks created 0
test_tab_select_id_1 547 data blocks consistent reads - undo records applied 0 0
test_tab_select_id_1 903 CR blocks created 0
test_tab_select_id_1 903 data blocks consistent reads - undo records applied 0 0
CLIENT_IDENTIFIER SID NAME VALUE DELTA
-------------------- --- --------------------------------------------------- -------- ------
test_tab_select_id_6 366 CR blocks created 62217832
test_tab_select_id_6 366 data blocks consistent reads - undo records applied 62134369 -83463
test_tab_select_id_6 9 CR blocks created 62673868
test_tab_select_id_6 9 data blocks consistent reads - undo records applied 62663191 -10677
test_tab_select_id_6 189 CR blocks created 62222626
test_tab_select_id_6 189 data blocks consistent reads - undo records applied 62255779 33153
test_tab_select_id_6 549 CR blocks created 62098359
test_tab_select_id_6 549 data blocks consistent reads - undo records applied 62139622 41263
test_tab_select_id_6 726 CR blocks created 62263906
test_tab_select_id_6 726 data blocks consistent reads - undo records applied 62293706 29800
test_tab_select_id_6 906 CR blocks created 62227364
test_tab_select_id_6 906 data blocks consistent reads - undo records applied 62217302 -10062
Finally, if we commit the single row insert, CBC latch contention disappears immediately.For more discussion on reverse key index, see:
Blog: reverse key index: the remedy is worse than the symptoms
and right linked book: Oracle Database Performance Tuning (Studies . Practices . Researches) - Chapter 3: Locks, Latches and Mutexes
6. Data Access Path (Xplan) and Latch Locations
In the above test, we use INDEX RANGE SCAN to access table row:
select /*+ job 4, id 6 */ val from test_tab where id = 7
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 1 (100)| |
| 1 | TABLE ACCESS BY INDEX ROWID BATCHED| TEST_TAB | 1 | 26 | 1 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | TEST_TAB#R | 1 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------------------
If we only use INDEX RANGE SCAN without accessing table row:(In procedure test_tab_select, use second query statement)
select /*+ job 4, id 6 */ id from test_tab where id = 7
-------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 1 (100)| |
|* 1 | INDEX RANGE SCAN| TEST_TAB#R | 1 | 13 | 1 (0)| 00:00:01 |
-------------------------------------------------------------------------------
The top Location (most SLEEP_COUNT) is changed to "kcbchg: cr pin cleanout" (INDX=2118) instead of "kcbgtcr: fast path pin" (INDX=2195).
Here the Dtrace State Transition Diagram (CallStack has no more "qertbFetchByRowID"):
ksl_get_shared_latch:entry
---------Step 2621 at 2020 Feb 20 15:53:35 (Elapsed-ns=5633)---------
***State Entry>>>(nproc_pid=>0, flag=>0, gets=>1414234634, latch=>228, level=>1, where=>2118)
>>>Transition>>>ksl_get_shared_latch:entry
(Addr=>0x18B57B028, Immed_Get=>1, Why=>0, Where=>2298(0x8FA), Mode=>16, PID=>0x2000000000000019)
oracle`ksl_get_shared_latch
a.out`kcbzwb+0xa33
a.out`kcbgtcr+0x14a77
a.out`ktrget2+0x292
a.out`qeilbk1+0x3cf
a.out`qeilsr+0xbd
a.out`qerixFetch+0x1f9
a.out`qerstFetch+0x4ca
a.out`opifch2+0x188b
a.out`opiefn0+0x1f7
ksl_get_shared_latch:return
>>>State Return***(nproc_pid=>25, flag=>536870912, gets=>1414234638, latch=>228, level=>1, where=>2298)
7. Test Code
7.1. leaf_split_finder
create or replace procedure leaf_split_finder(p_find_cnt number) as
type t_blk_tab is table of varchar2(100) index by pls_integer;
l_blk_tab t_blk_tab;
l_object_id number;
l_stmt varchar2(32767);
begin
select object_id into l_object_id from dba_objects where object_name = 'TEST_TAB#R';
execute immediate 'truncate table test_tab';
l_stmt := q'[
select 'block: '||blk||', cnt: '||cnt||', min_id: '||min_id||', max_id: '||max_id
from (
select blk, count(*) cnt, min(id) min_id, max(id) max_id from (
select id, dbms_rowid.rowid_block_number(sys_op_lbid (]'||l_object_id||q'[, 'L', rowid)) blk
from test_tab)
where id between 1 and 11 -- Note using "id in (1, 6)", Toad crashed due to "sys_op_lbid"
group by blk order by min_id, blk)
]';
for i in 1..p_find_cnt loop
insert into test_tab values(i, -i);
execute immediate l_stmt bulk collect into l_blk_tab;
if l_blk_tab.count >= 2 then
dbms_output.put_line('--- Leaf Split at row count: '||i||
', (id between 1 and 11) in '||l_blk_tab.count||' Blocks');
for i in 1..l_blk_tab.count loop
dbms_output.put_line(l_blk_tab(i));
end loop;
exit;
end if;
end loop;
rollback;
end;
/
-- exec leaf_split_finder(5000);
--
-- --- Leaf Spli at row count: 4707, (id between 1 and 11) in 2 Blocks
-- block: 3665598, cnt: 5, min_id: 1, max_id: 5
-- block: 3665619, cnt: 6, min_id: 6, max_id: 11
7.2. test_tab_select_job
create or replace procedure test_tab_select (p_job_id number, p_cnt number, p_id number, p_sleep number) as
l_stmt varchar2(1000);
l_id number;
l_ret number;
begin
dbms_session.set_identifier('test_tab_select_id_'||p_id);
for i in 1..p_cnt loop
for r in 1..5 loop
l_id := p_id + r - 1;
l_stmt := 'select /*+ job '||p_job_id||', id '||p_id||' */ val from test_tab where id = '||l_id;
-- select id from test_tab, only using reverse index test_tab#r, no table access
--l_stmt := 'select /*+ job '||p_job_id||', id '||p_id||' */ id from test_tab where id = '||l_id;
-- dynamic statement makes each session has a different statement and avoid cusor pin S
execute immediate l_stmt into l_ret;
end loop;
if p_sleep > 0 then
dbms_lock.sleep(p_sleep);
end if;
end loop;
end;
/
-- exec test_tab_select(1, 100, 1, 0.00);
-- exec test_tab_select(6, 100, 6, 0.00);
create or replace procedure test_tab_select_job(p_job_cnt number, p_cnt number, p_id number, p_sleep number) as
l_job_id pls_integer;
begin
for i in 1.. p_job_cnt loop
dbms_job.submit(l_job_id,
'begin test_tab_select('||i||','||p_cnt||','||p_id||','||p_sleep||'); end;');
end loop;
commit;
end;
/
-- exec clean_jobs;
-- exec test_tab_select_job(6, 1e9, 1, 0);
-- exec test_tab_select_job(6, 1e9, 6, 0);
-- select * from dba_jobs;
create or replace procedure clean_jobs as
begin
for c in (select job from dba_jobs) loop
begin
dbms_job.remove (c.job);
exception when others then null;
end;
commit;
end loop;
for c in (select d.job, d.sid, (select serial# from v$session where sid = d.sid) ser
from dba_jobs_running d) loop
begin
execute immediate
'alter system kill session '''|| c.sid|| ',' || c.ser|| ''' immediate';
dbms_job.remove (c.job);
exception when others then null;
end;
commit;
end loop;
-- select * from dba_jobs;
-- select * from dba_jobs_running;
end;
/
-- exec clean_jobs; -- stop all jobs