Thursday, January 18, 2024

Blog List

One Test on the Different Errors of Oracle Global Temporary Tables vs. Private Temporary Tables

This Blog shows one different behaviour of Oracle Global Temporary Table (GTT since Oracle 8i) vs. Private Temporary Table (PTT since Oracle 18c).

Note: Tested in Oracle 19.19.


1. GTT Test


We will make two GTT tests, one without "order by", one with "order by". Both throw the same ORA-01002.


Test-1 Without Order By



drop table gtt_tab;

create global temporary table gtt_tab(x number) on commit delete rows;

-- Implicit cursor for loops array fetch 100 rows at a time by default in 10g
insert into gtt_tab(x) select level from dual connect by level <= 103;  
    
set serveroutput on;

begin
  execute immediate '
    begin
      for c in (select x from gtt_tab) loop
        commit;
        dbms_output.put_line(c.x);  -- error in 101-th row
      end loop;
    end;';
end;
/

--The output shows ORA-01002 error:

100
begin
*
ERROR at line 1:
ORA-01002: fetch out of sequence
ORA-06512: at line 3


Test-2 With Order By



drop table gtt_tab;

create global temporary table gtt_tab(x number) on commit delete rows;

-- Implicit cursor for loops array fetch 100 rows at a time by default in 10g
insert into gtt_tab(x) select level from dual connect by level <= 103;  
    
set serveroutput on;

begin
  execute immediate '
    begin
      for c in (select x from gtt_tab order by x) loop
        commit;
        dbms_output.put_line(c.x);   -- error in 101-th row
      end loop;
    end;';
end;
/

--The output shows the same ORA-01002 error:

100
begin
*
ERROR at line 1:
ORA-01002: fetch out of sequence
ORA-06512: at line 3


2. PTT Test


We will make two PTT tests, one without "order by", one with "order by".
Without "order by", it throws ORA-08103.
With "order by", PL/SQL is successfully completed.


Test-1 Without Order By



drop table ora$ptt_tab;

create private temporary table ora$ptt_tab (x number) on commit drop definition;

-- Implicit cursor for loops array fetch 100 rows at a time by default in 10g
insert into ora$ptt_tab(x) select level from dual connect by level <= 103; 

set serveroutput on;

begin
  execute immediate '
    begin
      for c in (select x from ora$ptt_tab) loop
        commit;
        dbms_output.put_line(c.x);  -- error in 101-th row
      end loop;
    end;';
end;
/

--The output shows ORA-08103 error:

100
begin
*
ERROR at line 1:
ORA-08103: object no longer exists
ORA-06512: at line 3


Test-2 With Order By



drop table ora$ptt_tab;

create private temporary table ora$ptt_tab (x number) on commit drop definition;

-- Implicit cursor for loops array fetch 100 rows at a time by default in 10g
insert into ora$ptt_tab(x) select level from dual connect by level <= 103; 

set serveroutput on;

begin
  execute immediate '
    begin
      for c in (select x from ora$ptt_tab order by x) loop
        commit;
        dbms_output.put_line(c.x);
      end loop;
    end;';
end;
/

--The output shows successfully completed:

103

PL/SQL procedure successfully completed.
The behaviour can be varied following different Oracle releases.
(see Oracle MOS:
     Post Database Upgrade to 19c, Fetch from Cursor Based on Global Temporary Table Raises ORA-01002 (Doc ID 2890026.1))

Wednesday, October 25, 2023

ORA-28268: Exceeded the maximum allowed size for Context information in a session

Note: Tested in Oracle 19.18.


1. Test Setup



-- code from Blog: "library cache: mutex X" and Application Context 
--    (http://ksun-oracle.blogspot.com/2016/11/library-cache-mutex-x-and-application.html)
create or replace context test_ctx using test_ctx_pkg;

--create or replace context test_ctx using test_ctx_pkg accessed globally;

create or replace package test_ctx_pkg is 
  procedure set_val (attr varchar2, val varchar2);
 end;
/

create or replace package body test_ctx_pkg is
  procedure set_val (attr varchar2, val varchar2) as
  begin
    --dbms_session.clear_all_context('TEST_CTX');
    dbms_session.set_context('TEST_CTX', attr, val);
  end;
end;
/

create or replace procedure ctx_set(p_cnt number, attr_pre varchar2, val_pre varchar2) as
begin
 for i in 1..p_cnt loop
  test_ctx_pkg.set_val(attr_pre||i, val_pre||i);    -- 'library cache: mutex X' on TEST_CTX
 end loop;
end;
/

create or replace procedure test_ORA_28268 (p_cnt number) as 
  l_attr_pre varchar2(32000);
  l_val_pre  varchar2(32000);
  l_val      varchar2(32000);
begin
  --dbms_session.clear_all_context('TEST_CTX');
  ctx_set(p_cnt, 'ATTR_', 'VAL_');
  
  select sys_context('TEST_CTX', 'ATTR_'||1) into l_val from dual;
  dbms_output.put_line('l_attr='||'ATTR_'||1||', VAL='||l_val);
  
  select sys_context('TEST_CTX', 'ATTR_'||p_cnt) into l_val from dual;
  dbms_output.put_line('l_attr='||'ATTR_'||p_cnt||', VAL='||l_val); 
end;
/

create or replace procedure list_context as 
  l_tab  dbms_session.appctxtabtyp;
  l_size number;
begin
  dbms_session.list_context(l_tab, l_size);
  dbms_output.put_line('l_tab.count: '||l_tab.count||', l_size:'||l_size);
  for i in 1..l_tab.count loop
    dbms_output.put_line('namespace: '||l_tab(i).namespace||', attribute: '||l_tab(i).attribute||', value: '||l_tab(i).value);
  end loop;
end;
/

2. Test Run



SQL > exec test_ORA_28268(10000-300);

      l_attr=ATTR_1, VAL=VAL_1
      l_attr=ATTR_9700, VAL=VAL_9700

SQL > exec test_ORA_28268(10000+300);

      BEGIN test_ORA_28268(10000+300); END;
      ERROR at line 1:
      ORA-28268: Exceeded the maximum allowed size for Context information in a session
      ORA-06512: at "SYS.DBMS_SESSION", line 141
      ORA-06512: at "K.TEST_CTX_PKG", line 5
      ORA-06512: at "K.CTX_SET", line 4
      ORA-06512: at "K.TEST_ORA_28268", line 7
      ORA-06512: at line 1

3. ORA-28268


Oracle document:

  28268, 0000, "Exceeded the maximum allowed size for Context information in a session"
  // *Cause:    The maximum size specified by the _session_context_size
  //            init.ora parameter was exceeded.
  // *Action: Please change the value for _session_context_size in the
  //  
ORA-28268 is raised when the number of session local context entries exceeded limit (session local context array) which is defined by "_session_context_size" (default 10,000).

It is only applied to local context (dba_context.type = 'ACCESSED LOCALLY')
(for global context, ORA-04031 when too many global context entries, check v$sgastat.name = 'Global Context').

It's value can be modified by:

  alter system set "_session_context_size"=10000 scope=spfile;
One cause of ORA-28268 is probably session local context leak.

The name "_session_context_size" seems misleading, mabybe renamed as "_session_context_entry_limit".

Wednesday, September 20, 2023

Oracle Index Root Modification CBC Latch Contention: Workaround Studies

In Oracle applications, often there are certain central tables, which are accessed concurrently by many sessions using indexes. If one index root block is modified due to branch or leaf block split when running DML statements, the system could experience hard to predict heavy CBC Latch Contention on root block.

In this Blog, we would try to set apart root block modification from DML operations so that root block modification is performed at first, and then subsequent DML are executed.

The purpose is only for illustrative, experimental and feasibility studies in the hope that Oracle can refine the idea.

Test code is based on Blogs:

   Index Service ITL and Recursive Transaction
   Cache Buffer Chains Latch Contention Case Study-2: Reverse Primary Key Index
   Oracle 19.18 Wait Event 'latch: cache buffers chains' P3: block number

Note: Tested in Oracle 19.18.


1. Test Setup



--=================== Test Setup in session-1 ===================--

drop table test_tab purge;

create table test_tab (id number, val number);

drop index test_tab#r;

create unique index test_tab#r on test_tab(id) reverse;

alter table test_tab add constraint test_tab#r primary key (id);

truncate table test_tab;

insert into test_tab select level, -level from dual connect by level <= 4905;

commit;

-- Test Code to insert one row to trigger leaf block split, and root block modification
create or replace procedure root_modi_cbc_latch_workaround (p_insert_id number, p_allow_seconds number := 5) as
  type t_name_tab    is table of dba_segments.segment_name%type;
  type t_file_tab    is table of dba_segments.header_file%type;
  type t_block_tab   is table of dba_segments.header_block%type;
  l_name_tab         t_name_tab   := t_name_tab();
  l_file_tab         t_file_tab   := t_file_tab();
  l_block_tab        t_block_tab  := t_block_tab();
  l_cnt              number := 0; 
  l_scn              number;
  l_scn_ts           timestamp;
  l_modi_ts          timestamp;
  l_msg_str          varchar2(200);
  l_modi             boolean := false;
begin
  -- collect indexes and their root files/root blocks
  -- @TODO store those in a package variable to workaround dba_segments performance
  with sq as (select /*+ materialize */ index_name from dba_indexes where table_name = 'TEST_TAB')
  select /*+ leading(q) use_nl(s) */ q.index_name, header_file, header_block + 1
         bulk collect into l_name_tab, l_file_tab, l_block_tab
    from sq q, dba_segments s where s.segment_name = q.index_name;
  
  dbms_session.sleep(p_allow_seconds);  -- simulate appliaction
  
  -- check if there are any root block modifications. If yes, rollback, re-insert. If not, exit loop.
  for i in 1 ..2 loop
    dbms_output.put_line(systimestamp||'--- Run ' ||i||'--- set savepoint');
    savepoint sp;
    insert into test_tab values(p_insert_id, -p_insert_id);
    l_modi_ts := cast (systimestamp as timestamp);
    
    if i = 1 then 
      for k in 1..l_name_tab.count loop
        select count(1) into l_cnt from v$bh 
         where file#=l_file_tab(k) and block# = l_block_tab(k) and status = 'xcur';   --and dirty = 'Y'

        select max(hscn_wrp*power(2, 32) + hscn_bas) into l_scn from x$bh 
         where l_cnt > 0 and state = 1 and hscn_wrp > 0 and file#=l_file_tab(k) and dbablk=l_block_tab(k);   --state=1, 'xcur'
        
        if l_scn > 0 then
          l_scn_ts := scn_to_timestamp(l_scn);    -- scn_to_timestamp session cached. run test in one new session.
        end if;
        
        l_msg_str := systimestamp||'--- Run ' ||i||'--- Index '||l_name_tab(k)||' Root (File = '||l_file_tab(k)||', Block = '||l_block_tab(k)||'), '||
                    'l_cnt = '||l_cnt||', l_scn = '||l_scn||', l_scn_ts = '||l_scn_ts;
        
        dbms_output.put_line(l_msg_str);
        
        -- scn_to_timestamp: the usual precision of the result value is 3 seconds.
        if l_scn_ts between l_modi_ts - numtodsinterval(p_allow_seconds, 'SECOND') and l_modi_ts then 
          dbms_output.put_line('*** Root modified at '||l_modi_ts||', Root Index Modi at '||l_scn_ts ||
                               ', rollback to make only root modi trx performed, then re-insert.');
          l_modi := true;
          rollback to savepoint sp;   -- only rollback main trx, root modi recursive trx committed (un-rollbackable)
          exit;                       -- one index root modi found, finish modi recursive trx, exit internal loop
        end if; 
      end loop;
      
      if not l_modi then 
        dbms_output.put_line('*** Root not modified');
        exit;
      end if;
    end if;
  end loop;    
end;
/


2. Test Run



--========== Test Run in session-2 (to repeat test, run "Test Setup" at first) ==========--

set serveroutput on
alter session set nls_timestamp_format ='YYYY*MON*DD HH24:MI:SS.FF9';

select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;

exec root_modi_cbc_latch_workaround(4906);

exec root_modi_cbc_latch_workaround(4907);

select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;

rollback;


3. Test Output



--=================== Test Output in session-2 ===================--

12:01:34 SQL > select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;
  SCN_TO_TIMESTAMP(ORA_ROWSCN)             ID        VAL
  -------------------------------- ---------- ----------
  2023*SEP*20 12:01:17.000000000         4904      -4904
  2023*SEP*20 12:01:17.000000000         4905      -4905
  
  2 rows selected.

12:01:34 SQL > exec root_modi_cbc_latch_workaround(4906);
  20-SEP-2023 12:01:42 +02:00--- Run 1--- set savepoint
  20-SEP-2023 12:01:42 +02:00--- Run 1--- Index TEST_TAB#R Root (File = 22, Block = 35467), l_cnt = 1, 
                                          l_scn = 12776808521751, l_scn_ts = 2023*SEP*20 12:01:41.000000000
  *** Root modified at 2023*SEP*20 12:01:42.271069000, Root Index Modi at 2023*SEP*20 12:01:41.000000000, 
      rollback to make only root modi trx performed, then re-insert.
  20-SEP-2023 12:01:42 +02:00--- Run 2--- set savepoint

  Elapsed: 00:00:07.80
  
12:01:42 SQL > exec root_modi_cbc_latch_workaround(4907);
  20-SEP-2023 12:01:50 +02:00--- Run 1--- set savepoint
  20-SEP-2023 12:01:50 +02:00--- Run 1--- Index TEST_TAB#R Root (File = 22, Block = 35467), l_cnt = 1, 
                                          l_scn = 12776808521752, l_scn_ts = 2023*SEP*20 12:01:41.000000000
  *** Root not modified

  Elapsed: 00:00:07.86
  
12:01:50 SQL > select scn_to_timestamp(ora_rowscn), t.* from test_tab t where id >= 4904 order by id;
  SCN_TO_TIMESTAMP(ORA_ROWSCN)             ID        VAL
  -------------------------------- ---------- ----------
  2023*SEP*20 12:01:17.000000000         4904      -4904
  2023*SEP*20 12:01:17.000000000         4905      -4905
  2023*SEP*20 12:01:17.000000000         4906      -4906
  2023*SEP*20 12:01:17.000000000         4907      -4907
  
  4 rows selected.
  
12:01:50 SQL > rollback;
  Rollback complete.
Test output shows that both new rows with ID 4906 and 4907 are inserted, only ID 4096 triggers root block modification, but ID 4097 does not.

Wednesday, September 6, 2023

Oracle 19.18 Wait Event 'latch: cache buffers chains' P3: block number

In Oracle 19.18, we observed that Wait Event 'latch: cache buffers chains' Parameter P3 stores DBA (Data Block Address)
(for bigfile tablespace, it is block number (block#). For smallfile tablespace, it is rfile# * power(2, 22) + block#).
(See Blog: latch: cache buffers chains latch contention - a better way for finding the hot block )

This new enhancement makes it much easier to locate CBC latch contention point.

In Oracle Docu, PARAMETER3 is still denoted as "why":

select event#, event_id, name, parameter1, parameter2, parameter3 from v$event_name where name = 'latch: cache buffers chains';

  EVENT#   EVENT_ID NAME                          PARAMETER1 PARAMETER2 PARAMETER3
  ------ ---------- ----------------------------- ---------- ---------- ----------
     113 2779959231 latch: cache buffers chains   address    number     why
To watch such new improvement, one can run test code in Blog: Cache Buffer Chains Latch Contention Case Study-2: Reverse Primary Key Index

We can use following queries to find p3 blocks:

select program, event, p1, p2, p3, p1text, p2text, p3text, v.* 
  from v$session v where event = 'latch: cache buffers chains' order by v.p3; 

select program, event, p1, p2, p3, p1text, p2text, p3text, v.* 
  from v$active_session_history v  
  -- from dba_hist_active_sess_history t
 where event = 'latch: cache buffers chains' order by sample_time desc, v.p3; 
Then with above p3 blocks, list objects by:

select o.object_name, b.* 
  from v$bh b, dba_objects o 
 where b.objd=o.data_object_id 
   and block# in (11763, 11765, 35468)   -- 3 example blocks
 order by block#, class#, b.status;

select o.object_name, dbablk, tch, hladdr cbc_latch_addr
      ,hscn_wrp*power(2, 32) + hscn_bas scn
      ,case when hscn_wrp > 0 then scn_to_timestamp(hscn_wrp*power(2, 32) + hscn_bas) else null end scn_ts
      ,tim epoch
      ,to_date('1970-01-01 00:00:00', 'yyyy-mm-dd hh24:mi:ss') + tim/86400 epoch_time_utc
      ,DECODE (state, 0, 'free', 1, 'xcur', 2, 'scur', 3, 'cr', 4, 'read', 5, 'mrec', 6, 'irec', 7, 'write', 8, 'pi', 9, 'memory', 10, 'mwrite'
              ,11, 'donated', 12, 'protected', 13, 'securefile', 14, 'siop', 15, 'recckpt', 16, 'flashfree', 17, 'flashcur', 18, 'flashna') 
       status
      ,DECODE (BITAND (flag, 1), 0, 'N', 'Y')     dirty
      ,DECODE (BITAND (flag, 16), 0, 'N', 'Y')    temp
      ,DECODE (BITAND (flag, 1536), 0, 'N', 'Y')  ping
      ,DECODE (BITAND (flag, 16384), 0, 'N', 'Y') stale
      ,DECODE (BITAND (flag, 65536), 0, 'N', 'Y') direct
      ,b.* 
  from x$bh b, dba_objects o 
 where b.obj=o.data_object_id 
   and b.dbablk in (11763, 11765, 35468)   -- 3 example blocks
 order by b.dbablk, b.state;
 
-- With above scn_ts to find v$session.sid by matching v$transaction.start_time

Sunday, September 3, 2023

Oracle Index Range Scan with LIKE Condition on Wildcard '_'

This Blog shows LIKE condition index range scan performance and space usage of Oracle index with Wildcard '_'.

Note: Tested in Oracle 19.18.


1. Test Setup


At first, we create a test table with 3 varchar2 columns (name1, name2, name3), which are the concatenations of 3 same items, but in different order, and then 3 respective indexes on them. The item part contains Wildcard '_'.

drop table test_tab purge;

create table test_tab (
    id           number
   ,grp          number
   ,tstr         varchar2(14)
   ,name1        varchar2(100)
   ,name2        varchar2(100)
   ,name3        varchar2(100)
);

create unique index test_tab#p on test_tab(id);

alter table test_tab add constraint test_tab#p primary key (id);

create index test_tab#i#name1 on test_tab (name1);

create index test_tab#i#name2 on test_tab (name2);

create index test_tab#i#name3 on test_tab (name3);
 
insert into test_tab 
with sq as (
select level id, mod(level, 300) grp
      ,to_char((date'2021-11-22' + rownum/86400), 'YYYYMMDDHH24MISS')           ts
      ,decode(mod(level, 3), 0, 'ONE_PART', 1, 'TWO_PART', 2, 'THREE_PART')     part
  from dual connect by level <= 3*1e5)
select id, grp, ts
      ,part ||'.'||grp  ||'.'||ts     name1
      ,grp  ||'.'||part ||'.'||ts     name2
      ,ts   ||'.'||part ||'.'||grp    name3
 from sq;
 
commit;

exec dbms_stats.gather_table_stats('', 'TEST_TAB', cascade=>true);
If we run a query, we can see the format of three name columns:

col name1 for a30 new_value n1
col name2 for a30 new_value n2
col name3 for a30 new_value n3
select id, name1, name2, name3 from test_tab m where id = trunc(dbms_random.value(1, 3*1e5));

    ID  NAME1                        NAME2                        NAME3
------  ---------------------------  ---------------------------  ---------------------------
243505  TWO_PART.205.20211124193825  205.TWO_PART.20211124193825  20211124193825.TWO_PART.205
Then we create a Plsql test program using 3 indexes and printout their execution stats.

create or replace procedure test_tab_proc (p_case number, p_cnt number) as
  l_start       number;
  l_start_cr    number;
  l_end_cr      number;
  l_name1       varchar2(100);
  l_name2       varchar2(100);
  l_name3       varchar2(100);
  l_ret         varchar2(200);
begin
  select name1, name2, name3 into l_name1, l_name2, l_name3
    from test_tab where id = trunc(dbms_random.value(1, 3*1e5)); 

  l_start  := dbms_utility.get_time;
  select value into l_start_cr from v$mystat s, v$statname n where s.statistic#=n.statistic# and name = 'consistent gets';
  case p_case
    when 1 then
      for i in 1..p_cnt loop
        select  /*+ index_rs_asc(t (name1)) */ name1 into l_ret from test_tab t where name1 like l_name1;
      end loop;
      dbms_output.put_line('--------- Index: name1 like '||l_name1||' --------- ');
    when 2 then
      for i in 1..p_cnt loop
        select  /*+ index_rs_asc(t (name2)) */ name2 into l_ret from test_tab t where name2 like l_name2;
      end loop;
      dbms_output.put_line('--------- Index: name2 like '||l_name2||' --------- ');
    when 3 then
      for i in 1..p_cnt loop
        select  /*+ index_rs_asc(t (name3)) */ name3 into l_ret from test_tab t where name3 like l_name3;
      end loop;
      dbms_output.put_line('--------- Index: name3 like '||l_name3||' --------- ');
    end case;
    select value into l_end_cr from v$mystat s, v$statname n where s.statistic#=n.statistic# and name = 'consistent gets';
    dbms_output.put_line('Total Elapsed MS = '||round((dbms_utility.get_time-l_start)*10)||
                       ', Total CR gets= '    ||(l_end_cr-l_start_cr)||
                       ', Per Exec MS = '     ||round((dbms_utility.get_time-l_start)*10/p_cnt, 2)||
                       ', Per Exec CR gets = '||round((l_end_cr-l_start_cr)/p_cnt));
end;
/


2. Test Run


We run 3 tests for 3 indexes with Sql trace:

alter session set tracefile_identifier = 'sql_trc_1';
alter session set events '10046 trace name context forever, level 12'; 

exec test_tab_proc(1, 100);
exec test_tab_proc(2, 100);
exec test_tab_proc(3, 100);

alter session set events '10046 trace name context off';
The output shows that index name1 has 902 CR gets per execution, name2 has 12, and name3 has 3, although each of them returns one single row per execution.

SQL > exec test_tab_proc(1, 100);
      --------- Index: name1 like ONE_PART.270.20211122144930 ---------
      Total Elapsed MS = 1380, Total CR gets= 90200, Per Exec MS = 13.8, Per Exec CR gets = 902

SQL > exec test_tab_proc(2, 100);
      --------- Index: name2 like 37.TWO_PART.20211124044537 ---------
      Total Elapsed MS = 30, Total CR gets= 1200, Per Exec MS = .3, Per Exec CR gets = 12

SQL > exec test_tab_proc(3, 100);
      --------- Index: name3 like 20211124141442.ONE_PART.282 ---------
      Total Elapsed MS = 10, Total CR gets= 300, Per Exec MS = .1, Per Exec CR gets = 3
Here 3 xplans. They are all using INDEX RANGE SCAN, but different execution stats.

************************ INDEX RANGE SCAN TEST_TAB#I#NAME1 **************************

SELECT /*+ index_rs_asc(t (name1)) */ NAME1 FROM TEST_TAB T WHERE NAME1 LIKE :B1 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute    100      0.01       0.01          0          0          0           0
Fetch      100      1.34       1.34          0      90200          0         100
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total      201      1.35       1.35          0      90200          0         100

Rows  Row Source Operation
----  ---------------------------------------------------
   1  INDEX RANGE SCAN TEST_TAB#I#NAME1 (cr=902 pr=0 pw=0 time=17202 us starts=1 cost=2 size=29 card=1)(object id 5082850)


************************ INDEX RANGE SCAN TEST_TAB#I#NAME2 **************************

SELECT /*+ index_rs_asc(t (name2)) */ NAME2 FROM TEST_TAB T WHERE NAME2 LIKE :B1 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute    100      0.00       0.00          0          0          0           0
Fetch      100      0.01       0.01          0       1200          0         100
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total      201      0.02       0.02          0       1200          0         100

Rows  Row Source Operation
----  ---------------------------------------------------
   1  INDEX RANGE SCAN TEST_TAB#I#NAME2 (cr=12 pr=0 pw=0 time=279 us starts=1 cost=2 size=29 card=1)(object id 5082851)


************************ INDEX RANGE SCAN TEST_TAB#I#NAME3 **************************

SELECT /*+ index_rs_asc(t (name3)) */ NAME3 FROM TEST_TAB T WHERE NAME3 LIKE :B1 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute    100      0.00       0.00          0          0          0           0
Fetch      100      0.00       0.00          0        300          0         100
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total      201      0.00       0.00          0        300          0         100

Rows  Row Source Operation
----  ---------------------------------------------------
   1  INDEX RANGE SCAN TEST_TAB#I#NAME3 (cr=3 pr=0 pw=0 time=55 us starts=1 cost=2 size=29 card=1)(object id 5082852)


3. LIKE Condition Index Range Scan on Wildcard '_'Index


Following query shows that TEST_TAB#I#NAME1 and TEST_TAB#I#NAM2 have similar clustering_factor and leaf_blocks (space usage), but TEST_TAB#I#NAME3 has much less (see later Index Treedump).

select index_name, clustering_factor, leaf_blocks, blevel from dba_indexes v where table_name = 'TEST_TAB';

  INDEX_NAME         CLUSTERING_FACTOR LEAF_BLOCKS   BLEVEL
  ------------------ ----------------- ----------- --------
  TEST_TAB#I#NAME1              300000        2766        2
  TEST_TAB#I#NAME2              300000        2773        2
  TEST_TAB#I#NAME3                4714        1478        2
We can also get the distinct number of 3 name columns before wildcard '_':

select count(*)  
      ,count(distinct substr(name1, 1, instr(name1, '_')-1)) name1_prefix_cnt
      ,count(distinct substr(name2, 1, instr(name2, '_')-1)) name2_prefix_cnt
      ,count(distinct substr(name3, 1, instr(name3, '_')-1)) name3_prefix_cnt
from test_tab;

  COUNT(*)  NAME1_PREFIX_CNT  NAME2_PREFIX_CNT  NAME3_PREFIX_CNT
  --------  ----------------  ----------------  ----------------
    300000                 3               300            300000
name1 in test_tab is constructed like:

   ONE_PART.x.y
   TWO_PART.x.y
   THREE_PART.x.y
Before '_', there are 3 different values ('ONE', 'TWO', 'THREE'). So name1 is divided into three parts. TEST_TAB#I#NAME1 has 2766 leaf blocks, TEST_TAB#I#NAME1 index range scan makes 2766/3, which is 902 CR gets per execution (it performs like an index partition full scan).

Similarily for name2, it is 2773/300, which is about 12 CR gets per execution (index BLEVEL = 2 requires 2 branch blocks gets per execution).

And for name3, it is 3 CR gets per execution (2 branch blocks gets and 1 leaf block get).

Above tests show that all of 3 queries return one single row per execution, but TEST_TAB#I#NAME1 has a much higher cost.

If we run a query to compare the result of ordered index range scan with unordered index fast full scan with query:

with sq_rs  as (select /*+ index_asc(t test_tab#i#name1) materialize */ name1, rownum rn from test_tab t where name1 like 'ONE_PART%') 
    ,sq_ffs as (select /*+ index_ffs(t test_tab#i#name1) materialize */ name1, row_number() over(order by name1) rn from test_tab t where name1 like 'ONE_PART%')
select r.*, f.*
from sq_rs r, sq_ffs f
where r.rn = f.rn and r.name1 != f.name1  
--where r.rn != f.rn and r.name1 = f.name1 
;
   
no rows selected
"no rows selected" means that name1 in TEST_TAB#I#NAME1 is strictly sorted, but Like condition index range scan only uses substring before '_' for the search, therefore 1/3 TEST_TAB#I#NAME1 blocks are read although only one leaf block contains the searched data.


4. Index Meta Data


We can also collect more index meta data, then make index treedump and index block dump:

select object_name, object_id, to_char(object_id, 'xxxxxxxx') object_id_hex from dba_objects t where object_name like 'TEST_TAB#%';   

OBJECT_NAME         OBJECT_ID OBJECT_ID
------------------ ---------- ---------
TEST_TAB#I#NAME1      5082850    4d8ee2
TEST_TAB#I#NAME2      5082851    4d8ee3
TEST_TAB#I#NAME3      5082852    4d8ee4

select column_name, avg_col_len from  dba_tab_columns where table_name = 'TEST_TAB';   -- length 29

COLUMN_NAME   AVG_COL_LEN
------------- -----------
ID                      5
GRP                     4
TSTR                   15
NAME1                  29
NAME2                  29
NAME3                  29

select segment_name, header_file, header_block from dba_segments t where segment_name like 'TEST_TAB#%';

SEGMENT_NAME       HEADER_FILE HEADER_BLOCK
------------------ ----------- ------------
TEST_TAB#I#NAME1            22       826450
TEST_TAB#I#NAME2            22       826442
TEST_TAB#I#NAME3            22       826434

Index Treedump


alter session set tracefile_identifier = "TEST_TAB#I#NAME1_td";
alter session set events 'immediate trace name treedump level 5082850'; 

alter session set tracefile_identifier = "TEST_TAB#I#NAME2_td";
alter session set events 'immediate trace name treedump level 5082851';

alter session set tracefile_identifier = "TEST_TAB#I#NAME3_td";
alter session set events 'immediate trace name treedump level 5082852'; 
Treedump shows that NAME1 and NAME2 index blocks have avs = 4000 (AVailable Space, free space within the leaf block), whereas NAME3 has only avs=5. That is why name1 and name2 have more LEAF_BLOCKS than name3.

TEST_TAB#I#NAME1 
      leaf: 0xf5fcd 1007565 (1: row:108.108 avs:4000)
TEST_TAB#I#NAME2
      leaf: 0xf5dde 1007070 (1: row:108.108 avs:4000) 
TEST_TAB#I#NAME3
      leaf: 0xc9c46 826438 (1: row:202.202 avs:5)
The above dba_segments for TEST_TAB#I#NAME1 shows that its HEADER_BLOCK is 826450. If we look following TEST_TAB#I#NAME1 tree dump, we can see that root node block: 826451 is the next block of its HEADER_BLOCK: 826450.

------------- TEST_TAB#I#NAME1 tree dump -------------
----- begin tree dump
branch: 0xc9c53 826451 (0: nrow: 16, level: 2)
   branch: 0xf335c 996188 (-1: nrow: 162, level: 1)
      leaf: 0xc9c54 826452 (-1: row:107.107 avs:4037)
      leaf: 0xf3718 997144 (0: row:108.108 avs:4000)
      leaf: 0xf5fcd 1007565 (1: row:108.108 avs:4000)
      leaf: 0xf8517 1017111 (2: row:108.108 avs:4000)
      leaf: 0x15a2fe 1417982 (3: row:108.108 avs:4000) 
      
Note: "0xc9c53 826451" are DBA (Data Block Address) in (hex decimal).
      For bigfile tablespace, it is block#; for smallfile tablespace, it is rfile# * power(2, 22) + block#.
	  see dbms_utility.data_block_address_file, dbms_utility.data_block_address_block.
      
      If decimal number is negative, convert it by: power(2, 32) + "decimal number" = DBA.

Index analyze


analyze index TEST_TAB#I#NAME1 validate structure offline;
--validate index TEST_TAB#I#NAME1
select * from index_stats;
select * from index_histogram; 

analyze index TEST_TAB#I#NAME3 validate structure offline;
--validate index TEST_TAB#I#NAME3
select * from index_stats;
select * from index_histogram; 

Index Data Block Dump


alter session set tracefile_identifier = "TEST_TAB#I#NAME1_seg";
alter system dump datafile 22 block min 826450 block max 826457;	

alter session set tracefile_identifier = "TEST_TAB#I#NAME2_seg";
alter system dump datafile 22 block min 826442 block max 826449;	

alter session set tracefile_identifier = "TEST_TAB#I#NAME3_seg";
alter system dump datafile 22 block min 826434 block max 826441;	

NAME1 Index blocks List


select blk, count(*), min(id) min_id, max(id) max_id, min(name1), max(name1) from (
  select dbms_rowid.rowid_block_number(sys_op_lbid (5082845, 'L', rowid)) blk, t.* from test_tab t)  
group by blk order by blk;

Index Entry Sequence

Dump and compare block# read sequence of index range scan and index fast full scan against index treedump (index data structure)

-- index range scan gets index TEST_TAB#I#NAME1 blocks in ordered read (db file sequential read').
--   index range scan first reads index blocks from root block to the leftmost satisfied first leaf block along branch blocks, 
--   then reads from first found leaf blocks till last satisfied leaf block (which are linked with each one points to next one).
--   One block per read, logically sequential.

alter system flush buffer_cache; 
alter session set tracefile_identifier = 'sql_trc_rs';
alter session set events '10046 trace name context forever, level 12'; 
with sq_rs  as (select /*+ index_asc(t test_tab#i#name1) materialize */ name1, rownum rn from test_tab t where name1 like 'ONE_PART%') 
select count(*) from sq_rs r;
alter session set events '10046 trace name context off';

-- index fast full scan gets index TEST_TAB#I#NAME1 blocks in unordered read ('db file scattered read').
--   index fast full scan reads all index blocks (brach/leaf) like full table scan.
--   Multiple blocks per read without considering any order.

alter system flush buffer_cache; 
alter session set tracefile_identifier = 'sql_trc_ffs';
alter session set events '10046 trace name context forever, level 12'; 
with sq_ffs as (select /*+ index_ffs(t test_tab#i#name1) materialize */ name1, row_number() over(order by name1) rn from test_tab t where name1 like 'ONE_PART%')
select count(*) from sq_ffs f;
alter session set events '10046 trace name context off';

Thursday, August 3, 2023

Oracle SYS_GUID Composition

Oracle Docu wrote:
SYS_GUID generates and returns a globally unique identifier (RAW value) made up of 16 bytes. 
On most platforms, the generated identifier consists of a host identifier, a process or thread identifier of the process 
or thread invoking the function, and a nonrepeating value (sequence of bytes) for that process or thread.
The Oracle Database SYS_GUID function does not return a standard UUID since the generated GUID is not a random number.
(see: IETF RFC 4122 version 4 UUID, and Oracle dbms_crypto.randombytes and Enhancement Suggestion).

In this Blog, we will try to look its construction.

With following query, we can decompose it into 5 components.

with sq as (select sys_guid() gid, s.logon_time, p.spid, s.sid, s.serial#, p.pid from v$session s, v$process p 
             where s.paddr=p.addr and (s.sid = (select distinct sid from v$mystat)))
select substr(gid, 1, 12) seq_inc_by_1
      ,substr(gid, 13, 4)||'(='||to_number(substr(gid, 13, 4), 'XXXXXX')||')' proc_spid  --, to_number(substr(gid, 13, 4), 'XXXXXX') proc_spid_number
      ,substr(gid, 17, 4) unknown_1
      ,substr(gid, 21, 8) unix_host_id
      ,substr(gid, 29, 4) unknown_2
      ,sq.*
from sq;

  SEQ_INC_BY_1   PROC_SPID   UNKNOWN_1 UNIX_HOST_ID  UNKNOWN_2 
  -------------- ----------- --------- ------------  ----------
  01EC602EB9D3   259D(=9629) E063      6E08740A      29BD      
   
  GID                              LOGON_TIME           SPID SID SERIAL# PID
  -------------------------------- -------------------- ---- --- ------- ---
  01EC602EB9D3259DE0636E08740A29BD 2023-AUG-02 09:11:33 9629 197    5605  31


== Legend ==

    SEQ_INC_BY_1 : session own Sequence Number, increase 1 per sys_guid call, 
                   initiated by a number related to v$session logon_time
    PROC_SPID    : v$process.spid
    UNKNOWN_1    : ??? (E063 or E064)
    UNIX_HOST_ID : hostid command output (Linux little endian, 4 bytes reverse order)
    UNKNOWN_2    : ???
With each call of sys_guid, SEQ_INC_BY_1 is increased by 1, starting from some v$session.logon_time.

with sq1 as (select /*+ materialize */ level         nr, substr(sys_guid(), 1, 12) guid_12 from dual connect by level <= 1e6)
    ,sq2 as (select /*+ materialize */ level + 1*1e6 nr, substr(sys_guid(), 1, 12) guid_12 from dual connect by level <= 1e6)
    ,sq3 as (select /*+ materialize */ level + 2*1e6 nr, substr(sys_guid(), 1, 12) guid_12 from dual connect by level <= 1e6)
select min(nr) min_nr, max(nr) max_nr
      ,min(guid_12) min_guid_12, max(guid_12) max_guid_12
      ,count(*) nr_count
      ,to_number(max(guid_12), 'xxxxxxxxxxxx') - to_number(min(guid_12), 'xxxxxxxxxxxx') + 1 nr_count
from 
(select * from sq1
   union 
 select * from sq2
   union
 select * from sq3); 
 
  MIN_NR     MAX_NR MIN_GUID_12  MAX_GUID_12    NR_COUNT   NR_COUNT
  ------ ---------- ------------ ------------ ---------- ----------
       1    3000000 00E804347C2E 00E8046242ED    3000000    3000000
So SEQ_INC_BY_1 is a Sequence Number, increasing 1 per sys_guid call, initiated by a number related to epoch time of v$session logon_time (probably cached in each v$process.spid).

SEQ_INC_BY_1 is 12 hex digits, with maximum decimal value:

  select to_number(lpad('F', 12, 'F'), lpad('X', 12, 'X')) from dual; 
    -- 281474976710655
The last 6 digits represents a pure calling sequence number, the rest prefix digits are UNIX epoch seconds. So the maximum seconds is:

   281474976
Since 281474976 seconds is about 3258 days (281474976/86400) or about 9 years, sys_guid is wrapped on overflow about each 9 years. The first 10 reset datetime can be projected as follows:

select level NR#
      ,to_date('1970-01-01 00:00:00', 'yyyy-mm-dd hh24:mi:ss') + level*281474976/86400 datetime
  from dual connect by level <= 10;

    NR# DATETIME
  ----- --------------------
      1 1978*DEC*02 19:29:36
      2 1987*NOV*03 14:59:12
      3 1996*OCT*04 10:28:48
      4 2005*SEP*05 05:58:24
      5 2014*AUG*07 01:28:00
      6 2023*JUL*08 20:57:36
      7 2032*JUN*08 16:27:12
      8 2041*MAY*10 11:56:48
      9 2050*APR*11 07:26:24
     10 2059*MAR*13 02:56:00
16 bytes sys_guid is a 32 long raw hex value. In each interval, 10/16 of them are starting with number 0-9, 6/16 with A-F.

Given a sys_guid, we can estimate its datetime by:

with sq as (select to_number(substr('00E928C3120917BAE063A005740AC0F2', 1, 12), lpad('X', 12, 'X'))/1e6 epoch_reminder from dual)
select epoch_reminder
      ,to_date('2023*JUL*08 20:57:36', 'YYYY*MON*DD hh24:mi:ss') + epoch_reminder/86400 estimated_datetime
from  sq;

  EPOCH_REMINDER   ESTIMATED_DATETIME
  --------------   --------------------
      1001411.25   2023*JUL*20 11:07:47
For each logon_time, we can estimate SEQ_INC_BY_1 as follows:

create or replace function seq_inc_by_1_calc (logon_time date) return varchar2 as
  l_max_seconds   number;
  l_logon_utc     date;
  l_logon_epoch   number; 
  l_inc_by_1_calc varchar2(20);
begin
  l_max_seconds   := trunc(to_number(lpad('F', 12, 'F'), lpad('X', 12, 'X'))/1e6);
  l_logon_utc     := cast(from_tz(cast(logon_time as timestamp), to_char(systimestamp, 'TZH:TZM')) at time zone 'UTC' as date);
  l_logon_epoch   := (l_logon_utc - to_date('1970-01-01 00:00:00','YYYY-MM-DD HH24:MI:SS')) * 86400;
  l_inc_by_1_calc := to_char(mod(l_logon_epoch, l_max_seconds)*1e6, lpad('X', 12, '0'));
  return l_inc_by_1_calc;
end;
/
For example, here is a test to show generated first 12 hex digits and calculated SEQ_INC_BY_1 (seq_inc_by_1_calc):

with sq as (
  select /*+ materialize */ logon_time, sys_guid() guid, seq_inc_by_1_calc(logon_time) seq_inc_by_1_calc
    from v$session 
   where sid = (select sid from v$mystat where rownum=1))
select logon_time
      ,guid
      ,substr(guid, 1, 12) seq_inc_by_1
      ,seq_inc_by_1_calc
      ,trunc(to_number(substr(guid, 1, 12), lpad('X', 12, 'X'))/1e6) seq_inc_by_1_num
      ,trunc(to_number(seq_inc_by_1_calc, lpad('X', 12, 'X'))/1e6)        seq_inc_by_1_calc_num
  from sq;

  LOGON_TIME           GUID                             SEQ_INC_BY_1   SEQ_INC_BY_1_CALC  SEQ_INC_BY_1_NUM SEQ_INC_BY_1_CALC_NUM
  -------------------- -------------------------------- -------------- ------------------ ---------------- ---------------------
  2023-AUG-02 09:11:33 01EC602EB9C2259DE0636E08740A29BD 01EC602EB9C2    01EB5FD90340               2114737               2110437
To verify the above SEQ_INC_BY_1 estimation, we run Oracle Jobs to collect both generated and computed value and then run query to compare the differences.

drop table test_guid_tab;

create sequence test_guid_seq;

create table test_guid_tab as
  select -123456789 job_id, test_guid_seq.nextval seq, sys_guid() guid
        ,logon_time, sid, s.serial#, spid, pid, sysdate datetime
   from v$session s, v$process p 
   where s.paddr=p.addr and sid = (select sid from v$mystat where rownum=1) and 1=2;

create or replace procedure test_tab_guid_proc(p_job_id number, p_sleep_seconds number := 1) as
begin
  insert into test_guid_tab
  select p_job_id, test_guid_seq.nextval seq, sys_guid() guid
        ,logon_time, sid, s.serial#, spid, pid, sysdate datetime
   from v$session s, v$process p 
   where s.paddr=p.addr and sid = (select sid from v$mystat where rownum=1);
  commit;
  dbms_session.sleep(p_sleep_seconds);
end;
/

--exec test_tab_guid_proc(0);

-- select * from test_guid_tab;

-- collect sys_guid by jobs. Each new started job logs on as a new session with a new logon_time.
create or replace procedure test_tab_guid_job(p_job_cnt number, p_dur_seconds number, p_sleep_seconds number := 1) as
  l_job_name varchar2(50);
begin
  for i in 1..p_job_cnt loop
    l_job_name := 'TEST_JOB_GUID_'||i;
    dbms_scheduler.create_job (
      job_name        => l_job_name,
      job_type        => 'PLSQL_BLOCK',
      job_action      => 
        'begin 
           test_tab_guid_proc('||i||', '||p_sleep_seconds||');
        end;',    
      start_date      => systimestamp,
      end_date        => systimestamp + numtodsinterval(p_dur_seconds, 'SECOND'),
      repeat_interval => 'freq=SECONDLY',
      auto_drop       => true,
      enabled         => true);
  end loop;
end;
/

-- 32 Job sessions running 100 seconds, each time a new logon_time for each session.
--exec test_tab_guid_job(32, 100);


-- check the difference of Oracle generated GUID and calculated GUID

select t.*, seq_inc_by_1_calc(logon_time) guid_12_calc
      ,substr(guid, 1, 12) guid_12
      ,seq_inc_by_1_calc(logon_time) guid_12_calc
      ,trunc(to_number(substr(guid, 1, 12), lpad('X', 12, 'X'))/1e6)            guid_12_num
      ,trunc(to_number(seq_inc_by_1_calc(logon_time), lpad('X', 12, 'X'))/1e6)  guid_12_calc_num
      ,trunc(to_number(substr(guid, 1, 12), lpad('X', 12, 'X'))/1e6)
       -
       trunc(to_number(seq_inc_by_1_calc(logon_time), lpad('X', 12, 'X'))/1e6)  delta
from test_guid_tab t
--where spid=32287
order by sid, logon_time;
By the way, sys_guid() is internally used to generate Oracle AQ msgid out parameter in DBMS_AQ.ENQUEUE and DBMS_AQ.DEQUEUE call. (for some example, see Blog: Oracle Multi-Consumer AQ dequeue with dbms_aq.listen Wait Events (I) )

sys_guid() is Oracle internal subroutine kokidgen. In oracle STANDARD package, it is pesguid.