Sunday, August 3, 2025

Blog List

Reading CLOB Line by Line with SQL and Plsql: ChatGPT and Gemini Implementations

Following previous Blog: Reading CLOB Line by Line with SQL and Plsql,
we will show and test SQL and Plsql implementations provided by ChatGPT and Gemini.

Note: Tested on Oracle 19c.


1. ChatGPT


1.1 ChatGPT SQL Implementation


Ask: 
  SQL-Only: Read Oracle CLOB Line by Line

Answer: 

WITH clob_data AS (
    SELECT your_clob_column AS clob_text
    FROM your_table
    WHERE your_condition
),
lines AS (
    SELECT LEVEL AS line_num,
           TRIM(REGEXP_SUBSTR(clob_text, '[^(\r\n)]+', 1, LEVEL)) AS line_text
    FROM clob_data
    CONNECT BY REGEXP_SUBSTR(clob_text, '[^(\r\n)]+', 1, LEVEL) IS NOT NULL
)
SELECT line_num, line_text
FROM lines;

----------------------- Test Procedure: Reading 1,000 Lines takes 3 Minutes -----------------------

create or replace procedure SQL_ChatGPT(p_id number) is
  l_row_cnt      number := power(10, p_id);  -- requires rownum limit, otherwise endless loop
  l_cnt          number := 0;
  l_len          number := 0;
begin
  for c in (
with clob_data as (select mylob clob_text from test_tab_lob where id = p_id),
lines AS (
    SELECT LEVEL AS line_num,
           TRIM(REGEXP_SUBSTR(clob_text, '[^' || chr(10) || ']+', 1, LEVEL)) AS line_text
    FROM clob_data
    CONNECT BY REGEXP_SUBSTR(clob_text,  '[^' || chr(10) || ']+', 1, LEVEL) IS NOT NULL and rownum <=l_row_cnt
)
SELECT line_num, line_text
FROM lines
  ) loop
    l_cnt := l_cnt + 1;
    l_len := l_len + length(c.line_text);
  end loop;
  dbms_output.put_line('CNT = '||l_cnt ||', Length = '||l_len);
end;
/


SQL > exec SQL_ChatGPT(3);
  CNT = 1,000, Length = 509,455
  Elapsed: 00:03:12.20
The performance is the similar to SQL_SLOW in previous Blog, Quadratic Performance (O(n^2)) and heavy "direct path read".
In the original code, rownum limit condition has to be added, otherwise endless loop.


1.2 ChatGPT Plsql Implementation


Ask:
  PL/SQL code to read a CLOB line by line

Answer:

DECLARE
  v_clob       CLOB;
  v_line       VARCHAR2(32767);
  v_pos        INTEGER := 1;
  v_newline    INTEGER;
  v_clob_len   INTEGER;
BEGIN
  -- Get your CLOB (example: first row from your_table)
  SELECT your_clob_column INTO v_clob FROM your_table WHERE rownum = 1;

  v_clob_len := DBMS_LOB.getlength(v_clob);

  WHILE v_pos <= v_clob_len LOOP
    -- Find next newline position
    v_newline := DBMS_LOB.INSTR(v_clob, CHR(10), v_pos);

    IF v_newline = 0 THEN
      -- No more newlines: read till end of CLOB
      v_line := DBMS_LOL.SUBSTR(v_clob, v_clob_len - v_pos + 1, v_pos);
      DBMS_OUTPUT.PUT_LINE(v_line);
      EXIT;
    ELSE
      -- Read from current pos up to newline -1 (excluding newline)
      v_line := DBMS_LOB.SUBSTR(v_clob, v_newline - v_pos, v_pos);
      DBMS_OUTPUT.PUT_LINE(v_line);

      -- Move position to character after newline
      v_pos := v_newline + 1;
    END IF;
  END LOOP;
END;
Note: above code contains: "DBMS_LOL", it is not clear how it comes from.

----------------------- Test Procedure: Reading 100,000 Lines takes 47 Seconds -----------------------

create or replace procedure PLSQL_ChatGPT(p_id number) is
--DECLARE
  v_clob       CLOB;
  v_line       VARCHAR2(32767);
  v_pos        INTEGER := 1;
  v_newline    INTEGER;
  v_clob_len   INTEGER;
  l_cnt          number := 0;
  l_len          number := 0;
BEGIN
  -- Get your CLOB (example: first row from your_table)
  --SELECT your_clob_column INTO v_clob FROM your_table WHERE rownum = 1;
  select mylob into v_clob from test_tab_lob where id = p_id;

  v_clob_len := DBMS_LOB.getlength(v_clob);

  WHILE v_pos <= v_clob_len LOOP
    -- Find next newline position
    v_newline := DBMS_LOB.INSTR(v_clob, CHR(10), v_pos);

    IF v_newline = 0 THEN
      -- No more newlines: read till end of CLOB
      -- v_line := DBMS_LOL.SUBSTR(v_clob, v_clob_len - v_pos + 1, v_pos);    
      v_line := DBMS_LOB.SUBSTR(v_clob, v_clob_len - v_pos + 1, v_pos);
      DBMS_OUTPUT.PUT_LINE(v_line);
      EXIT;
    ELSE
      -- Read from current pos up to newline -1 (excluding newline)
      v_line := DBMS_LOB.SUBSTR(v_clob, v_newline - v_pos, v_pos);
      --DBMS_OUTPUT.PUT_LINE(v_line);

      -- Move position to character after newline
      v_pos := v_newline + 1;
    END IF;
    
    l_cnt := l_cnt + 1;
    l_len := l_len + length(v_line);
  END LOOP;
  dbms_output.put_line('CNT = '||l_cnt ||', Length = '||l_len);
END;
/

SQL > exec PLSQL_ChatGPT(5);
  CNT = 100,000, Length = 99,509,455
  Elapsed: 00:00:47.04


2. Gemini


2.1 Gemini SQL Implementation


Ask: 
  read oracle clob line by line with pure recursive SQL

Answer: 

WITH
  clob_data AS (
    -- Replace this CTE with your actual table and CLOB column
    SELECT
      TO_CLOB(
        'This is the first line of the CLOB.' || CHR(10) ||
        'Second line is here.' || CHR(10) ||
        'And the very last line, third one.'
      ) AS my_clob
    FROM
      dual
  ),
  lines_recursive (line_num, current_pos, remaining_clob, line_text) AS (
    -- Anchor Member: Start with the first line
    SELECT
      1 AS line_num,
      1 AS current_pos,
      c.my_clob AS remaining_clob,
      CASE
        WHEN INSTR(c.my_clob, CHR(10), 1) > 0 THEN SUBSTR(c.my_clob, 1, INSTR(c.my_clob, CHR(10), 1) - 1)
        ELSE c.my_clob
      END AS line_text
    FROM
      clob_data c
    UNION ALL
    -- Recursive Member: Process the next line
    SELECT
      lr.line_num + 1 AS line_num,
      INSTR(lr.remaining_clob, CHR(10), 1) + 1 AS current_pos,
      SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1) AS remaining_clob,
      CASE
        WHEN INSTR(SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1), CHR(10), 1) > 0 
             THEN SUBSTR(SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1), 1, 
                INSTR(SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1), CHR(10), 1) - 1)
        ELSE SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1)
      END AS line_text
    FROM
      lines_recursive lr
    WHERE
      INSTR(lr.remaining_clob, CHR(10), 1) > 0 -- Continue as long as there are more newlines
  )
SELECT
  line_num,
  line_text
FROM
  lines_recursive;
First time when asking "read oracle clob line by line with SQL only", Gemini replied:
     I'm sorry, but it is not possible to read an Oracle CLOB line by line using SQL only.'

----- Test Procedure: Reading 10,000 Lines hit "ORA-01652: unable to extend temp segment by  in tablespace" ------

create or replace procedure SQL_Gemini(p_id number) is
  l_cnt          number := 0;
  l_len          number := 0;
begin
  for c in (
with clob_data as (select mylob my_clob from test_tab_lob where id = p_id),
  lines_recursive (line_num, current_pos, remaining_clob, line_text) AS (
    -- Anchor Member: Start with the first line
    SELECT
      1 AS line_num,
      1 AS current_pos,
      c.my_clob AS remaining_clob,
      CASE
        WHEN INSTR(c.my_clob, CHR(10), 1) > 0 THEN SUBSTR(c.my_clob, 1, INSTR(c.my_clob, CHR(10), 1) - 1)
        ELSE c.my_clob
      END AS line_text
    FROM
      clob_data c
    UNION ALL
    -- Recursive Member: Process the next line
    SELECT
      lr.line_num + 1 AS line_num,
      INSTR(lr.remaining_clob, CHR(10), 1) + 1 AS current_pos,
      SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1) AS remaining_clob,
      CASE
        WHEN INSTR(SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1), CHR(10), 1) > 0 
             THEN SUBSTR(SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1), 1, 
                INSTR(SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1), CHR(10), 1) - 1)
        ELSE SUBSTR(lr.remaining_clob, INSTR(lr.remaining_clob, CHR(10), 1) + 1)
      END AS line_text
    FROM
      lines_recursive lr
    WHERE
      INSTR(lr.remaining_clob, CHR(10), 1) > 0 -- Continue as long as there are more newlines
  )
SELECT
  line_num,
  line_text
FROM
  lines_recursive
  ) loop
    l_cnt := l_cnt + 1;
    l_len := l_len + length(c.line_text);
  end loop;
  dbms_output.put_line('CNT = '||l_cnt ||', Length = '||l_len);
end;
/

SQL > exec SQL_Gemini(4);
  BEGIN SQL_Gemini(4); END;
  *
  ERROR at line 1:
  ORA-01652: unable to extend temp segment by  in tablespace
  ORA-06512: at "K.SQL_GEMINI", line 6
  ORA-06512: at "K.SQL_GEMINI", line 6
  ORA-06512: at line 1
  Elapsed: 00:01:38.98

-- v$active_session_history.temp_space_allocated shows about 8 GB.


2.2 Gemini Plsql Implementation


Ask:
  read oracle clob line by line with PLSQL only

Answer:

DECLARE
    l_clob              CLOB;
    l_offset            NUMBER := 1;
    l_line_start        NUMBER := 1;
    l_line_end          NUMBER;
    l_line_buffer       VARCHAR2(32767); -- Max PL/SQL VARCHAR2 size
    l_newline_char_unix VARCHAR2(1) := CHR(10);
    l_newline_char_win  VARCHAR2(2) := CHR(13) || CHR(10);
    l_newline_length    NUMBER;
    l_clob_length       NUMBER;

BEGIN
    -- 1. Populate a CLOB for demonstration (replace with your actual CLOB source)
    l_clob := 'This is the first line.' || CHR(10) ||
              'This is the second line with some more text.' || CHR(10) ||
              'And a third line.' || CHR(13) || CHR(10) || -- Example with Windows newline
              'Fourth line after Windows newline.' || CHR(10) ||
              'Last line without a trailing newline.';

    l_clob_length := DBMS_LOB.GETLENGTH(l_clob);

    -- Loop through the CLOB
    WHILE l_offset <= l_clob_length LOOP
        -- Try to find Unix-style newline first
        l_line_end := INSTR(l_clob, l_newline_char_unix, l_offset);
        l_newline_length := 1; -- Length of Unix newline

        -- If not found or if a Windows newline is found earlier
        IF l_line_end = 0 OR (INSTR(l_clob, l_newline_char_win, l_offset) > 0 AND INSTR(l_clob, l_newline_char_win, l_offset) < l_line_end) THEN
            l_line_end := INSTR(l_clob, l_newline_char_win, l_offset);
            l_newline_length := 2; -- Length of Windows newline
        END IF;

        IF l_line_end = 0 THEN
            -- No more newlines found, this is the last "line" (or the only line)
            l_line_buffer := DBMS_LOB.SUBSTR(l_clob, l_clob_length - l_offset + 1, l_offset);
            l_offset := l_clob_length + 1; -- Exit loop
        ELSE
            -- Newline found, extract the line
            l_line_buffer := DBMS_LOB.SUBSTR(l_clob, l_line_end - l_offset, l_offset);
            l_offset := l_line_end + l_newline_length;
        END IF;

        -- Process the extracted line
        DBMS_OUTPUT.PUT_LINE('Line: "' || l_line_buffer || '"');
    END LOOP;

EXCEPTION
    WHEN OTHERS THEN
        DBMS_OUTPUT.PUT_LINE('An error occurred: ' || SQLERRM);
END;
/

----------------------- Test Procedure: Reading 100,000 Lines takes 44 Seconds -----------------------
-- Remove l_newline_char_win check, only check l_newline_char_unix
-- If check both l_newline_char_win and l_newline_char_unix, it takes 

create or replace procedure PLSQL_Gemini(p_id number, p_check_both number:= 0) is
    l_clob              CLOB;
    l_offset            NUMBER := 1;
    l_line_start        NUMBER := 1;
    l_line_end          NUMBER;
    l_line_buffer       VARCHAR2(32767); -- Max PL/SQL VARCHAR2 size
    l_newline_char_unix VARCHAR2(1) := CHR(10);
    l_newline_char_win  VARCHAR2(2) := CHR(13) || CHR(10);
    l_newline_length    NUMBER;
    l_clob_length       NUMBER;
    l_cnt               number := 0;
    l_len               number := 0;
BEGIN
    -- 1. Populate a CLOB for demonstration (replace with your actual CLOB source)
    select mylob into l_clob from test_tab_lob where id = p_id;
    l_clob_length := DBMS_LOB.GETLENGTH(l_clob);

    -- Loop through the CLOB
    WHILE l_offset <= l_clob_length LOOP
        -- Try to find Unix-style newline first
        l_line_end := INSTR(l_clob, l_newline_char_unix, l_offset);
        l_newline_length := 1; -- Length of Unix newline

        -- If not found or if a Windows newline is found earlier
        if p_check_both > 0 then 
          IF l_line_end = 0 OR (INSTR(l_clob, l_newline_char_win, l_offset) > 0 AND INSTR(l_clob, l_newline_char_win, l_offset) < l_line_end) THEN
              l_line_end := INSTR(l_clob, l_newline_char_win, l_offset);
              l_newline_length := 2; -- Length of Windows newline
          END IF;
        end if;

        IF l_line_end = 0 THEN
            -- No more newlines found, this is the last "line" (or the only line)
            l_line_buffer := DBMS_LOB.SUBSTR(l_clob, l_clob_length - l_offset + 1, l_offset);
            l_offset := l_clob_length + 1; -- Exit loop
        ELSE
            -- Newline found, extract the line
            l_line_buffer := DBMS_LOB.SUBSTR(l_clob, l_line_end - l_offset, l_offset);
            l_offset := l_line_end + l_newline_length;
        END IF;

        -- Process the extracted line
        --DBMS_OUTPUT.PUT_LINE('Line: "' || l_line_buffer || '"');
        l_cnt := l_cnt + 1;
        l_len := l_len + length(l_line_buffer);
    END LOOP;
    dbms_output.put_line('CNT = '||l_cnt ||', Length = '||l_len);
EXCEPTION
    WHEN OTHERS THEN
        DBMS_OUTPUT.PUT_LINE('An error occurred: ' || SQLERRM);
END;
/

-- Remove l_newline_char_win check, only check l_newline_char_unix

SQL > exec PLSQL_Gemini(5, 0);
  CNT = 100,000, Length = 99,509,455
  Elapsed: 00:00:44.32
  
-- Check both l_newline_char_win and l_newline_char_unix

SQL > exec PLSQL_Gemini(4, 1);  

$> perf top -d 2 -p 2464404  

   PerfTop:    2337 irqs/sec  kernel: 0.0%  exact:  0.0% lost: 0/0 drop: 0/0 [4000Hz cpu-clock:uhpppH],  (target_pid: 2464404)
-------------------------------------------------------------------------------------------------------------------------------------------

    42.87%  oracle         [.] kole_simple_string_match
    31.51%  oracle         [.] _intel_fast_memcmp
     2.50%  oracle         [.] kcbgtcr
     1.97%  libc-2.28.so   [.] syscall
     1.02%  oracle         [.] sxorchk
     0.86%  oracle         [.] __intel_avx_rep_memcpy


3. Test Outcome


3.1. ChatGPT SQL


SQL > exec SQL_ChatGPT(3);
  CNT = 1,000, Length = 509,455
  Elapsed: 00:03:04.59

SQL > exec SQL_ChatGPT(4);
  CNT = 10,000, Length = 9,509,455
  Elapsed: 08:20:01.13
 
SQL > exec SQL_ChatGPT(5);
 -- Not finished after 10 hours


3.2. ChatGPT PLSQL


SQL > exec PLSQL_ChatGPT(3);
  CNT = 1,000, Length = 509,455
  Elapsed: 00:00:00.38

SQL > exec PLSQL_ChatGPT(4);
  CNT = 10,000, Length = 9,509,455
  Elapsed: 00:00:04.04

SQL > exec PLSQL_ChatGPT(5);
  CNT = 100,000, Length = 99,509,455
  Elapsed: 00:00:36.66


3.3. Gemini SQL


SQL > exec SQL_Gemini(3);
  CNT = 1001, Length = 509455
  Elapsed: 00:00:05.60

SQL > exec SQL_Gemini(4);
  BEGIN SQL_Gemini(4); END;
  *
  ERROR at line 1:
  ORA-01652: unable to extend temp segment by  in tablespace
  ORA-06512: at "K.SQL_GEMINI", line 6
  ORA-06512: at "K.SQL_GEMINI", line 6
  ORA-06512: at line 1
  
  Elapsed: 00:01:35.50


3.4. Gemini PLSQL


SQL > exec PLSQL_Gemini(3);
  CNT = 1,000, Length = 509,455
  Elapsed: 00:00:00.35

SQL > exec PLSQL_Gemini(4);
  CNT = 10,000, Length = 9,509,455
  Elapsed: 00:00:04.28

SQL > exec PLSQL_Gemini(5);
  CNT = 100,000, Length = 99,509,455
  Elapsed: 00:00:38.75


3.5. Test Summary


(A). Both ChatGPT and Gemini provided efficient Plsql implementations.
(B). ChatGPT SQL implementation has the similar performance as SQL_SLOW in previous Blog (heavy "direct path read").
(C). Germin SQL implementation hits "ORA-01652: unable to extend temp segment by in tablespace" for 10,000 CLOB lines.
(D). Small adaptations/fixes could be needed to make them runnable.