Home > database >  Issue using CHARINDEX function in SQL Server
Issue using CHARINDEX function in SQL Server

Time:10-29

could someone help me? I'm trying to get a specific value in my delimited column.

Here is my Data

Column_A is my data Column_B is what I could get Column_C is what I want

Basically I'm trying to get the values between the 3rd ":" and the 4th ":"

I'm using this piece of code here:

select SourceID
     , SUBSTRING(SourceID,CHARINDEX(':', SourceID, CHARINDEX(':', SourceID)   1)   1,
       CHARINDEX(':', SourceID, CHARINDEX(':', SourceID, CHARINDEX(':', SourceID)   1)   1) -6)
 from  temp.table

Thanks in advance

CodePudding user response:

You may try with a recursive CTE to retrieve any part of the string as you wish. Something like this

CREATE TABLE #Temp
(
    MyString NVARCHAR(2000)
)

INSERT INTO #TEMP
VALUES('42:45:ABCD:GGRFG34:SADSAD'),('65:213:5435423:234234')

;WITH CTE AS
(
    SELECT
        ParentSTring = MyString,
        MyString = CASE CHARINDEX(':',MyString) WHEN 0 THEN NULL ELSE SUBSTRING(MyString,CHARINDEX(':',MyString) 1,LEN(MyString)) END,
        Part = CASE CHARINDEX(':',MyString) WHEN 0 THEN MyString ELSE SUBSTRING(MyString,1,CHARINDEX(':',MyString)-1) END,
        Seq = 1
    FROM 
        #Temp

    UNION ALL

    SELECT
        ParentSTring,
        MyString = CASE CHARINDEX(':',MyString) WHEN 0 THEN NULL ELSE SUBSTRING(MyString,CHARINDEX(':',MyString) 1,LEN(MyString)) END,
        Part = CASE CHARINDEX(':',MyString) WHEN 0 THEN MyString ELSE SUBSTRING(MyString,1,CHARINDEX(':',MyString)-1) END,
        Seq = ISNULL(Seq,0) 1
    FROM 
        CTE
    WHERE 
        ISNULL(MyString, '') <> ''
)
SELECT
    *
FROM 
    CTE
WHERE 
    Seq = 3 -- for retrieving the 3rd string, change this accordingly

Result

enter image description here

CodePudding user response:

First, if performance is important then a recursive CTE is NOT what you want. I demonstrate why in a moment.

I have a simple solution here, called SubstringBetween8K, but it's overkill for what you are doing. For this a simple Cascading APPLY will do the trick and perform the best. First the sample data:

IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;
GO
CREATE TABLE #temp (SourceId VARCHAR(1000));
INSERT #temp VALUES('42:45:10856x2019035x1200:GGRFG34:SADSAD.9999999999999999'),
                   ('65:213:999555x2019035x9444:5435423:234234,123123.111'),
                   ('999:12344:5555511056x35x9111:5435423:234234,555555555555'),
                   ('225:0:11056x2019035x9444:5435423:ABAFLHG.882');

Next for the Cascading APPLY solution.

SELECT      Item = SUBSTRING(t.SourceId,  f2.Pos 1, f3.Pos-f2.Pos-1)
FROM        #temp AS t
CROSS APPLY (VALUES(CHARINDEX(':',t.SourceId)))          AS f1(Pos)
CROSS APPLY (VALUES(CHARINDEX(':',t.SourceId,f1.Pos 1))) AS f2(Pos)
CROSS APPLY (VALUES(CHARINDEX(':',t.SourceId,f2.Pos 1))) AS f3(Pos);

Results:

Item
------------------------
10856x2019035x1200
999555x2019035x9444
5555511056x35x9111
11056x2019035x9444

Now a quick performance test which will demonstrate why not to use a recursive CTE.

--==== Sample data
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp;
GO
CREATE TABLE #temp (SourceId VARCHAR(1000));
INSERT #temp VALUES('42:45:10856x2019035x1200:GGRFG34:SADSAD.9999999999999999'),
                   ('65:213:999555x2019035x9444:5435423:234234,123123.111'),
                   ('999:12344:5555511056x35x9111:5435423:234234,555555555555'),
                   ('225:0:11056x2019035x9444:5435423:ABAFLHG.882');

--==== Add 10K rows for performance testing
INSERT #temp
SELECT TOP (100000) sourceId 
FROM   #temp
CROSS JOIN sys.all_columns, sys.all_columns AS b
GO

--==== Performance Test
IF OBJECT_ID('tempdb..#t1') IS NOT NULL DROP TABLE #t1;
IF OBJECT_ID('tempdb..#t2') IS NOT NULL DROP TABLE #t2;
GO

SET STATISTICS TIME, IO ON;

PRINT CHAR(10) 'Cascading CTE' CHAR(10) REPLICATE('-',90);

SELECT      Item = SUBSTRING(t.SourceId,  f2.Pos 1, f3.Pos-f2.Pos-1)
INTO        #t1
FROM        #temp AS t
CROSS APPLY (VALUES(CHARINDEX(':',t.SourceId)))          AS f1(Pos)
CROSS APPLY (VALUES(CHARINDEX(':',t.SourceId,f1.Pos 1))) AS f2(Pos)
CROSS APPLY (VALUES(CHARINDEX(':',t.SourceId,f2.Pos 1))) AS f3(Pos);


PRINT CHAR(10) 'Recursive CTE' CHAR(10) REPLICATE('-',90);
;WITH CTE AS
(
    SELECT
        ParentSTring = SourceId,
        SourceId = CASE CHARINDEX(':',SourceId) WHEN 0 THEN NULL ELSE SUBSTRING(SourceId,CHARINDEX(':',SourceId) 1,LEN(SourceId)) END,
        Part = CASE CHARINDEX(':',SourceId) WHEN 0 THEN SourceId ELSE SUBSTRING(SourceId,1,CHARINDEX(':',SourceId)-1) END,
        Seq = 1
    FROM  #temp 
    UNION ALL
    SELECT
        ParentSTring,
        MyString = CASE CHARINDEX(':',SourceId) WHEN 0 THEN NULL ELSE SUBSTRING(SourceId,CHARINDEX(':',SourceId) 1,LEN(SourceId)) END,
        Part = CASE CHARINDEX(':',SourceId) WHEN 0 THEN SourceId ELSE SUBSTRING(SourceId,1,CHARINDEX(':',SourceId)-1) END,
        Seq = ISNULL(Seq,0) 1
    FROM  CTE
    WHERE ISNULL(SourceId, '') <> ''
)
SELECT Part
INTO   #t2
FROM   CTE
WHERE  Seq = 3

SET STATISTICS TIME, IO OFF;

Test Results:

Cascading CTE
------------------------------------------------------------------------------------------
Table '#temp'. Scan count 9, logical reads 807, physical reads 0...

 SQL Server Execution Times: CPU time = 327 ms,  elapsed time = 111 ms.

Recursive CTE
------------------------------------------------------------------------------------------
Table 'Worktable'. Scan count 2, logical reads 4221845, physical reads 0...
Table '#temp'. Scan count 1, logical reads 807, physical reads 0...

 SQL Server Execution Times: CPU time = 8781 ms,  elapsed time = 9370 ms.

From 1/10th of a second, down from 10 seconds. A roughly 100X performance improvement. Part of the issue with the recursive CTE is the excessive IO (reads). Note the 4.3 million reads for a simple 10K rows.

  • Related