Home > Mobile >  Parse XML file in SQL with duplicated Tags
Parse XML file in SQL with duplicated Tags

Time:09-23

I'm trying to flatten my XML file however there are complications when there are duplicate tags within a tag. I'm not sure how to move back up the tag chain. Examples below.

DECLARE @xml AS XMl, @hDoc AS INT
SET @xml = '<Message>
    <Body>
        <SfkpgAcctAndHldgs>
            <SfkpgAcct>UHS5465</SfkpgAcct>
            <AcctSubLvl>
                <Dsclsr>
                    <SfkpgAcct>450812361</SfkpgAcct>
                    <AcctHldr>
                        <LglPrsn>
                            <NmAndAdr>
                                <Nm>Foo</Nm>
                                <Adr>
                                    <AdrLine>1 CORPORATE WAY, New York 15951</AdrLine>
                                    <Ctry>US</Ctry>
                                </Adr>
                            </NmAndAdr>
                            <LEI>5493231HPIWHJS5QL7N39</LEI>
                            <Ownrsh>OWNR</Ownrsh>
                        </LglPrsn>
                    </AcctHldr>
                    <ShrhldgBal>
                        <ShrhldgTp>BENE</ShrhldgTp>
                        <Unit>1000</Unit>
                    </ShrhldgBal>
                </Dsclsr>
                <Dsclsr>
                    <SfkpgAcct>450812362</SfkpgAcct>
                    <AcctHldr>
                        <LglPrsn>
                            <NmAndAdr>
                                <Nm>Bar</Nm>
                                <Adr>
                                    <AdrLine>2 CORPORATE WAY</AdrLine>
                                    <AdrLine>New York</AdrLine>
                                    <AdrLine>15951</AdrLine>
                                    <Ctry>US</Ctry>
                                </Adr>
                            </NmAndAdr>
                            <LEI>5493231HP2342345QL7N39</LEI>
                            <Ownrsh>OWNR</Ownrsh>
                        </LglPrsn>
                    </AcctHldr>
                    <ShrhldgBal>
                        <ShrhldgTp>BENE</ShrhldgTp>
                        <Unit>300</Unit>
                    </ShrhldgBal>
                </Dsclsr>
            </AcctSubLvl>
        </SfkpgAcctAndHldgs>        
    </Body>
</message>'
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML 

This query takes each SfkpgAcctAndHldgs and brings in the first Dsclsr, there are two but only the first it returned.

    SELECT * 
    FROM OPENXML(@hDoc,'message/Body/SfkpgAcctAndHldgs') WITH (
          SfkpgAcct                                                     NVARCHAR(MAX)   'SfkpgAcct'
        , Dsclsr_SfkpgAcct                                              NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/SfkpgAcct'
        , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Nm                           NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/AcctHldr/LglPrsn/NmAndAdr/Nm'
        , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine                  NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/AcctHldr/LglPrsn/NmAndAdr/Adr/AdrLine'
        , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_Ctry                     NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/AcctHldr/LglPrsn/NmAndAdr/Adr/Ctry'
        , Dsclsr_AcctHldr_LglPrsn_LEI                                   NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/AcctHldr/LglPrsn/LEI'
        , Dsclsr_AcctHldr_LglPrsn_Ownrsh                                NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/AcctHldr/LglPrsn/Ownrsh'
        , Dsclsr_ShrhldgBal_ShrhldgTp                                   NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/ShrhldgBal/ShrhldgTp'
        , Dsclsr_ShrhldgBal_Unit                                        NVARCHAR(MAX)   'AcctSubLvl/Dsclsr/ShrhldgBal/Unit'
        )
--SfkpgAcct AcctSubLvl_Dsclsr_SfkpgAcct Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Nm Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine    Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_Ctry   Dsclsr_AcctHldr_LglPrsn_LEI Dsclsr_AcctHldr_LglPrsn_Ownrsh  Dsclsr_AcctHldr_ShrhldgBal_ShrhldgTp    Dsclsr_AcctHldr_ShrhldgBal_Unit
--UHS5465   450812361                   Foo                                 1 CORPORATE WAY, New York 15951                 US  5493231                                 HPIWHJS5QL7N39              OWNR                            BENE                                    1000

This query looks at the Dsclsr level and so brings back both lines. However I can't go back up the Chain to get the first SfkpgAcct value, which is a unique value I can join the tables on.


    SELECT * 
    FROM OPENXML(@hDoc,'message/Body/SfkpgAcctAndHldgs/AcctSubLvl/Dsclsr') WITH (
          SfkpgAcct                                                     NVARCHAR(MAX)   'SfkpgAcct'
        , AcctHldr_LglPrsn_NmAndAdr_Nm                                  NVARCHAR(MAX)   'AcctHldr/LglPrsn/NmAndAdr/Nm'
        , AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine                         NVARCHAR(MAX)   'AcctHldr/LglPrsn/NmAndAdr/Adr/AdrLine'
        , AcctHldr_LglPrsn_NmAndAdr_Adr_Ctry                            NVARCHAR(MAX)   'AcctHldr/LglPrsn/NmAndAdr/Adr/Ctry'
        , AcctHldr_LglPrsn_LEI                                          NVARCHAR(MAX)   'AcctHldr/LglPrsn/LEI'
        , AcctHldr_LglPrsn_Ownrsh                                       NVARCHAR(MAX)   'AcctHldr/LglPrsn/Ownrsh'
        , ShrhldgBal_ShrhldgTp                                          NVARCHAR(MAX)   'ShrhldgBal/ShrhldgTp'
        , ShrhldgBal_Unit                                               NVARCHAR(MAX)   'ShrhldgBal/Unit'
        )

--SfkpgAcct AcctHldr_LglPrsn_NmAndAdr_Nm    AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine   AcctHldr_LglPrsn_NmAndAdr_Adr_Ctry  AcctHldr_LglPrsn_LEI    AcctHldr_LglPrsn_Ownrsh ShrhldgBal_ShrhldgTp    ShrhldgBal_Unit
--450812361 Foo                             1 CORPORATE WAY, New York 15951         US                                  5493231HPIWHJS5QL7N39   OWNR                    BENE                    1000
--450812362 Bar                             2 CORPORATE WAY                         US                                  5493231HP2342345QL7N39  OWNR                    BENE                    300

Also there are on occasions multiple AdrLine fields however in the same example as above I do not know who to go back up the chain to reference any previous values so I can then join on.

    SELECT * 
    FROM OPENXML(@hDoc,'message/Body/SfkpgAcctAndHldgs/AcctSubLvl/Dsclsr/AcctHldr/LglPrsn/NmAndAdr/Adr/AdrLine') WITH (
          AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine                         NVARCHAR(MAX)   '.'
        )
--AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine
--1 CORPORATE WAY, New York 15951
--2 CORPORATE WAY
--New York
--15951

EXEC sp_xml_removedocument @hDoc

Ideally, I want all values in a flat file, I can use a view to join the individual tables together providing I can attribute an existing ID or generate a new one.

--SfkpgAcct     DsclsrSfkpgAcct Nm      AdrLine1                        AdrLine2    AdrLine3    Ctry    LEI                     Ownrsh      ShrhldgTp   Unit
--UHS5465       450812361       Foo     1 CORPORATE WAY, New York 15951                         US      5493231HPIWHJS5QL7N39   OWNR        BENE        1000
--UHS5465       450812362       Bar     2 CORPORATE WAY                 New York    15951       US      5493231HP2342345QL7N39  OWNR        BENE        300
                                 

CodePudding user response:

Microsoft proprietary OPENXML() and its companions sp_xml_preparedocument and sp_xml_removedocument are kept just for backward compatibility with the obsolete SQL Server 2000. Their use is diminished just to very few fringe cases. Starting from SQL Server 2005 onwards, it is strongly recommended to re-write your SQL and switch it to XQuery.

SQL

DECLARE @xml XML =
N'<Message>
    <Body>
        <SfkpgAcctAndHldgs>
            <SfkpgAcct>UHS5465</SfkpgAcct>
            <AcctSubLvl>
                <Dsclsr>
                    <SfkpgAcct>450812361</SfkpgAcct>
                    <AcctHldr>
                        <LglPrsn>
                            <NmAndAdr>
                                <Nm>Foo</Nm>
                                <Adr>
                                    <AdrLine>1 CORPORATE WAY, New York 15951</AdrLine>
                                    <Ctry>US</Ctry>
                                </Adr>
                            </NmAndAdr>
                            <LEI>5493231HPIWHJS5QL7N39</LEI>
                            <Ownrsh>OWNR</Ownrsh>
                        </LglPrsn>
                    </AcctHldr>
                    <ShrhldgBal>
                        <ShrhldgTp>BENE</ShrhldgTp>
                        <Unit>1000</Unit>
                    </ShrhldgBal>
                </Dsclsr>
                <Dsclsr>
                    <SfkpgAcct>450812362</SfkpgAcct>
                    <AcctHldr>
                        <LglPrsn>
                            <NmAndAdr>
                                <Nm>Bar</Nm>
                                <Adr>
                                    <AdrLine>2 CORPORATE WAY</AdrLine>
                                    <AdrLine>New York</AdrLine>
                                    <AdrLine>15951</AdrLine>
                                    <Ctry>US</Ctry>
                                </Adr>
                            </NmAndAdr>
                            <LEI>5493231HP2342345QL7N39</LEI>
                            <Ownrsh>OWNR</Ownrsh>
                        </LglPrsn>
                    </AcctHldr>
                    <ShrhldgBal>
                        <ShrhldgTp>BENE</ShrhldgTp>
                        <Unit>300</Unit>
                    </ShrhldgBal>
                </Dsclsr>
            </AcctSubLvl>
        </SfkpgAcctAndHldgs>
    </Body>
</Message>';

SELECT c.value('(SfkpgAcct/text())[1]', 'VARCHAR(20)') AS SfkpgAcct
    , x.value('(SfkpgAcct/text())[1]', 'VARCHAR(20)') AS DsclsrSfkpgAcct
    , x.value('(AcctHldr/LglPrsn/NmAndAdr/Nm/text())[1]', 'VARCHAR(20)') AS Nm
    , x.value('(AcctHldr/LglPrsn/NmAndAdr/Adr/AdrLine[1]/text())[1]', 'VARCHAR(50)') AS AdrLine1
    , x.value('(AcctHldr/LglPrsn/NmAndAdr/Adr/AdrLine[2]/text())[1]', 'VARCHAR(50)') AS AdrLine2
    , x.value('(AcctHldr/LglPrsn/NmAndAdr/Adr/AdrLine[3]/text())[1]', 'VARCHAR(50)') AS AdrLine3
    , x.value('(AcctHldr/LglPrsn/NmAndAdr/Adr/Ctry/text())[1]', 'VARCHAR(50)') AS Ctry
    , x.value('(AcctHldr/LglPrsn/LEI/text())[1]', 'VARCHAR(50)') AS LEI
    , x.value('(AcctHldr/LglPrsn/Ownrsh/text())[1]', 'VARCHAR(50)') AS Ownrsh
    , x.value('(ShrhldgBal/ShrhldgTp/text())[1]', 'VARCHAR(50)') AS ShrhldgTp
    , x.value('(ShrhldgBal/Unit/text())[1]', 'VARCHAR(50)') AS Unit
FROM @xml.nodes('/Message/Body/SfkpgAcctAndHldgs') AS t1(c)
    CROSS APPLY t1.c.nodes('AcctSubLvl/Dsclsr') AS t2(x);

Output

 ----------- ----------------- ----- --------------------------------- ---------- ---------- ------ ------------------------ -------- ----------- ------ 
| SfkpgAcct | DsclsrSfkpgAcct | Nm  |            AdrLine1             | AdrLine2 | AdrLine3 | Ctry |          LEI           | Ownrsh | ShrhldgTp | Unit |
 ----------- ----------------- ----- --------------------------------- ---------- ---------- ------ ------------------------ -------- ----------- ------ 
| UHS5465   |       450812361 | Foo | 1 CORPORATE WAY, New York 15951 | NULL     | NULL     | US   | 5493231HPIWHJS5QL7N39  | OWNR   | BENE      | 1000 |
| UHS5465   |       450812362 | Bar | 2 CORPORATE WAY                 | New York | 15951    | US   | 5493231HP2342345QL7N39 | OWNR   | BENE      |  300 |
 ----------- ----------------- ----- --------------------------------- ---------- ---------- ------ ------------------------ -------- ----------- ------ 

CodePudding user response:

Dealing with XML in MS SQL server is a pain in the ... IMHO. Anyway:

SELECT SfkpgAcct,
       Dsclsr_SfkpgAcct,
       Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Nm,
       Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine1,
       Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine2,
       Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine3,
       Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_Ctry,
       Dsclsr_AcctHldr_LglPrsn_LEI,
       Dsclsr_AcctHldr_LglPrsn_Ownrsh,
       Dsclsr_ShrhldgBal_ShrhldgTp,
       Dsclsr_ShrhldgBal_Unit
FROM
    OPENXML(@hDoc, 'message/Body/SfkpgAcctAndHldgs')
    WITH
    (
        SfkpgAcct NVARCHAR(MAX) 'SfkpgAcct',
        AcctSubLvl XML 'AcctSubLvl'
    ) t1
    CROSS APPLY
(
    SELECT item.row.value('SfkpgAcct[1]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/NmAndAdr[1]/Nm[1]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/NmAndAdr[1]/Adr[1]/AdrLine[1]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/NmAndAdr[1]/Adr[1]/AdrLine[2]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/NmAndAdr[1]/Adr[1]/AdrLine[3]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/NmAndAdr[1]/Adr[1]/Ctry[1]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/LEI[1]', 'nvarchar(max)'),
           item.row.value('AcctHldr[1]/LglPrsn[1]/Ownrsh[1]', 'nvarchar(max)'),
           item.row.value('ShrhldgBal[1]/ShrhldgTp[1]', 'nvarchar(max)'),
           item.row.value('ShrhldgBal[1]/Unit[1]', 'nvarchar(max)')
    FROM t1.AcctSubLvl.nodes('AcctSubLvl/Dsclsr') item(row)
) AS nodes(Dsclsr_SfkpgAcct
    , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Nm
    , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine1
    , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine2
    , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_AdrLine3
    , Dsclsr_AcctHldr_LglPrsn_NmAndAdr_Adr_Ctry
    , Dsclsr_AcctHldr_LglPrsn_LEI
    , Dsclsr_AcctHldr_LglPrsn_Ownrsh
    , Dsclsr_ShrhldgBal_ShrhldgTp
    , Dsclsr_ShrhldgBal_Unit);

DBFiddle demo

  • Related