I have a XML like below :
<Employees>
<Employee>
<AccountInfo>
<AccountNumber>1234567</AccountNumber>
<AccountType>Test</AccountType>
</AccountInfo>
<DocumentType>Test Doc</DocumentType>
<Date>12/01/2020</Date>
<Description>Test Description</Description>
<ImageFileType>pdf</ImageFileType>
<ImageFileName>321.PDF</ImageFileName>
<AdditionalInfo>
<FieldName>docDescription</FieldName>
<FieldValue>ABC XYZ</FieldValue>
</AdditionalInfo>
<AdditionalInfo>
<FieldName>Creation Date</FieldName>
<FieldValue>12/01/2020</FieldValue>
</AdditionalInfo>
<AdditionalInfo>
<FieldName>Department Code</FieldName>
<FieldValue>63</FieldValue>
</AdditionalInfo>
<AdditionalInfo>
<FieldName>ID No</FieldName>
<FieldValue>3214567</FieldValue>
</AdditionalInfo>
</Employee>
</Employees>
I want to insert this XML data into 3 tables EmployeeInfo
, AccountInfo
and AdditionalInfo
with schema like this:
EmployeeInfo
(
EmployeeNumber Int Identity(1,1) NOT NULL,
DocumentType varchar(500) NULL,
[Description] varchar(500) NULL,
ImageFileName varchar(500) NULL,
ImageFileType varchar(500) NULL,
[Date] varchar(500) NULL
);
AccountInfo
(
EmployeeNumber int NOT NULL,
AccountNumber varchar(500) NULL,
AccountType varchar(500) NULL
);
AdditionalInfo
(
EmployeeNumber int NOT NULL,
FieldName varchar(500) NULL,
FieldValue varchar(500) NULL
);
EmployeeNumber
column is used for linking AccountInfo
and AdditionalInfo
table with EmployeeInfo
.
AccountInfo
table will get below node:
<AccountInfo>
<AccountNumber>1234567</AccountNumber>
<AccountType>Test</AccountType>
</AccountInfo>
The AdditionalInfo
table will get these XML nodes:
<AdditionalInfo>
<FieldName>docDescription</FieldName>
<FieldValue>ABC XYZ</FieldValue>
</AdditionalInfo>
<AdditionalInfo>
<FieldName>Creation Date</FieldName>
<FieldValue>12/01/2020</FieldValue>
</AdditionalInfo>
<AdditionalInfo>
<FieldName>Department Code</FieldName>
<FieldValue>63</FieldValue>
</AdditionalInfo>
<AdditionalInfo>
<FieldName>ID No</FieldName>
<FieldValue>3214567</FieldValue>
</AdditionalInfo>
Rest xml node are inserted into EmployeeInfo
.
I tried with the query shown here. I'm able to get xml data and insert it into main table EmployeeInfo
, but not able to link AdditionalInfo
and AccountInfo
with identity generated in EmployeeInfo
table.
Note: I have multiple employee nodes in the xml.
DECLARE @EmpNumber int
DECLARE @x xml
SELECT @x = X FROM OPENROWSET (BULK 'C:\Test\Sample.xml', SINGLE_BLOB) AS EmpInfo(X)
DECLARE @hdoc int
EXEC sp_xml_preparedocument @hdoc OUTPUT, @x
INSERT INTO EmployeeInfo (DocumentType, [Description], ImageFileName, ImageFileType, [Date])
SELECT * FROM OPENXML (@hdoc, '/Employees/Employee', 2)
WITH ( DocumentType varchar(500), [Description] varchar(500), ImageFileName varchar(500), ImageFileType varchar(500), [Date] varchar(500))
SELECT @EmpNumber=SCOPE_IDENTITY()
INSERT INTO AccountInfo ([EmployeeNumber],[AccountNumber], [AccountType])
SELECT @EmpNumber, *
FROM OPENXML (@hdoc, '/Employees/Employee/AccountInfo', 2)
WITH (AccountNumber varchar(500), AccountType varchar(500))
INSERT INTO AdditionalInfo ([EmployeeNumber],[FieldName], [FieldValue])
SELECT @EmpNumber, *
FROM OPENXML (@hdoc, '/Employees/Employee/AdditionalInfo', 2)
WITH (
FieldName varchar(5000), FieldValue varchar(5000)
)
EXEC sp_xml_removedocument @hdoc
Can someone help me out in this. Thanks in Advance.
CodePudding user response:
Here is a conceptual example how to do it.
Two tables, state as a parent, and city as a child, with one-to-many relationship. Primary keys are IDENTITY
based.
INSERT
into a parent table generates new IDENTITY
values that are captured and stored in a table variable, and later used to INSERT into a child table to preserve foreign key constraint.
SQL
-- DDL and sample data population, start
USE tempdb;
GO
DROP TABLE IF EXISTS #city;
DROP TABLE IF EXISTS #state;
-- parent table
CREATE TABLE #state (
stateID INT IDENTITY PRIMARY KEY,
stateName VARCHAR(30),
abbr CHAR(2),
capital VARCHAR(30)
);
-- child table (1-to-many)
CREATE TABLE #city (
cityID INT IDENTITY,
stateID INT NOT NULL FOREIGN KEY REFERENCES #state(stateID),
city VARCHAR(30),
[population] INT,
PRIMARY KEY (cityID, stateID, city)
);
-- mapping table to preserve IDENTITY ids
DECLARE @idmapping TABLE (GeneratedID INT PRIMARY KEY,
NaturalID VARCHAR(20) NOT NULL UNIQUE);
DECLARE @xml XML =
N'<root>
<state>
<StateName>Florida</StateName>
<Abbr>FL</Abbr>
<Capital>Tallahassee</Capital>
<cities>
<city>
<city>Miami</city>
<population>470194</population>
</city>
<city>
<city>Orlando</city>
<population>285713</population>
</city>
</cities>
</state>
<state>
<StateName>Texas</StateName>
<Abbr>TX</Abbr>
<Capital>Austin</Capital>
<cities>
<city>
<city>Houston</city>
<population>2100263</population>
</city>
<city>
<city>Dallas</city>
<population>5560892</population>
</city>
</cities>
</state>
</root>';
-- DDL and sample data population, end
;WITH rs AS
(
SELECT stateName = p.value('(StateName/text())[1]', 'VARCHAR(30)'),
abbr = p.value('(Abbr/text())[1]', 'CHAR(2)'),
capital = p.value('(Capital/text())[1]', 'VARCHAR(30)')
FROM @xml.nodes('/root/state') AS t(p)
)
MERGE #state AS o
USING rs ON 1 = 0
WHEN NOT MATCHED THEN
INSERT(stateName, abbr, capital)
VALUES(rs.stateName, rs.Abbr, rs.Capital)
OUTPUT inserted.stateID, rs.stateName
INTO @idmapping (GeneratedID, NaturalID);
;WITH Details AS
(
SELECT NaturalID = p.value('(StateName/text())[1]', 'VARCHAR(30)'),
city = c.value('(city/text())[1]', 'VARCHAR(30)'),
[population] = c.value('(population/text())[1]', 'INT')
FROM @xml.nodes('/root/state') AS A(p) -- parent
CROSS APPLY A.p.nodes('cities/city') AS B(c) -- child
)
INSERT #city (stateID, city, [Population])
SELECT m.GeneratedID, d.city, d.[Population]
FROM Details AS d
INNER JOIN @idmapping AS m ON d.NaturalID = m.NaturalID;
-- test
SELECT * FROM #state;
SELECT * FROM @idmapping;
SELECT * FROM #city;
CodePudding user response:
Avoid using sp_xml_preparedocument, OPENXML and sp_xml_removedocument because they are inefficient, often cause resource leakage when sp_xml_removedocument is forgotten, and encourage RBAR-like constructs in what's supposed to be a set-based RDBMS.
Prefer to use nodes() and value() where possible, such as with the following...
insert dbo.EmployeeInfo (DocumentType, [Description], ImageFileName, ImageFileType, [Date])
select
Employee.value('(DocumentType/text())[1]', 'varchar(500)'),
Employee.value('(Description/text())[1]', 'varchar(500)'),
Employee.value('(ImageFileName/text())[1]', 'varchar(500)'),
Employee.value('(ImageFileType/text())[1]', 'varchar(500)'),
Employee.value('(Date/text())[1]', 'varchar(500)')
from @xml.nodes('/Employees/Employee') root(Employee);
declare @EmpNumber int = SCOPE_IDENTITY();
insert dbo.AccountInfo ([EmployeeNumber], [AccountNumber], [AccountType])
select
@EmpNumber,
AccountInfo.value('(AccountNumber/text())[1]', 'varchar(500)'),
AccountInfo.value('(AccountType/text())[1]', 'varchar(500)')
from @xml.nodes('/Employees/Employee/AccountInfo') root(AccountInfo);
insert dbo.AdditionalInfo ([EmployeeNumber], [FieldName], [FieldValue])
select
@EmpNumber,
AdditionalInfo.value('(FieldName/text())[1]', 'varchar(500)'),
AdditionalInfo.value('(FieldValue/text())[1]', 'varchar(500)')
from @xml.nodes('/Employees/Employee/AdditionalInfo') root(AdditionalInfo);