Using SQL Server 2019 Express Edition.
I have a text file like this:
/type/author /authors/OL1002354A 2 2008-08-20T18:07:53.62084 {"name": "Don L. Brigham", "personal_name": "Don L. Brigham", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:07:53.62084"}, "key": "/authors/OL1002354A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL100246A 1 2008-04-01T03:28:50.625462 {"name": "Talib Samat.", "personal_name": "Talib Samat.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL100246A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1002700A 1 2008-04-01T03:28:50.625462 {"name": "Bengt E. Gustafsson Symposium (5th 1988 Stockholm, Sweden)", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1002700A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1002807A 2 2008-08-20T18:12:02.683498 {"name": "Ary J. Lamme", "personal_name": "Ary J. Lamme", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:12:02.683498"}, "key": "/authors/OL1002807A", "birth_date": "1940", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1002994A 5 2012-03-03T06:50:39.836886 {"name": "R. Baxter Miller", "personal_name": "R. Baxter Miller", "created": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "photos": [7075806, 6974916], "last_modified": {"type": "/type/datetime", "value": "2012-03-03T06:50:39.836886"}, "latest_revision": 5, "key": "/authors/OL1002994A", "type": {"key": "/type/author"}, "revision": 5}
/type/author /authors/OL100301A 1 2008-04-01T03:28:50.625462 {"name": "Ghazali Basri.", "personal_name": "Ghazali Basri.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL100301A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1003201A 2 2008-08-20T18:14:55.775993 {"name": "Robert Smaus", "personal_name": "Robert Smaus", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:14:55.775993"}, "key": "/authors/OL1003201A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1003202A 2 2008-08-20T18:14:56.005766 {"name": "Richard Mark Friedhoff", "personal_name": "Richard Mark Friedhoff", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:14:56.005766"}, "key": "/authors/OL1003202A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1003235A 1 2008-04-01T03:28:50.625462 {"name": "Hunbatz Men", "personal_name": "Hunbatz Men", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1003235A", "birth_date": "1941", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1003719A 1 2008-04-01T03:28:50.625462 {"name": "NATO Advanced Research Workshop on Ras Oncogenes (1988 Athens, Greece)", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1003719A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1003744A 2 2008-08-20T18:20:16.351762 {"name": "Jeanne Thieme", "personal_name": "Jeanne Thieme", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:20:16.351762"}, "key": "/authors/OL1003744A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1003901A 2 2008-08-20T18:21:31.331678 {"name": "Kiiti Morita", "personal_name": "Kiiti Morita", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:21:31.331678"}, "key": "/authors/OL1003901A", "birth_date": "1915", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1004047A 1 2008-04-01T03:28:50.625462 {"name": "Murphy, William M.", "personal_name": "Murphy, William M.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1004047A", "birth_date": "1942", "type": {"key": "/type/author"}, "revision": 1}
The columns are delimited by tabulation and rows by line feed.
I need to get the data inside the 4th column that is a JSON structure. For example I need the value of all "name" attributes.
I've imported the data using SSIS into a table and then I can CROSS APPLY OPENJSON(json_column)
just fine to get the keys and values. But I was wondering if that couldn't be done with SQL/TSQL alone, using OPENROWSET
directly and working with just the column that is formatted in JSON. Tried using OPENROWSET
with CROSS APPLY OPENJSON(BulkColumn)
but cannot be done since the rest of the columns aren't JSON formatted.
Any idea on how to avoid this error or a different approach?
CodePudding user response:
You can use BULK INSERT
to get the file into a temp-table and get it parsed as Tab-delimited file. Then using OPENJSON
to get the JSON-data. The following worked for me:
DROP TABLE IF EXISTS #Temp;
CREATE TABLE #Temp (
/* Just some random column names*/
Author NVARCHAR(100),
AuthorPath NVARCHAR(100),
IntValue INT,
Created DATETIME2(3),
JsonData NVARCHAR(MAX)
);
BULK INSERT #Temp
FROM 'C:\Users\andre\Documents\temp\test.txt'
WITH (
FIELDTERMINATOR = '\t', --Tab delimited
ROWTERMINATOR = '\n' --New-line character for row termination
)
SELECT
Temp.*,
JsonData.[name]
FROM #Temp Temp
CROSS APPLY OPENJSON(Temp.JsonData,'$')
WITH(
[name] NVARCHAR(200) '$.name'
) JsonData