Home > database >  Query data from a text file and get a JSON column
Query data from a text file and get a JSON column

Time:10-25

Using SQL Server 2019 Express Edition.

I have a text file like this:

/type/author    /authors/OL1002354A 2   2008-08-20T18:07:53.62084   {"name": "Don L. Brigham", "personal_name": "Don L. Brigham", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:07:53.62084"}, "key": "/authors/OL1002354A", "type": {"key": "/type/author"}, "revision": 2}
/type/author    /authors/OL100246A  1   2008-04-01T03:28:50.625462  {"name": "Talib Samat.", "personal_name": "Talib Samat.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL100246A", "type": {"key": "/type/author"}, "revision": 1}
/type/author    /authors/OL1002700A 1   2008-04-01T03:28:50.625462  {"name": "Bengt E. Gustafsson Symposium (5th 1988 Stockholm, Sweden)", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1002700A", "type": {"key": "/type/author"}, "revision": 1}
/type/author    /authors/OL1002807A 2   2008-08-20T18:12:02.683498  {"name": "Ary J. Lamme", "personal_name": "Ary J. Lamme", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:12:02.683498"}, "key": "/authors/OL1002807A", "birth_date": "1940", "type": {"key": "/type/author"}, "revision": 2}
/type/author    /authors/OL1002994A 5   2012-03-03T06:50:39.836886  {"name": "R. Baxter Miller", "personal_name": "R. Baxter Miller", "created": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "photos": [7075806, 6974916], "last_modified": {"type": "/type/datetime", "value": "2012-03-03T06:50:39.836886"}, "latest_revision": 5, "key": "/authors/OL1002994A", "type": {"key": "/type/author"}, "revision": 5}
/type/author    /authors/OL100301A  1   2008-04-01T03:28:50.625462  {"name": "Ghazali Basri.", "personal_name": "Ghazali Basri.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL100301A", "type": {"key": "/type/author"}, "revision": 1}
/type/author    /authors/OL1003201A 2   2008-08-20T18:14:55.775993  {"name": "Robert Smaus", "personal_name": "Robert Smaus", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:14:55.775993"}, "key": "/authors/OL1003201A", "type": {"key": "/type/author"}, "revision": 2}
/type/author    /authors/OL1003202A 2   2008-08-20T18:14:56.005766  {"name": "Richard Mark Friedhoff", "personal_name": "Richard Mark Friedhoff", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:14:56.005766"}, "key": "/authors/OL1003202A", "type": {"key": "/type/author"}, "revision": 2}
/type/author    /authors/OL1003235A 1   2008-04-01T03:28:50.625462  {"name": "Hunbatz Men", "personal_name": "Hunbatz Men", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1003235A", "birth_date": "1941", "type": {"key": "/type/author"}, "revision": 1}
/type/author    /authors/OL1003719A 1   2008-04-01T03:28:50.625462  {"name": "NATO Advanced Research Workshop on Ras Oncogenes (1988 Athens, Greece)", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1003719A", "type": {"key": "/type/author"}, "revision": 1}
/type/author    /authors/OL1003744A 2   2008-08-20T18:20:16.351762  {"name": "Jeanne Thieme", "personal_name": "Jeanne Thieme", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:20:16.351762"}, "key": "/authors/OL1003744A", "type": {"key": "/type/author"}, "revision": 2}
/type/author    /authors/OL1003901A 2   2008-08-20T18:21:31.331678  {"name": "Kiiti Morita", "personal_name": "Kiiti Morita", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:21:31.331678"}, "key": "/authors/OL1003901A", "birth_date": "1915", "type": {"key": "/type/author"}, "revision": 2}
/type/author    /authors/OL1004047A 1   2008-04-01T03:28:50.625462  {"name": "Murphy, William M.", "personal_name": "Murphy, William M.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1004047A", "birth_date": "1942", "type": {"key": "/type/author"}, "revision": 1}

The columns are delimited by tabulation and rows by line feed.

I need to get the data inside the 4th column that is a JSON structure. For example I need the value of all "name" attributes.

I've imported the data using SSIS into a table and then I can CROSS APPLY OPENJSON(json_column) just fine to get the keys and values. But I was wondering if that couldn't be done with SQL/TSQL alone, using OPENROWSET directly and working with just the column that is formatted in JSON. Tried using OPENROWSET with CROSS APPLY OPENJSON(BulkColumn) but cannot be done since the rest of the columns aren't JSON formatted.

Any idea on how to avoid this error or a different approach?

CodePudding user response:

You can use BULK INSERT to get the file into a temp-table and get it parsed as Tab-delimited file. Then using OPENJSON to get the JSON-data. The following worked for me:

DROP TABLE IF EXISTS #Temp;
CREATE TABLE #Temp (
    /* Just some random column names*/
    Author NVARCHAR(100),
    AuthorPath NVARCHAR(100),
    IntValue INT,
    Created DATETIME2(3),
    JsonData NVARCHAR(MAX)
);

BULK INSERT #Temp
FROM 'C:\Users\andre\Documents\temp\test.txt'
WITH (
    FIELDTERMINATOR = '\t', --Tab delimited
    ROWTERMINATOR = '\n' --New-line character for row termination
)

SELECT 
    Temp.*,
    JsonData.[name]
FROM #Temp Temp
CROSS APPLY OPENJSON(Temp.JsonData,'$') 
WITH(
    [name] NVARCHAR(200) '$.name'
) JsonData
  • Related