Home > OS >  SQL Server; How to incorporate unique values from STRING_AGG?
SQL Server; How to incorporate unique values from STRING_AGG?

Time:11-18

There are a handful of discussions on how to setup unique values with respect to STRING_AGG, see:

https://stackoverflow.com/a/51646573/6534818

https://stackoverflow.com/a/50589222/6534818

but I am struggling to port them over to my case as I would like to maintain my ordering of results as well as management of NULL.

Consider the following MWE -- how might I just return the distinct/unique values for the STRING_AGG operation?

    CREATE TABLE #bridge (dim2Key int, groupKey int)
    INSERT  #bridge (dim2Key, groupKey) 
    VALUES (1, 1)
    ,(2, 1)
    ,(3, 1)
    ,(4, 2)
    ,(5, NULL)

    CREATE TABLE #dim2 (dim2Key int, attributeOne varchar(255))
    INSERT  #dim2 (dim2Key, attributeOne)
    VALUES (1, 'B')
    ,(2, 'A')
    ,(3, 'A')
    ,(4, NULL)

    CREATE TABLE #dim1 (dim1Key int, attributeTwo int, attributeThree varchar(255))
    INSERT #dim1 (dim1Key, attributeTwo, attributeThree)
    VALUES (1, 35,   'val1') 
    ,(2, 25,   'val2')   
    ,(3, 45,   'val3')   
    ,(4, 55,   'val1')   
   ,(5, NULL,   NULL)   

    CREATE TABLE #fact1 (dim1Key int, factvalue1 int, groupKey int)
    INSERT #fact1 (dim1Key, factvalue1, groupKey)
    VALUES (1,   5,  1)
    ,(2,   25, 1)
    ,(3,   55, 1)
    ,(4,   99, 2)
    ,(5,   99, NULL)   
    GO

SELECT
  #fact1.dim1Key,
  #fact1.factvalue1,
  #fact1.groupKey,
  #dim1.attributeTwo,
  #dim1.attributeThree,
  ISNULL(#dim2.attributeOne, '<missing>')
FROM #fact1
JOIN #dim1 ON #dim1.dim1key = #fact1.dim1key
CROSS APPLY (
    SELECT attributeOne =  STRING_AGG(ISNULL(#dim2.attributeOne, '<missing>'), ', ')  WITHIN GROUP (ORDER BY #dim2.attributeOne)
    FROM #bridge b
    JOIN #dim2 ON #dim2.dim2key = b.dim2key
    WHERE b.groupKey = #fact1.groupKey
) #dim2

CodePudding user response:

Just put it in a subquery with DISTINCT

SELECT
  #fact1.dim1Key,
  #fact1.factvalue1,
  #fact1.groupKey,
  #dim1.attributeTwo,
  #dim1.attributeThree,
  ISNULL(#dim2.attributeOne, '<missing>')
FROM #fact1
JOIN #dim1 ON #dim1.dim1key = #fact1.dim1key
CROSS APPLY (
    SELECT
      attributeOne = STRING_AGG(ISNULL(d2.attributeOne, '<missing>'), ', ')  WITHIN GROUP (ORDER BY d2.attributeOne)
    FROM (
        SELECT DISTINCT
          #dim2.attributeOne
        FROM #bridge b
        JOIN #dim2 ON #dim2.dim2key = b.dim2key
        WHERE b.groupKey = #fact1.groupKey
    ) d2
) #dim2
  • Related