Here is my table. I want to merge the two rows for NUM_FILES and TOTAL_SIZE into a single row, like this:
"TABLE_NAME","TBL_ID","PART_ID","TABLE_TYPE","TABLE_LOCATION","TABLE_OWNER","DATABASE_NAME","NUM_FILES","TOTAL_SIZE"
products_partitioned,2,2,EXTERNAL_TABLE,hdfs://sandbox-hdp.hortonworks.com:8020/HIVE_ROVER_IT/bikestores/products,hive,rovertesting,"3",4563
My full query is:
-- Lists each table partition together with its 'numFiles' and 'totalSize'
-- partition parameters, exposed as the NUM_FILES and TOTAL_SIZE columns.
-- PARAM_KEY and PARAM_VALUE are part of the grouping, so every parameter
-- produces its own row: NUM_FILES is set only on the 'numFiles' row and
-- TOTAL_SIZE only on the 'totalSize' row; the other column is NULL.
SELECT DISTINCT
    tbl.tbl_name AS TABLE_NAME,
    tbl.TBL_ID   AS TBL_ID,
    pp.PART_ID,
    tbl.tbl_type AS TABLE_TYPE,
    sds.location AS TABLE_LOCATION,
    tbl.OWNER    AS TABLE_OWNER,
    -- tbl.LAST_ACCESS_TIME AS ASSET_DATE_LAST_MODIFIED,
    dbs.name     AS DATABASE_NAME,
    -- No ELSE branch: a non-matching key yields NULL.
    CASE WHEN pp.PARAM_KEY = 'numFiles'  THEN pp.PARAM_VALUE END AS NUM_FILES,
    CASE WHEN pp.PARAM_KEY = 'totalSize' THEN pp.PARAM_VALUE END AS TOTAL_SIZE
FROM TBLS tbl
    INNER JOIN SDS
        ON sds.cd_id = tbl.tbl_id
    INNER JOIN DBS
        ON tbl.db_id = dbs.db_id
    LEFT JOIN PARTITIONS
        ON PARTITIONS.TBL_ID = tbl.TBL_ID
    INNER JOIN PARTITION_PARAMS pp
        ON PARTITIONS.PART_ID = pp.PART_ID
WHERE tbl.tbl_type IN ('MANAGED_TABLE', 'EXTERNAL_TABLE')
  AND pp.PARAM_KEY IN ('totalSize', 'numFiles')
GROUP BY (
    tbl.tbl_name,
    tbl.TBL_ID,
    pp.PART_ID,
    tbl.tbl_type,
    sds.location,
    tbl.OWNER,
    dbs.name,
    pp.PARAM_KEY,
    pp.PARAM_VALUE
)
ORDER BY TBL_ID, PART_ID;
CodePudding user response:
From my point of view, you should sum those values up and then group by
the remaining non-aggregated columns:
-- Returns one row per table partition, with the partition's 'numFiles' and
-- 'totalSize' parameters pivoted into the num_files and total_size columns.
-- Each CASE yields the value only for its own key (NULL otherwise) and SUM
-- ignores NULLs, so grouping by the descriptive columns alone collapses the
-- two key/value rows of a partition into a single row.
select tbl.tbl_name  as table_name,
       tbl.tbl_id    as tbl_id,
       pp.part_id,
       tbl.tbl_type  as table_type,
       sds.location  as table_location,
       tbl.owner     as table_owner,
       dbs.name      as database_name,
       -- param_value is presumably stored as text (TODO confirm the column
       -- type), so convert it explicitly instead of relying on the engine's
       -- implicit string-to-number conversion inside SUM.
       sum(case pp.param_key
             when 'numFiles' then cast(pp.param_value as decimal(20, 0))
           end)      as num_files,
       sum(case pp.param_key
             when 'totalSize' then cast(pp.param_value as decimal(20, 0))
           end)      as total_size
  from tbls tbl
       -- NOTE(review): joining tbl_id to cd_id looks suspicious. If this is
       -- the Hive metastore schema, TBLS normally links to SDS through
       -- sd_id; confirm the intended join before relying on table_location.
       inner join sds
               on tbl.tbl_id = sds.cd_id
       inner join dbs
               on dbs.db_id = tbl.db_id
       -- Written as an inner join: the join to partition_params and the
       -- param_key filter below discard tables without partitions anyway,
       -- so an outer join here would never surface extra rows.
       inner join partitions
               on tbl.tbl_id = partitions.tbl_id
       inner join partition_params pp
               on pp.part_id = partitions.part_id
 where pp.param_key in ('totalSize', 'numFiles')
   and tbl.tbl_type in ('MANAGED_TABLE', 'EXTERNAL_TABLE')
 group by tbl.tbl_name,
          tbl.tbl_id,
          pp.part_id,
          tbl.tbl_type,
          sds.location,
          tbl.owner,
          dbs.name
 -- Deterministic output, in the same order the original query used.
 order by tbl.tbl_id,
          pp.part_id;