Home > OS >  PostgreSQL - Group By Two Columns And Use One As Column For Result
PostgreSQL - Group By Two Columns And Use One As Column For Result

Time:11-26

I have two tables: Subject and Journal as following:

Subject
 id | name
----------
  1 | fruit
  2 | drink
  3 | vege
  4 | fish

and

Journal
 id | subj | reference | value
------------------------------
  1 |    1 |       foo |    30
  2 |    2 |       bar |    20
  3 |    1 |       bar |    35
  4 |    1 |       bar |    10
  5 |    2 |       baz |    25
  6 |    4 |       foo |    30
  7 |    4 |       bar |    40
  8 |    1 |       baz |    20
  9 |    2 |       bar |     5

I want to sum Journal.value group by both subj and reference.

I know the group by clause is for this purpose, but I would expect an output as following:

reference | subj_1 | subj_2 | subj_3 | subj_4
          |  fruit |  drink |   vege |   fish (even better)
---------------------------------------------
      foo |     30 |        |        |     30
      bar |     45 |     25 |        |     40
      baz |     20 |     25 |        |

Is this possible?

CodePudding user response:

This produces your desired result:

SELECT *
FROM   crosstab(
   'SELECT reference, subj, sum(value)
    FROM   journal
    GROUP  BY 1, 2
    ORDER  BY 1, 2'

  , $$VALUES (1), (2), (3), (4)$$
   ) AS ct (reference text, fruit int, drink int, vege int, fish int);

db<>fiddle here

Except for the sort order, which seems arbitrary?

Detailed explanation and instructions:

CodePudding user response:

You can generate a Sql Statement based on the current data.

Then use that generated Sql statement

Sample data:

create table Subject (
 id serial primary key, 
 name varchar(30) not null
 );

insert into Subject (id, name) values
 (1 ,'fruit')
,(2 ,'drink')
,(3 ,'vege')
,(4 ,'fish');
 
create table Journal (
 id int, 
 subj int, 
 reference varchar(30), 
 value int
);

insert into Journal   
(id, subj, reference, value) values
 (1, 1, 'foo', 30)
,(2, 2, 'bar', 20)
,(3, 1, 'bar', 35)
,(4, 1, 'bar' ,10)
,(5, 2, 'baz', 25)
,(6, 4, 'foo', 30)
,(7, 4, 'bar', 40)
,(8, 1, 'baz', 20)
,(9, 2, 'bar', 5);

Generate statement:

SELECT $f$SELECT * FROM crosstab(
     $$SELECT DISTINCT ON (1, 2)
       j.reference, 'subj_'||j.subj||'_'||s.name AS data_type, SUM(j.value) AS val
       FROM Journal j
       JOIN Subject s ON s.id = j.subj
       GROUP BY j.reference, j.subj, s.name
       ORDER BY j.reference$$

    ,$$VALUES ($f$     || string_agg(quote_literal(data_type), '), (') || $f$)$$)
AS x (reference text, $f$ || string_agg(quote_ident(data_type), ' int, ') || ' int)'
AS Stmt
FROM  (SELECT concat('subj_', id, '_', name) AS data_type FROM Subject) x
| stmt                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
| :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| SELECT * FROM crosstab(<br>     $$SELECT DISTINCT ON (1, 2)<br>       j.reference, 'subj_'||j.subj||'_'||s.name AS data_type, SUM(j.value) AS val<br>       FROM Journal j<br>       JOIN Subject s ON s.id = j.subj<br>       GROUP BY j.reference, j.subj, s.name<br>       ORDER BY j.reference$$<br><br>    ,$$VALUES ('subj_1_fruit'), ('subj_2_drink'), ('subj_3_vege'), ('subj_4_fish')$$)<br>AS x (reference text, subj_1_fruit int, subj_2_drink int, subj_3_vege int, subj_4_fish int) |

Run it

SELECT * FROM crosstab(
     $$SELECT DISTINCT ON (1, 2)
       j.reference, 'subj_'||j.subj||'_'||s.name AS data_type, SUM(j.value) AS val
       FROM Journal j
       JOIN Subject s ON s.id = j.subj
       GROUP BY j.reference, j.subj, s.name
       ORDER BY j.reference$$

    ,$$VALUES ('subj_1_fruit'), ('subj_2_drink'), ('subj_3_vege'), ('subj_4_fish')$$)
AS x (reference text, subj_1_fruit int, subj_2_drink int, subj_3_vege int, subj_4_fish int)
reference | subj_1_fruit | subj_2_drink | subj_3_vege | subj_4_fish
:-------- | -----------: | -----------: | ----------: | ----------:
bar       |           45 |           25 |        null |          40
baz       |           20 |           25 |        null |        null
foo       |           30 |         null |        null |          30

db<>fiddle here

  • Related