Home > Net >  How to avoid clubbing of two rows for the same record while doing SUM and GROUP BY in PostgreSql
How to avoid clubbing of two rows for the same record while doing SUM and GROUP BY in PostgreSql

Time:10-20

There are 4 tables in general "product","projecteddata","payoutblock" ,"users"

product

CREATE TABLE IF NOT EXISTS public.product
(
    id integer NOT NULL DEFAULT nextval('product_id_seq'::regclass),
    "productType" character varying COLLATE pg_catalog."default" NOT NULL,
    amount numeric NOT NULL DEFAULT '0'::numeric,
    "startDate" date NOT NULL
)

projecteddata

CREATE TABLE IF NOT EXISTS public.projecteddata
(
    id integer NOT NULL DEFAULT nextval('projecteddata'::regclass),
    "productType" character varying COLLATE pg_catalog."default" NOT NULL,
    amount numeric NOT NULL DEFAULT '0'::numeric,
    "startDate" date NOT NULL,
    "systemGeneratedStartDate" date NOT NULL,
    "systemGeneratedEndDate" date NOT NULL,
    "productId" integer,
    CONSTRAINT "FK_c6c14f315490aceb2f7a9ffb3ed" FOREIGN KEY ("productId")
        REFERENCES public.product (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
)

payoutblock

CREATE TABLE IF NOT EXISTS public.payoutblock
(
    id integer NOT NULL DEFAULT nextval('payoutblock_id_seq'::regclass),
    "productId" integer,
    "payeeId" uuid, 
    CONSTRAINT "FK_1bc960a61d22c0daf3efd691d70" FOREIGN KEY ("productId")
        REFERENCES public.product (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION,
    CONSTRAINT "FK_e6f4e4b0f0750910b39ec67f816" FOREIGN KEY ("payeeId")
        REFERENCES public.users (id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
)

users

CREATE TABLE IF NOT EXISTS public.users
(
    id uuid NOT NULL DEFAULT uuid_generate_v4(),
    CONSTRAINT "PK_a3ffb1c0c8416b9fc6f907b7433" PRIMARY KEY (id),
)

Sample Input

INSERT INTO public.product(
    id, "productType", amount, "startDate")
    VALUES (1, 'ABCD', 100, '2022-10-02'),
    VALUES (2, 'XYZ', 100, '2022-12-14');
INSERT INTO public.payoutblock(
    id,"productId", "payeeId")
    VALUES (1, 1, '0005c2b6-29f2-491f-8565-c30567f232f9'),
        VALUES (2, 1 , 'ec5e7cbb-2f82-4b05-8135-73746b59afb0'),
    VALUES (3, 2, '0005c2b6-29f2-491f-8565-c30567f232f9');
INSERT INTO public.users(
    id
    VALUES ('0005c2b6-29f2-491f-8565-c30567f232f9'),
    ('ec5e7cbb-2f82-4b05-8135-73746b59afb0');
INSERT INTO public.projecteddata(
    id, "productType", amount, "startDate", "systemGeneratedStartDate", "systemGeneratedEndDate",
    "productId")
    VALUES (1, 'ABCD', 100, '2022-10-02', '2022-10-01', '2022-10-31', 1),
    VALUES (2, 'ABCD', 100, '2022-10-02', '2022-11-01', '2022-11-30', 1),
    VALUES (3, 'XYZ', 100, '2022-12-14', '2022-12-01', '2022-12-31', 2),
    VALUES (4, 'ABCD', 100, '2022-10-02', '2022-12-01', '2022-12-31', 1),
    VALUES (5, 'XYZ', 100, '2022-12-14', '2023-01-01','2023-01-31', 2),
    VALUES (6, 'ABCD', 100, '2022-10-02', '2023-01-01', '2023-01-31', 1),
    VALUES (7, 'XYZ', 100, '2022-12-14', '2023-02-01', '2023-02-28', 2),
    VALUES (8, 'ABCD', 100, '2022-10-02', '2023-02-01', '2023-02-28', 1);

I am trying to run this query:

SELECT sum(rpd."amount"),to_char(rpd."systemGeneratedStartDate", 'YYYY-MM-DD') as "systemGeneratedStartDate",
            to_char(rpd."systemGeneratedEndDate", 'YYYY-MM-DD') as "systemGeneratedEndDate"
            FROM "projecteddata" rpd
            left join public.product rp
            on rpd."productId" = rp."id"
            left join public.payoutblock pb
            on pb."productId" = rp."id"
            WHERE "systemGeneratedStartDate" >= '2022-10-01' AND "systemGeneratedStartDate" < '2024-04-01' 
             AND (pb."payeeId" IN ( 'ec5e7cbb-2f82-4b05-8135-73746b59afb0' , '0005c2b6-29f2-491f-8565-c30567f232f9' )) 
            GROUP BY "systemGeneratedStartDate","systemGeneratedEndDate",rp."id"
                    ORDER BY "systemGeneratedStartDate" ASC

but not getting the desired result

Current Output

sum systemGeneratedStartDate systemGeneratedEndDate
200 "2022-12-01" "2022-12-31"
200 "2023-02-01" "2023-02-28"
200 "2023-03-01" "2023-03-31"
200 "2023-05-01" "2023-05-31"
200 "2023-06-01" "2023-06-30"
200 "2023-07-01" "2023-07-31"
200 "2023-08-01" "2023-08-31"
200 "2023-09-01" "2023-09-30"

Also tried by adding payeeId in the group by clause

SELECT sum(rpd."amount"),to_char(rpd."systemGeneratedStartDate", 'YYYY-MM-DD') as "systemGeneratedStartDate",
            to_char(rpd."systemGeneratedEndDate", 'YYYY-MM-DD') as "systemGeneratedEndDate",pb."payeeId"
            FROM "projecteddata" rpd
            left join public.product rp
            on rpd."productId" = rp."id"
            left join public.payoutblock pb
            on pb."productId" = rp."id"
            WHERE "systemGeneratedStartDate" >= '2022-10-01' AND "systemGeneratedStartDate" < '2024-04-01' 
            AND rpd."status" = 0 AND (pb."payeeId" IN ( 'ec5e7cbb-2f82-4b05-8135-73746b59afb0' , '0005c2b6-29f2-491f-8565-c30567f232f9' )) 
            GROUP BY "systemGeneratedStartDate","systemGeneratedEndDate",rp."id",pb."payeeId"
                    ORDER BY "systemGeneratedStartDate" ASC

Output

sum systemGeneratedStartDate systemGeneratedEndDate payeeId
100 "2022-12-01" "2022-12-31" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2022-12-01" "2022-12-31" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-02-01" "2023-02-28" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-02-01" "2023-02-28" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-03-01" "2023-03-31" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-03-01" "2023-03-31" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-05-01" "2023-05-31" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-05-01" "2023-05-31" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-06-01" "2023-06-30" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-06-01" "2023-06-30" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-07-01" "2023-07-31" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-07-01" "2023-07-31" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-08-01" "2023-08-31" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-08-01" "2023-08-31" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"
100 "2023-09-01" "2023-09-30" "0005c2b6-29f2-491f-8565-c30567f232f9"
100 "2023-09-01" "2023-09-30" "ec5e7cbb-2f82-4b05-8135-73746b59afb0"

This is happening because payoutblock contains two payeeId for which the sum is getting clubbed.

What is required is the sum to not get clubbed even if two payeedId's are there for one payoutblock

EXPECTED OUTPUT

sum systemGeneratedStartDate systemGeneratedEndDate
100 "2022-12-01" "2022-12-31"
100 "2023-02-01" "2023-02-28"
100 "2023-03-01" "2023-03-31"
100 "2023-05-01" "2023-05-31"
100 "2023-06-01" "2023-06-30"
100 "2023-07-01" "2023-07-31"
100 "2023-08-01" "2023-08-31"
100 "2023-09-01" "2023-09-30"

[Link to the entity diagram][1] [1]: https://i.stack.imgur.com/nll7Q.png

CodePudding user response:

Instead of this:

left join public.payoutblock pb on pb."productId" = rp."id"

try something like:

left join (select distinct productID
           from public.payoutblock
           where "payeeId" IN ( 'ec5e7cbb-2f82-4b05-8135-73746b59afb0' ,
                                '0005c2b6-29f2-491f-8565-c30567f232f9' )
          ) pb on pb."productId" = rp."id"

and then in your main WHERE clause you would have:

WHERE pb."productId" is not null

or, potentially, just make the join an INNER rather than LEFT

CodePudding user response:

You should try summing up distinctively, so that repeated values for given partition won't be summed up, and your issue gets automatically solved.

SELECT SUM(DISTINCT rpd.amount),
       TO_CHAR(rpd.systemGeneratedStartDate, 'YYYY-MM-DD') AS sgStartDate,
       TO_CHAR(rpd.systemGeneratedEndDate  , 'YYYY-MM-DD') AS sgEndDate
FROM projecteddata    rpd
LEFT JOIN product     rp  ON rpd.productId = rp.id
LEFT JOIN payoutblock pb  ON pb.productId  = rp.id
WHERE systemGeneratedStartDate BETWEEN '2022-10-01' AND '2024-04-01' 
  AND pb.payeeId IN ( 'ec5e7cbb-2f82-4b05-8135-73746b59afb0' , '0005c2b6-29f2-491f-8565-c30567f232f9' ) 
GROUP BY systemGeneratedStartDate,
         systemGeneratedEndDate,
         rp.id
ORDER BY systemGeneratedStartDate ASC

Check the demo here.

  • Related