I have data like this:
date user prod shop cat1 cat2
2022-02-01 1 a a ah g
2022-02-02 1 a1 b ah g
2022-04-03 1 a a ah g
2022-04-19 1 a a ah g
2022-05-01 2 b c bg g
I want to know how many user buy the same product in the same shop for >2 times in period 1 year. The result i want like:
table 1
cat1 number_of_user
ah 1
table 2
cat2 number_of_user
g 1
For total user, my query like:
WITH data_product AS(
SELECT DATE(payment_time) date,
user,
CONCAT(prod, "_", shop) product_shop,
cat1,
cat2
FROM
a
WHERE
DATE(payment_time) BETWEEN "2022-01-01" AND DATE_SUB(current_date, INTERVAL 1 day)
ORDER BY 1,2,3),
purchased AS (
SELECT user, product_shop, count(product_shop) tot_purchased
FROM data_product
GROUP BY 1,2
HAVING COUNT(product_shop) > 2
)
SELECT COUNT(user) number_of_user FROM purchased
Please help to get number of user buy the same product in the same shop more than 2 times in period based on cat1
and cat2
.
CodePudding user response:
I think this query might work:
with cte as
(select distinct
PDate,userID as userID,prod as prod,shop,cat1 as cat1,cat2,
count(userID) over (partition by UserID,prod,shop,year(Pdate),cat1) as cat1_count,
count(PDate) over (partition by UserID,prod,shop,year(Pdate),cat2) as cat2_count
from tbl1)
select
cte.cat1 as c1,'0' as c2,count(distinct cte.cat1) as Num
from cte
where cte.cat1_count>1
group by cte.prod,cte.userID,cte.cat1
union
select
'0',cte.cat2,count(distinct cte.cat2)
from cte
where cte.cat2_count>1
group by cte.prod,cte.userID,cte.cat2
CodePudding user response:
Try this:
create temporary table table1 as(
select *,extract(YEAR from date) as year from `projectid.dataset.table`
);
create temporary table table2 as(
select * except(date,cat2) ,count(user) over(partition by cat1,year,user,prod,shop) tcount from table1
);
create temporary table table4 as(
select * except(date,cat1) ,count(user) over(partition by cat2,year,user,prod,shop) tcount from table1
);
select distinct year,cat1 ,count(distinct user) number_of_user from table2 where tcount>2 group by YEAR,cat1;
select distinct year,cat2 ,count(distinct user) number_of_user from table4 where tcount>2 group by YEAR,cat2;
If you want a single result set you can union both the select statements.