Home > Software design >  Filtering and calculating over JSONB object
Filtering and calculating over JSONB object

Time:01-03

I have a table with JSONB column which holds data like this:

create table car_stats (
    id int primary key,
    city varchar,
    date timestamp,
    info varchar
    stats jsonb
)

stats example:

[
  {
    "brand": "AUDI",
    "status": "NEW"
  },
  {
    "brand": "BMW",
    "status": "PRODUCTION"
  },
  {
    "brand": "BMW",
    "status": "NEW"
  },
  {
    "brand": "BMW",
    "status": "NEW"
  },
  {
    "brand": "BMW",
    "status": "DELIVERED"
  }
]

I want to count percentage of new / production / delivered of car's brand grouped by city and month

 CITY   MONTH BRAND NEW  PRODUCTION DELIVERED
 LONDON  3    AUDI  100%   0          0
 PARIS   2    BMW   50%    25%        25%

I tried the following, but I have no idea how to calculate elements in JSON (e.g. all BMW in status NEW)

with cte as (
   select stats ->> 'brand',
          stats ->> 'status',
          city, 
          date
   from car_stats
   group by city
), 
 grouped as (
   select cte.brand,
          cte.country,
          cte.date,
          ARRAY_TO_JSON(ARRAY_AGG(base)) as statistics
   from cte
   group by cte.brand, cte.city, cte.date       
 ),
 stats as (
    count % statistics somehow here.. ? 
 )

)

CodePudding user response:

You can associate each each element in stats with its corresponding city, and then use sum with group by:

with recursive cte(id, c, p, i, d) as (
   select c.id, c.city, (c.stats ->> 0)::jsonb, 1, c.stats from car_stats c
   union all
   select c.id, c.c, (c.d ->> c.i)::jsonb, c.i 1, c.d from cte c where c.i < jsonb_array_length(c.d)
)
select c.c, extract(month from c1.date), c.p -> 'brand', c.p -> 'factory'
   round(sum(case when (c.p -> 'status')::text = '"NEW"' then 1 else 0 end)/count(*)::decimal,2), 
   round(sum(case when (c.p -> 'status')::text = '"PRODUCTION"' then 1 else 0 end)/count(*)::decimal,2), 
   round(sum(case when (c.p -> 'status')::text = '"DELIVERED"' then 1 else 0 end)/count(*)::decimal,2)
from cte c join car_stats c1 on c.id = c1.id 
group by c.c, extract(month from c1.date), c.p -> 'brand', c -> 'factory'

See demo.

CodePudding user response:

First expand brand and status into separate rows using cross join lateral and then use count filter conditional aggregation.

with t as 
(
 select city, date_trunc('month', "date")::date y_month, brand, status
 from car_stats 
 cross join lateral
 (
  select j ->> 'brand' brand,
         j ->> 'status' status
  from jsonb_array_elements(stats) j
 ) t
)
select city, y_month, brand,
  count(*) filter (where status = 'NEW')::numeric/count(*)*100 "NEW",
  count(*) filter (where status = 'PRODUCTION')::numeric/count(*)*100 "PRODUCTION",
  count(*) filter (where status = 'DELIVERED')::numeric/count(*)*100 "DELIVERED"
from t
group by city, y_month, brand;
  • Related