I have a table in BQ
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
) select * from tbl_campaign_ipmapping
For an input advertiser lets say advertiser1 The logic to be implemented is depending For each row retrieve the list of IP's and compare them with the IP's from all other rows(excluding the current row) and print the IP's that's not present.
So the expected output is
advertiser1, campaign1, ['50.0.0.0', '60.0.0.0', '70.0.0.0']
advertiser1,campaign2, ['30.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0']
advertiser1,campaign3, ['20.0.0.0','30.0.0.0', '50.0.0.0' ]
advertiser1,campaign4, ['40.0.0.0', '50.0.0.0', '60.0.0.0', '70.0.0.0'
CodePudding user response:
You may try utilizing another CTE to find all possible IPs before using this list to gather all other ips not apart of the current ip array eg
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
),
all_ips AS (
SELECT DISTINCT ip
FROM tbl_campaign_ipmapping, unnest(ip_array) ip
WHERE advertiser='advertiser1'
)
select
advertiser ,
campaign,
ARRAY(
SELECT i.ip
FROM all_ips i
WHERE i.ip NOT IN (
SELECT ipa FROM UNNEST(t.ip_array) ipa
)
) as other_ips
from
tbl_campaign_ipmapping t
WHERE advertiser='advertiser1'
or joining on a table of all possible ips (you may also consider materializing all_ips) eg
WITH tbl_campaign_ipmapping AS
(
SELECT 'advertiser1' as advertiser, 'campaign1' as campaign, ['10.0.0.0','20.0.0.0','30.0.0.0', '40.0.0.0'] AS ip_array UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign2' as campaign, ['10.0.0.0', '20.0.0.0', '50.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign3' as campaign, ['10.0.0.0', '40.0.0.0', '60.0.0.0', '70.0.0.0', '80.0.0.0'] UNION ALL
SELECT 'advertiser1' as advertiser, 'campaign4' as campaign, ['10.0.0.0', '20.0.0.0', '30.0.0.0'] UNION ALL
SELECT 'advertiser2' , 'campaign1' , ['10.1.1.1','20.1.1.1','30.1.1.1', '40.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign2' , ['10.1.1.1', '20.1.1.1', '50.1.1.1'] UNION ALL
SELECT 'advertiser2' , 'campaign3' , ['10.1.1.1', '40.1.1.1', '60.1.1.1', '70.1.1.1', '80.1.1.1'] UNION ALL
SELECT 'advertiser2', 'campaign4' , ['10.1.1.1', '20.1.1.1', '30.1.1.1']
),
all_ips AS (
SELECT DISTINCT ip
FROM tbl_campaign_ipmapping, unnest(ip_array) ip
WHERE advertiser='advertiser1'
)
select
advertiser,
campaign,
ARRAY_AGG(i.ip) as other_ips
from
tbl_campaign_ipmapping t
inner join
all_ips i on i.ip NOT IN UNNEST(t.ip_array)
WHERE advertiser='advertiser1'
group by
advertiser,campaign
Let me know if this works for you
CodePudding user response:
Consider below approach
with advertiser_ips as (
select advertiser, array_agg(distinct ip) ip_array
from tbl_campaign_ipmapping, unnest(ip_array) ip
group by advertiser
)
select advertiser, campaign,
array( select distinct ip
from b.ip_array ip
where not ip in unnest(a.ip_array)
) as missing_ip
from tbl_campaign_ipmapping a
join advertiser_ips b
using(advertiser)
if applied to sample data in your question - output is