I'm trying to join two tables in Snowflake using REGEX_SUBSTR and a wildcard but having no luck. Here is what I have:
SELECT P.NAME,
ACL.CONTENT_NAME,
REGEXP_SUBSTR(CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) AS PDFS
FROM ACTIVITY_DOWNLOAD AD
JOIN PROGRAM P
ON P.NAME LIKE '%' || REGEXP_SUBSTR(CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) || '%'
Running the query doesn't return anything--any ideas on what the issue is?
CodePudding user response:
using some CTE's for fake data:
WITH ACTIVITY_DOWNLOAD(CONTENT_NAME) as (
select * from values
('http://www.example.com/string1/string2.pdf'),
('http://www.example.com/string1/string3.gif')
), PROGRAM(name) as (
select * from values
('string2.pdf')
)
and the SQL
SELECT P.NAME,
ACL.CONTENT_NAME,
REGEXP_SUBSTR(CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) AS PDFS
FROM ACTIVITY_DOWNLOAD ACL
JOIN PROGRAM P
ON P.NAME LIKE '%' || REGEXP_SUBSTR(ACL.CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) || '%'
gives:
NAME | CONTENT_NAME | PDFS |
---|---|---|
string2.pdf | http://www.example.com/string1/string2.pdf | string2.pdf |
which aligned with what I would expect.
depending how large your ACTIVITY_DOWNLOAD
table is, you might find a CTE/sub-select give better performance.
SELECT P.NAME,
f.CONTENT_NAME,
f.file_name AS PDFS
FROM (
select distinct
CONTENT_NAME,
REGEXP_SUBSTR(ACL.CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) as file_name,
'%' || file_name || '%' as like_filter
from ACTIVITY_DOWNLOAD as acl
)f
JOIN PROGRAM P
ON P.NAME LIKE f.like_filter
OR
WITH clean_content_names as (
select distinct
CONTENT_NAME,
REGEXP_SUBSTR(ACL.CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) as file_name,
'%' || file_name || '%' as like_filter
from ACTIVITY_DOWNLOAD as acl
)
SELECT P.NAME,
f.CONTENT_NAME,
f.file_name AS PDFS
FROM clean_content_names f
JOIN PROGRAM P
ON P.NAME LIKE f.like_filter
WITH ACTIVITY_DOWNLOAD(CONTENT_NAME) as (
select * from values
('http://www.example.com/string1/string2.pdf'),
('http://www.example.com/string1/string3.gif')
), PROGRAM(name) as (
select * from values
('string2.pdf')
)
SELECT P.NAME,
f.CONTENT_NAME,
f.file_name AS PDFS
FROM (
select distinct
CONTENT_NAME,
REGEXP_SUBSTR(ACL.CONTENT_NAME, '/([^/] (\\.pdf))$', 1, 1, 'e', 1) as file_name,
'%' || file_name || '%' as like_filter
from ACTIVITY_DOWNLOAD as acl
)f
JOIN PROGRAM P
ON P.NAME LIKE f.like_filter;
NAME | CONTENT_NAME | PDFS |
---|---|---|
string2.pdf | http://www.example.com/string1/string2.pdf | string2.pdf |