Home > Software design >  Perform Snowflake SQL queries in FOR LOOP based on data from another table
Perform Snowflake SQL queries in FOR LOOP based on data from another table

Time:03-31

In Snowflake, I am trying to create a SQL script with a for loop that outputs the results into a new table based on the data_type column.

I have a table called PROFILE_TABLE_LIST that has columns for the table name, the column name, and the data type, as shown below:

TABLENAME COLUMN_NAME DATA_TYPE
Table1 PLANTS TEXT
Table1 HEIGHT FLOAT
Table2 COLOR TEXT
Table2 SMELL TEXT

I am currently trying to do a for loop using a cursor and perform the queries on each of the rows to profile the table based on the column types to look something like this:

In Snowflake, I am trying to create a SQL script with a for loop that outputs the results into a new table based on the data_type column.

I have a table called PROFILE_TABLE_LIST that has columns for the table name, the column name, and the data type, as shown below:

TABLENAME COLUMN_NAME DATA_TYPE
Table1 PLANTS TEXT
Table1 HEIGHT FLOAT
Table2 COLOR TEXT
Table2 SMELL TEXT

I am currently trying to do a for loop using a cursor and perform the queries on each of the rows to profile the table based on the column types to look something like this:

TABLENAME COLUMN_NAME DATA_TYPE COUNT MAX_LENGTH MAX_VALUE
Table1 PLANTS TEXT 10 82 NULL
Table1 HEIGHT FLOAT 10 NULL 58.6
Table2 COLOR TEXT 20 56 NULL
Table2 SMELL TEXT 20 23 NULL

Eventually, I want to run different select statements conditioned on the data_type, but at this stage, I am only focusing on the count. This is the current loop I have. However, the select statement is not getting executed properly as the table name is being passed as a string, and if I use TABLE(tablename) I receive a syntax error (I have that line commented out below):

-- Anonymous Snowflake Scripting block from the question.
-- It walks every row of PROFILE_TABLE_LIST with a cursor, appends one object
-- per row (table name plus a COUNT) to an array, and finally returns the
-- flattened array as a table.
declare
    tablename string;
    column_name string;
    -- row_count and name are declared but never used in this block.
    row_count integer;
    table_schema string;
    table_catalog string;
    name string;
    
    -- Accumulates one OBJECT per cursor row.
    tmp_array ARRAY default ARRAY_CONSTRUCT();

    res resultset default (select * from PROFILE_TABLE_LIST);
    c1 cursor for res;
    rs_output RESULTSET;
begin

  for record in c1 do
    tablename := record.TABLENAME;
    column_name := record.column_name;
    -- NOTE(review): the sample PROFILE_TABLE_LIST shown in the question lists
    -- only TABLENAME, COLUMN_NAME and DATA_TYPE -- confirm that TABLE_SCHEMA
    -- and TABLE_CATALOG actually exist on the real table.
    table_schema := record.table_schema;
    table_catalog := record.table_catalog;
   
   
    
    -- This is the statement the question reports as not executing properly:
    -- tablename and column_name appear inside the subquery as bare names,
    -- while the loop variables hold the names as strings.
    tmp_array := array_append(:tmp_array, OBJECT_CONSTRUCT('tmp_tables', record.TABLENAME, 'COUNT', (SELECT COUNT(column_name) FROM tablename)));
    -- Alternative attempt using TABLE(tablename); left commented out because
    -- it is reported to raise a syntax error.
    -- tmp_array := array_append(:tmp_array, OBJECT_CONSTRUCT('tmp_tables', record.TABLENAME, 'COUNT', (SELECT COUNT(column_name) FROM TABLE(tablename))));
 
     
    
  end for;

  -- Unpack the accumulated objects: one output row per array element.
  rs_output := (select value:tmp_tables, value:COUNT from table(flatten(:tmp_array)));
  return table(rs_output);
end;

CodePudding user response:

I would build a block of SQL and run it at the end.

To start, here is a simple SELECT used as a pattern to show the build-up process, from which you could write your own dynamic SQL:

-- Generates one profiling SELECT per (table, column) pair and joins them all
-- into a single UNION ALL statement returned in the column THE_BIG_SQL.
WITH profile_targets AS (
    -- Inline stand-in for the list of columns to profile.
    SELECT
        v.tablename,
        v.column_name,
        v.data_type
    FROM VALUES
        ('Table1', 'PLANTS', 'TEXT'),
        ('Table1', 'HEIGHT', 'FLOAT'),
        ('Table2', 'COLOR', 'TEXT'),
        ('Table2', 'SMELL', 'TEXT')
        v(tablename, column_name, data_type)
),

templated AS (
    -- Pick the statement template for the column's data type. The lowercase
    -- words tablename / column_name / data_type are placeholders; data types
    -- other than TEXT and FLOAT get no template (NULL).
    SELECT
        tablename,
        column_name,
        data_type,
        CASE
            WHEN data_type = 'TEXT' THEN 'SELECT ''tablename'' as TABLENAME, ''column_name'' as COLUMN_NAME, ''data_type'' as DATA_TYPE, count(column_name) as count, MAX(LEN(column_name)) as max_length, null as max_value FROM tablename '
            WHEN data_type = 'FLOAT' THEN 'SELECT ''tablename'' as TABLENAME, ''column_name'' as COLUMN_NAME, ''data_type'' as DATA_TYPE, count(column_name) as count, null as max_length, MAX(column_name) as max_value FROM tablename '
        END AS sql_template
    FROM profile_targets
),

rendered AS (
    -- Substitute the placeholders with the row's values. REPLACE is
    -- case-sensitive, so the uppercase output aliases are left untouched.
    SELECT
        tablename,
        REPLACE(
            REPLACE(
                REPLACE(sql_template, 'data_type', data_type),
                'column_name', column_name
            ),
            'tablename', tablename
        ) AS final_sql
    FROM templated
)

SELECT
    LISTAGG(final_sql, ' UNION ALL ') WITHIN GROUP (ORDER BY tablename) AS the_big_sql
FROM rendered;

which gives:

THE_BIG_SQL
SELECT 'Table1' as TABLENAME, 'PLANTS' as COLUMN_NAME, 'TEXT' as DATA_TYPE, count(PLANTS) as count, MAX(LEN(PLANTS)) as max_length, null as max_value FROM Table1 UNION ALL SELECT 'Table1' as TABLENAME, 'HEIGHT' as COLUMN_NAME, 'FLOAT' as DATA_TYPE, count(HEIGHT) as count, null as max_length, MAX(HEIGHT) as max_value FROM Table1 UNION ALL SELECT 'Table2' as TABLENAME, 'COLOR' as COLUMN_NAME, 'TEXT' as DATA_TYPE, count(COLOR) as count, MAX(LEN(COLOR)) as max_length, null as max_value FROM Table2 UNION ALL SELECT 'Table2' as TABLENAME, 'SMELL' as COLUMN_NAME, 'TEXT' as DATA_TYPE, count(SMELL) as count, MAX(LEN(SMELL)) as max_length, null as max_value FROM Table2

Which, if run against these tables:

-- Sample "real data" tables that the generated profiling SQL runs against.
CREATE TABLE table1 (
    plants TEXT,
    height FLOAT
);
CREATE TABLE table2 (
    color TEXT,
    smell TEXT
);

-- Three rows for table1 and two for table2.
INSERT INTO table1 (plants, height)
VALUES
    ('big plant', 10.1),
    ('medium plant', 5.3),
    ('tiny', 1.0);
INSERT INTO table2 (color, smell)
VALUES
    ('red', 'bold'),
    ('blue', 'weak');

gives:

TABLENAME COLUMN_NAME DATA_TYPE COUNT MAX_LENGTH MAX_VALUE
Table1 PLANTS TEXT 3 12
Table1 HEIGHT FLOAT 3 10.1
Table2 COLOR TEXT 2 4
Table2 SMELL TEXT 2 4

But here is the dynamic answer fully written for you:

First, create the table that lists the work to be done:

-- Work list: one row per (table, column) pair to profile, with its data type.
CREATE TABLE PROFILE_TABLE_LIST AS
SELECT
    v.tablename,
    v.column_name,
    v.data_type
FROM VALUES
    ('Table1', 'PLANTS', 'TEXT'),
    ('Table1', 'HEIGHT', 'FLOAT'),
    ('Table2', 'COLOR', 'TEXT'),
    ('Table2', 'SMELL', 'TEXT')
    v(tablename, column_name, data_type);

Then, using the previously created "real data tables", we can run:

-- Profiles every (table, column) pair listed in PROFILE_TABLE_LIST.
-- For each row it builds one SELECT that reports the table name, column name,
-- data type, non-null count, and either the max string length (TEXT columns)
-- or the max value (every other data type). The per-row SELECTs are joined
-- with UNION ALL and executed once; the combined result is returned as a table.
--
-- NOTE(review): table and column names are concatenated straight into the
-- statement text, so PROFILE_TABLE_LIST must contain only trusted identifiers.
-- NOTE(review): if PROFILE_TABLE_LIST is empty, final_sql stays '' when it is
-- passed to EXECUTE IMMEDIATE -- presumably that fails; confirm and guard if
-- an empty work list is possible.
declare
    sql string;        -- profiling SELECT for the current cursor row
    final_sql string;  -- all per-row SELECTs joined with UNION ALL
    c1 cursor for (select tablename, column_name, data_type from PROFILE_TABLE_LIST);
    res resultset;
begin
  final_sql := '';
  
  for record in c1 do
    if(record.data_type = 'TEXT') THEN
        -- TEXT: report the longest value's length; max_value is not applicable.
        sql := 'SELECT '''||record.tablename||''' as TABLENAME, '''||record.column_name||''' as COLUMN_NAME, '''||record.data_type||''' as DATA_TYPE, count('||record.column_name||') as count, MAX(LEN('||record.column_name||')) as max_length, null as max_value FROM '||record.tablename||' ';
    else
        -- Any other type: report the max value; max_length is not applicable.
        -- The row's actual data type is emitted here (previously this branch
        -- emitted the literal text 'data_type').
        sql := 'SELECT '''||record.tablename||''' as TABLENAME, '''||record.column_name||''' as COLUMN_NAME, '''||record.data_type||''' as DATA_TYPE, count('||record.column_name||') as count, null as max_length, MAX('||record.column_name||') as max_value FROM '||record.tablename||' ';
    end if;
    -- Put the separator before every statement except the first.
    if(final_sql<>'')then 
        final_sql := final_sql || ' UNION ALL ';
    end if;
    final_sql := final_sql || sql;
        
  end for;

  res := (execute immediate :final_sql);
  return table(res);
end;

gives:

TABLENAME COLUMN_NAME DATA_TYPE COUNT MAX_LENGTH MAX_VALUE
Table1 PLANTS TEXT 3 12
Table1 HEIGHT FLOAT 3 10.1
Table2 COLOR TEXT 2 4
Table2 SMELL TEXT 2 4
  • Related