Hi,
I used this"ugly" (dynamic SQL) procedure below to transpose the output of Text Analysis in HANA.
The procedure may not be as flexible as required, but at least it is dynamic.
It also concatenates multi values. You may want to get rid of this STRING_AGG piece in the query.
Furthermore, the procedure creates all columns as VARCHAR(5000) which is of course not appropriate when dealing with numbers.
Performance is ok, at least it is sufficient for my purposes. It takes about 15 sec to transpose 5 mio. rows to 65k rows and 63 columns.
Regards, Markus
-- THE DATA
CREATE SCHEMA "TEST";
SET SCHEMA "TEST";
DROP TABLE "DOCUMENTS";
CREATE COLUMN TABLE "DOCUMENTS"(
"ID" INTEGER PRIMARY KEY,
"CONTENT" VARCHAR(5000)
);
INSERT INTO "DOCUMENTS" VALUES (1,'I am Batman and I work for Wayne Enterprises. Spiderman is my friend.');
CREATE FULLTEXT INDEX "FTI_DOCUMENTS_CONTENT" ON "DOCUMENTS"("CONTENT") TEXT ANALYSIS ON CONFIGURATION 'EXTRACTION_CORE';
SELECT * FROM "$TA_FTI_DOCUMENTS_CONTENT";
DROP VIEW "V_TA";
CREATE VIEW "V_TA" AS (SELECT "ID" AS "ID", "TA_RULE", "TA_TOKEN", "TA_TYPE" FROM "$TA_FTI_DOCUMENTS_CONTENT");
-- THE LOGIC
SELECT T."ID", T."TA_RULE" ,"PERSON","ORGANIZATION/COMMERCIAL"
FROM (SELECT DISTINCT "ID","TA_RULE" FROM "TEST"."V_TA") AS T
LEFT JOIN (SELECT "ID","TA_RULE",TO_CLOB(STRING_AGG("TA_TOKEN",'|')) AS "PERSON"
FROM (SELECT DISTINCT "ID","TA_RULE","TA_TOKEN" FROM "TEST"."V_TA" WHERE "TA_TYPE" = 'PERSON') GROUP BY "ID", "TA_RULE") AS T0
ON T."ID" = T0."ID" AND T."TA_RULE" = T0."TA_RULE"
LEFT JOIN (SELECT "ID","TA_RULE",TO_CLOB(STRING_AGG("TA_TOKEN",'|')) AS "ORGANIZATION/COMMERCIAL"
FROM (SELECT DISTINCT "ID","TA_RULE","TA_TOKEN" FROM "TEST"."V_TA" WHERE "TA_TYPE" = 'ORGANIZATION/COMMERCIAL') GROUP BY "ID", "TA_RULE") AS T1
ON T."ID" = T1."ID" AND T."TA_RULE" = T1."TA_RULE";
-- THE PROCEDURE
CREATE GLOBAL TEMPORARY TABLE "TEST"."TRANSPOSE_DYN_OUT" ("ID" INTEGER);-- just a dummy
DROP PROCEDURE "TRANSPOSE_DYN";
CREATE PROCEDURE "TRANSPOSE_DYN" (IN i_limit INTEGER, OUT o_sql CLOB)
LANGUAGE SQLSCRIPT
AS BEGIN
DECLARE v_i INTEGER := 0;
DECLARE v_col_list CLOB := '';
DECLARE v_col_list2 CLOB := '';
DECLARE v_select CLOB := '';
DECLARE v_sql CLOB := '';
DECLARE CURSOR c_name FOR SELECT "TA_TYPE" AS "NAME" FROM "TEST"."V_TA" GROUP BY "TA_TYPE" ORDER BY COUNT(*) DESC LIMIT :i_limit; -- get the columns
FOR cur_row AS c_name DO
v_col_list := :v_col_list || ',"'||cur_row.NAME||'"';
v_col_list2 := :v_col_list2 || ',"'||cur_row.NAME||'" VARCHAR(5000)';
v_select := :v_select || ' LEFT JOIN (SELECT "ID", "TA_RULE", TO_VARCHAR(STRING_AGG("TA_TOKEN",''|'')) AS "'||cur_row.NAME||'" ';
v_select := :v_select || ' FROM (SELECT DISTINCT "ID", "TA_RULE", "TA_TOKEN" FROM "TEST"."V_TA" WHERE "TA_TYPE" = '''||cur_row.NAME||''') GROUP BY "ID", "TA_RULE") AS T'||:v_i;
v_select := :v_select || ' ON "T"."ID" = "T'||:v_i||'"."ID" AND "T"."TA_RULE" = "T'||:v_i||'"."TA_RULE" ' ;
v_i := :v_i + 1;
END FOR;
EXEC 'DROP TABLE "TEST"."TRANSPOSE_DYN_OUT"';
EXEC 'CREATE GLOBAL TEMPORARY TABLE "TEST"."TRANSPOSE_DYN_OUT"("ID" VARCHAR(5000), "TA_RULE" VARCHAR(5000)' || :v_col_list2 || ')';
v_sql := 'SELECT "T"."ID", "T"."TA_RULE" '||:v_col_list||' FROM (SELECT DISTINCT "ID", "TA_RULE" FROM "TEST"."V_TA") AS T ' || :v_select || ' INTO "TEST"."TRANSPOSE_DYN_OUT"';
EXEC :v_sql;
o_sql := :v_sql;
END;
CALL "TEST"."TRANSPOSE_DYN"(10,?); --IN LIMIT, OUT SQL
SELECT * FROM "TEST"."TRANSPOSE_DYN_OUT";