Home > Software design >  How to increase performance when inserting more than 40k rows
How to increase performance when inserting more than 40k rows

Time:11-25

CREATE TABLE new_details_staging 
(
    e_id         NUMBER(10),
    e_name       VARCHAR2(30),
    portal_desc  VARCHAR2(50),
    risk_dec     VARCHAR2(50),
    CONSTRAINT pk_new_details_staging PRIMARY KEY (e_id)
);

INSERT INTO new_details_staging 
VALUES (11, 'A', 'AA', 'High');

INSERT INTO new_details_staging 
VALUES (22, 'B', 'BB', 'Low');

CREATE TABLE lookup_ref 
(
    ref_id       NUMBER(10),
    ref_typ      VARCHAR2(30),
    ref_typ_desc VARCHAR2(20),
    CONSTRAINT pk_lookup_ref PRIMARY KEY (ref_id)
);

INSERT INTO lookup_ref 
VALUES (181, 'portal', 'AA');

INSERT INTO lookup_ref 
VALUES (182, 'portal', 'BB');

INSERT INTO lookup_ref 
VALUES (183, 'risk', 'High');

INSERT INTO lookup_ref 
VALUES (184, 'risk', 'Low');

CREATE TABLE new_details_main 
(
    e_id    NUMBER(10),
    e_name  VARCHAR2(30),
    portal  NUMBER(20),
    risk    NUMBER(20),
    CONSTRAINT pk_new_details_main PRIMARY KEY (e_id)
);

COMMIT;

My attempt:

INSERT INTO new_details_main (e_id, e_name, portal,risk)
    SELECT  
        n.e_id,
        n.e_name,
        (SELECT lr.ref_id
         FROM lookup_ref lr
         WHERE lr.ref_typ = 'portal'
           AND lr.ref_typ_desc = n.portal_desc),
        (SELECT lr.ref_id
         FROM lookup_ref lr
         WHERE lr.ref_typ = 'risk'
           AND lr.ref_typ_desc = n.risk_dec)
    FROM    
        new_details_staging n;

Currently, I am inserting a few records and it is giving exact results but in actuality, there are 40k records so I believe it will give performance issues also. Is there any way to insert the records faster because I will write a procedure for this insertion? Is there any other in which I can write an insert query inside the procedure?

CodePudding user response:

On my laptop, we can scale your test easily enough

SQL>
SQL> CREATE TABLE new_details_staging
  2  (
  3      e_id         NUMBER(10),
  4      e_name       VARCHAR2(30),
  5      portal_desc  VARCHAR2(50),
  6      risk_dec     VARCHAR2(50),
  7      CONSTRAINT pk_new_details_staging PRIMARY KEY (e_id)
  8  );

Table created.

SQL>
SQL> INSERT INTO new_details_staging VALUES (11, 'A', 'AA', 'High');

1 row created.

SQL> INSERT INTO new_details_staging  VALUES (22, 'B', 'BB', 'Low');

1 row created.

SQL>
SQL> insert into new_details_staging
  2  select e_id*500000 rownum, e_name, portal_desc, risk_dec
  3  from new_details_staging,
  4   ( select 1 from dual connect by level <= 400000 );

800000 rows created.

SQL>
SQL> CREATE TABLE lookup_ref
  2  (
  3      ref_id       NUMBER(10),
  4      ref_typ      VARCHAR2(30),
  5      ref_typ_desc VARCHAR2(20),
  6      CONSTRAINT pk_lookup_ref PRIMARY KEY (ref_id)
  7  );

Table created.

SQL>
SQL> INSERT INTO lookup_ref VALUES (181, 'portal', 'AA');

1 row created.

SQL> INSERT INTO lookup_ref VALUES (182, 'portal', 'BB');

1 row created.

SQL> INSERT INTO lookup_ref VALUES (183, 'risk', 'High');

1 row created.

SQL> INSERT INTO lookup_ref VALUES (184, 'risk', 'Low');

1 row created.

SQL>
SQL> CREATE TABLE new_details_main
  2  (
  3      e_id    NUMBER(10),
  4      e_name  VARCHAR2(30),
  5      portal  NUMBER(20),
  6      risk    NUMBER(20),
  7      CONSTRAINT pk_new_details_main PRIMARY KEY (e_id)
  8  );

Table created.

SQL>
SQL> set timing on
SQL> INSERT INTO new_details_main (e_id, e_name, portal,risk)
  2      SELECT
  3          n.e_id,
  4          n.e_name,
  5          (SELECT lr.ref_id
  6           FROM lookup_ref lr
  7           WHERE lr.ref_typ = 'portal'
  8             AND lr.ref_typ_desc = n.portal_desc),
  9          (SELECT lr.ref_id
 10           FROM lookup_ref lr
 11           WHERE lr.ref_typ = 'risk'
 12             AND lr.ref_typ_desc = n.risk_dec)
 13      FROM
 14          new_details_staging n;

800002 rows created.

Elapsed: 00:00:02.97
SQL>
SQL>

So around 3 seconds for 800,000 rows. I think you'll be fine :-)

If the lookups are always true and key preserved, you might get some benefit converting to a join, eg

SQL> set timing on
SQL> INSERT INTO new_details_main (e_id, e_name, portal,risk)
  2      SELECT
  3          n.e_id,
  4          n.e_name,
  5          lr.ref_id,
  6          lr1.ref_id
  7      FROM
  8          new_details_staging n,
  9          lookup_ref lr,
 10          lookup_ref lr1
 11  where lr.ref_typ = 'portal'
 12  AND lr.ref_typ_desc = n.portal_desc
 13  and lr1.ref_typ = 'risk'
 14  AND lr1.ref_typ_desc = n.risk_dec ;

800002 rows created.

Elapsed: 00:00:02.64
  • Related