HHeLiBeXの日記 正道編

日々の記憶の記録とメモ‥

Re:XMLEXISTSを使う場合の注意点(だと思う)

XMLEXISTSを使う場合の注意点(だと思う) - HHeLiBeXの日記 正道編
一応、記録のため(何)に、実験を再現するためのクエリとかを残しておく。(普段はそうするんだけど、なぜか今回はすっかり忘れていた‥)
まずはデータ。そのまんま載せるととんでもないことになるので、データ生成のためのスクリプトを残す。

#! /bin/bash

prefix=import_data

# generate target data
(
    printf "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
    printf "<hoge>\n"
    for n in 1 5 10 50 100 500 1000 ; do
        printf "<hoge_%04d>\n" ${n}
        for ((i = n; i > 0; --i)); do
            printf "<h_%04d id_%04d=\"%04d\">%04d</h_%04d>\n" ${n} ${n} ${i} ${i} ${n}
        done
        printf "</hoge_%04d>\n" ${n}
    done
    printf "</hoge>\n"
) > "${prefix}".001.xml

# generate dummy data
(
    printf "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
    printf "<hoge>\n"
    for n in 1 5 10 50 100 500 1000 ; do
        printf "<hoge_%04d>\n" ${n}
        for ((i = n; i > 0; --i)); do
            ii=$((${i}+1000))
            printf "<h_%04d id_%04d=\"%04d\">%04d</h_%04d>\n" ${n} ${n} ${ii} ${ii} ${n}
        done
        printf "</hoge_%04d>\n" ${n}
    done
    printf "</hoge>\n"
) > "${prefix}".002.xml

# generate DEL file
(
    for ((i = 0; i < 1000; ++i)); do
        if [ ${i} -eq 10 ]; then
            printf "%d,\"<XDS FIL='%s.002.xml' />\"\n" ${i} "${prefix}"
        else
            printf "%d,\"<XDS FIL='%s.001.xml' />\"\n" ${i} "${prefix}"
        fi
    done
) > "${prefix}"

次にデータベースおよびテーブルの生成。

CREATE DATABASE HOGE USING CODESET UTF-8 TERRITORY JP COLLATE USING SYSTEM PAGESIZE 32 K ;
CONNECT TO HOGE USER db2admin USING db2admin ;
DROP TABLE DOCUMENT ;
CREATE TABLE DOCUMENT (
    ID VARCHAR(8) NOT NULL,
    DATA XML NOT NULL,
    CONSTRAINT HOGE_DOC_PKEY PRIMARY KEY (ID)) ;

CREATE INDEX DOC_DATA_0001 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_0001/h_0001' AS SQL VARCHAR(8) ;
CREATE INDEX DOC_DATA_0005 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_0005/h_0005' AS SQL VARCHAR(8) ;
CREATE INDEX DOC_DATA_0010 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_0010/h_0010' AS SQL VARCHAR(8) ;
CREATE INDEX DOC_DATA_0050 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_0050/h_0050' AS SQL VARCHAR(8) ;
CREATE INDEX DOC_DATA_0100 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_0100/h_0100' AS SQL VARCHAR(8) ;
CREATE INDEX DOC_DATA_0500 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_0500/h_0500' AS SQL VARCHAR(8) ;
CREATE INDEX DOC_DATA_1000 ON DOCUMENT(DATA) GENERATE KEY USING XMLPATTERN '/hoge/hoge_1000/h_1000' AS SQL VARCHAR(8) ;

IMPORT FROM import_data OF DEL MODIFIED BY LOBSINFILE COMMITCOUNT 100 INSERT_UPDATE INTO DOCUMENT ;
RUNSTATS ON TABLE db2admin.DOCUMENT ;
RUNSTATS ON TABLE db2admin.DOCUMENT FOR INDEX ALL ;
REORG TABLE DOCUMENT ;
REORG INDEXES ALL FOR TABLE DOCUMENT ;
CONNECT RESET ;

データのボリュームが大きいので、IMPORTする際に100行ずつコミットする。LOADでもいいのだが、DB2 v9.1ではXMLデータがロードできないとかいう制約があったようななかったような(未確認)、で確かめるのが面倒だったので(ぉ
続いて実行するクエリなのだが、これも結構なボリュームなので、自動生成するスクリプト

#! /bin/bash

(
    printf "%s\n" "--#BGBLK 5"
    
    for n in 1 5 10 50 100 500 1000 ; do
        printf "xquery count(\n"
        printf " for \$doc in db2-fn:sqlquery(\"\n"
        printf "   select xmlquery('for \$doc in \$root/hoge return \$doc' passing DOCUMENT.DATA as \"\"root\"\")\n"
        printf "    from DOCUMENT\n"
        printf "    where (\n"
        printf "      xmlexists('\$root/hoge/hoge_%04d/h_%04d[. = \"\"%04d\"\"]' passing data as \"\"root\"\")\n" ${n} ${n} ${n}
        printf "    )\n"
        printf " \")\n"
        printf " return \$doc)\n"
        printf ";\n"
    done
    
    for n in 1 5 10 50 100 500 1000 ; do
        printf "xquery count(\n"
        printf " for \$doc in db2-fn:sqlquery(\"\n"
        printf "   select xmlquery('for \$doc in \$root/hoge return \$doc' passing DOCUMENT.DATA as \"\"root\"\")\n"
        printf "    from DOCUMENT\n"
        printf "    where (\n"
        printf "      xmlexists('\$root/hoge[hoge_%04d/h_%04d = \"\"%04d\"\"]' passing data as \"\"root\"\")\n" ${n} ${n} ${n}
        printf "    )\n"
        printf " \")\n"
        printf " return \$doc)\n"
        printf ";\n"
    done
    
    printf "%s\n" "--#EOBLK"
) > query.sql

で、db2batchの実行。

db2batch -d HOGE -a db2admin/db2admin -f query.sql -r result.txt -g off -o r -1 f -1 -i complete