PostgreSQL
 sql >> Database >  >> RDS >> PostgreSQL

Importa file XML in PostgreSQL

Negromanzia:Per coloro che necessitano di un esempio funzionante:

DO $$
   DECLARE myxml xml;
BEGIN

myxml := XMLPARSE(DOCUMENT convert_from(pg_read_binary_file('MyData.xml'), 'UTF8'));

DROP TABLE IF EXISTS mytable;
CREATE TEMP TABLE mytable AS 

SELECT 
     (xpath('//ID/text()', x))[1]::text AS id
    ,(xpath('//Name/text()', x))[1]::text AS Name 
    ,(xpath('//RFC/text()', x))[1]::text AS RFC
    ,(xpath('//Text/text()', x))[1]::text AS Text
    ,(xpath('//Desc/text()', x))[1]::text AS Desc
FROM unnest(xpath('//record', myxml)) x
;

END$$;


SELECT * FROM mytable;

O con meno rumore

SELECT 
     (xpath('//ID/text()', myTempTable.myXmlColumn))[1]::text AS id
    ,(xpath('//Name/text()', myTempTable.myXmlColumn))[1]::text AS Name 
    ,(xpath('//RFC/text()', myTempTable.myXmlColumn))[1]::text AS RFC
    ,(xpath('//Text/text()', myTempTable.myXmlColumn))[1]::text AS Text
    ,(xpath('//Desc/text()', myTempTable.myXmlColumn))[1]::text AS Desc
    ,myTempTable.myXmlColumn as myXmlElement
FROM unnest(
    xpath
    (    '//record'
        ,XMLPARSE(DOCUMENT convert_from(pg_read_binary_file('MyData.xml'), 'UTF8'))
    )
) AS myTempTable(myXmlColumn)
;

Con questo file XML di esempio (MyData.xml):

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<data-set>
    <record>
        <ID>1</ID>
        <Name>A</Name>
        <RFC>RFC 1035[1]</RFC>
        <Text>Address record</Text>
        <Desc>Returns a 32-bit IPv4 address, most commonly used to map hostnames to an IP address of the host, but it is also used for DNSBLs, storing subnet masks in RFC 1101, etc.</Desc>
    </record>
    <record>
        <ID>2</ID>
        <Name>NS</Name>
        <RFC>RFC 1035[1]</RFC>
        <Text>Name server record</Text>
        <Desc>Delegates a DNS zone to use the given authoritative name servers</Desc>
    </record>
</data-set>

Nota:
MyData.xml deve trovarsi nella directory PG_Data (la directory padre della directory pg_stat).
es. /var/lib/postgresql/9.3/main/MyData.xml
Ciò richiede PostGreSQL 9.1+

Nel complesso, puoi ottenerlo senza file, in questo modo:

SELECT 
     (xpath('//ID/text()', myTempTable.myXmlColumn))[1]::text AS id
    ,(xpath('//Name/text()', myTempTable.myXmlColumn))[1]::text AS Name 
    ,(xpath('//RFC/text()', myTempTable.myXmlColumn))[1]::text AS RFC
    ,(xpath('//Text/text()', myTempTable.myXmlColumn))[1]::text AS Text
    ,(xpath('//Desc/text()', myTempTable.myXmlColumn))[1]::text AS Desc
    ,myTempTable.myXmlColumn as myXmlElement 
    -- Source: https://en.wikipedia.org/wiki/List_of_DNS_record_types
FROM unnest(xpath('//record', 
 CAST('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<data-set>
    <record>
        <ID>1</ID>
        <Name>A</Name>
        <RFC>RFC 1035[1]</RFC>
        <Text>Address record</Text>
        <Desc>Returns a 32-bit IPv4 address, most commonly used to map hostnames to an IP address of the host, but it is also used for DNSBLs, storing subnet masks in RFC 1101, etc.</Desc>
    </record>
    <record>
        <ID>2</ID>
        <Name>NS</Name>
        <RFC>RFC 1035[1]</RFC>
        <Text>Name server record</Text>
        <Desc>Delegates a DNS zone to use the given authoritative name servers</Desc>
    </record>
</data-set>
' AS xml)   
)) AS myTempTable(myXmlColumn)
;

Si noti che a differenza di MS-SQL, xpath text() restituisce NULL su un valore NULL e non una stringa vuota.
Se per qualsiasi motivo è necessario verificare esplicitamente l'esistenza di NULL, è possibile utilizzare [not(@xsi:nil="true")] , a cui devi passare un array di namespace, perché altrimenti ottieni un errore (tuttavia, puoi omettere tutti i namespace tranne xsi).

SELECT 
     (xpath('//xmlEncodeTest[1]/text()', myTempTable.myXmlColumn))[1]::text AS c1

    ,(
    xpath('//xmlEncodeTest[1][not(@xsi:nil="true")]/text()', myTempTable.myXmlColumn
    ,
    ARRAY[
        -- ARRAY['xmlns','http://www.w3.org/1999/xhtml'], -- defaultns
        ARRAY['xsi','http://www.w3.org/2001/XMLSchema-instance'],
        ARRAY['xsd','http://www.w3.org/2001/XMLSchema'],        
        ARRAY['svg','http://www.w3.org/2000/svg'],
        ARRAY['xsl','http://www.w3.org/1999/XSL/Transform']
    ]
    )
    )[1]::text AS c22


    ,(xpath('//nixda[1]/text()', myTempTable.myXmlColumn))[1]::text AS c2 
    --,myTempTable.myXmlColumn as myXmlElement
    ,xmlexists('//xmlEncodeTest[1]' PASSING BY REF myTempTable.myXmlColumn) AS c1e
    ,xmlexists('//nixda[1]' PASSING BY REF myTempTable.myXmlColumn) AS c2e
    ,xmlexists('//xmlEncodeTestAbc[1]' PASSING BY REF myTempTable.myXmlColumn) AS c1ea
FROM unnest(xpath('//row', 
     CAST('<?xml version="1.0" encoding="utf-8"?>
    <table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
      <row>
        <xmlEncodeTest xsi:nil="true" />
        <nixda>noob</nixda>
      </row>
    </table>
    ' AS xml)   
    )
) AS myTempTable(myXmlColumn)
;

Puoi anche controllare se un campo è contenuto in un testo XML, facendo

 ,xmlexists('//xmlEncodeTest[1]' PASSING BY REF myTempTable.myXmlColumn) AS c1e

ad esempio quando si passa un valore XML a una procedura/funzione memorizzata per CRUD.(vedi sopra)

Inoltre, nota che il modo corretto per passare un valore null in XML è <elementName xsi:nil="true" /> e non <elementName /> o niente. Non esiste un modo corretto per passare NULL negli attributi (puoi solo omettere l'attributo, ma diventa difficile/lento dedurre il numero di colonne e i loro nomi in un set di dati di grandi dimensioni).

per esempio.

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<table>
    <row column1="a" column2="3" />
    <row column1="b" column2="4" column3="true" />
</table>

(è più compatto, ma pessimo se devi importarlo, specialmente se da file XML con più GB di dati - vedi un meraviglioso esempio di questo nel dump dei dati di StackOverflow)

SELECT 
     myTempTable.myXmlColumn
    ,(xpath('//@column1', myTempTable.myXmlColumn))[1]::text AS c1
    ,(xpath('//@column2', myTempTable.myXmlColumn))[1]::text AS c2
    ,(xpath('//@column3', myTempTable.myXmlColumn))[1]::text AS c3
    ,xmlexists('//@column3' PASSING BY REF myTempTable.myXmlColumn) AS c3e
    ,case when (xpath('//@column3', myTempTable.myXmlColumn))[1]::text is null then 1 else 0 end AS is_null 
FROM unnest(xpath('//row', '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<table>
    <row column1="a" column2="3" />
    <row column1="b" column2="4" column3="true" />
</table>'
))  AS myTempTable(myXmlColumn)