A partire da SQL Server 2016 esiste una soluzione basata su FROM OPENJSON():
-- Split a delimited string with OPENJSON (SQL Server 2016+) by rewriting it
-- as a JSON array of strings: 'val1,val2,val3' -> '["val1","val2","val3"]'.
DECLARE @str VARCHAR(100) = 'val1,val2,val3';

-- STRING_ESCAPE(..., 'json') escapes quotes, backslashes and control characters
-- before the array is assembled; without it such characters in @str produce
-- invalid JSON and OPENJSON fails.
-- NOTE: the delimiter must not be a character that STRING_ESCAPE rewrites (", \, /).
SELECT
    j.[key],   -- zero-based index of the fragment within the string
    j.[value], -- the fragment itself
    j.[type]   -- JSON type of the element (1 = string)
FROM OPENJSON('["' + REPLACE(STRING_ESCAPE(@str, 'json'), ',', '","') + '"]') AS j;
Il risultato:
key value type
0 val1 1
1 val2 1
2 val3 1
La documentazione dice chiaramente:
Quando OPENJSON analizza un array JSON, la funzione restituisce gli indici degli elementi nel testo JSON come chiavi.
Nel tuo caso sarebbe:
-- Sample data: two delimited strings in a temp table.
-- UNION ALL: the two literals are distinct, so no de-duplication pass is needed.
SELECT 'z_y_x' AS splitIt
INTO #split
UNION ALL
SELECT 'a_b_c';

DECLARE @delimiter CHAR(1) = '_';

-- Return the second fragment of every row.
-- STRING_ESCAPE(..., 'json') keeps quotes, backslashes and control characters
-- in the data from breaking the generated JSON array.
-- NOTE: @delimiter must not be a character that STRING_ESCAPE rewrites (", \, /).
SELECT
    sp.splitIt,
    s.[key],
    s.[value],
    s.[type]
FROM #split AS sp
CROSS APPLY OPENJSON('["' + REPLACE(STRING_ESCAPE(sp.splitIt, 'json'), @delimiter, '","') + '"]') AS s
WHERE s.[key] = 1; --zero based
Speriamo che le versioni future di STRING_SPLIT() includano questa informazione (la posizione dell'elemento).
AGGIORNAMENTO: test delle prestazioni e confronto con il popolare splitter di Jeff Moden
Prova questo:
--Create a dedicated database dbTest and switch to it;
--all test objects below are created inside it and it is dropped again at the end.
USE master;
GO
CREATE DATABASE dbTest;
GO
USE dbTest;
GO
--Jeff Moden's splitter
--Inline table-valued function that splits @pString at every occurrence of @pDelimiter.
--Returns one row per fragment:
--  ItemNumber = 1-based position of the fragment (ordered by its start position)
--  Item       = the fragment text
CREATE FUNCTION [dbo].[DelimitedSplit8K](@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
    WITH Ten(N) AS ( --10 rows
        SELECT 1
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS seed(N)
    ),
    Hundred(N) AS ( --10 x 10 = 100 rows
        SELECT 1 FROM Ten AS x CROSS JOIN Ten AS y
    ),
    TenThousand(N) AS ( --100 x 100 = 10,000 rows max
        SELECT 1 FROM Hundred AS x CROSS JOIN Hundred AS y
    ),
    Positions(N) AS ( --the numbers 1 .. DATALENGTH(@pString); no rows for NULL input
        SELECT TOP (ISNULL(DATALENGTH(@pString), 0))
               ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM TenThousand
    ),
    FragmentStart(N1) AS ( --position 1 plus every position directly after a delimiter
        SELECT 1
        UNION ALL
        SELECT p.N + 1
        FROM Positions AS p
        WHERE SUBSTRING(@pString, p.N, 1) = @pDelimiter
    ),
    FragmentLen(N1, L1) AS ( --length up to the next delimiter; 8000 when no delimiter follows
        SELECT fs.N1,
               ISNULL(NULLIF(CHARINDEX(@pDelimiter, @pString, fs.N1), 0) - fs.N1, 8000)
        FROM FragmentStart AS fs
    )
    SELECT ROW_NUMBER() OVER (ORDER BY fl.N1) AS ItemNumber,
           SUBSTRING(@pString, fl.N1, fl.L1) AS Item
    FROM FragmentLen AS fl
;
GO
--Warm-up call: avoid first call bias in the measurements below
SELECT ds.ItemNumber,
       ds.Item
FROM dbo.DelimitedSplit8K('a,b,c', ',') AS ds;
GO
--Result log: one row per measured test section
CREATE TABLE Results
(
    ID           INT IDENTITY,
    ResultSource VARCHAR(100), --label of the measured variant
    durationMS   INT,          --elapsed time in milliseconds
    RowsCount    INT           --number of rows written to the temp table
);
GO
--Input data: the strings to split, held once as NVARCHAR and once as VARCHAR
CREATE TABLE dbo.DelimitedItems
(
    ID               INT IDENTITY,
    DelimitedNString nvarchar(4000),
    DelimitedString  varchar(8000)
);
GO
--Build rows holding randomly mixed strings of 100 items each
--Play with the number of rows (the count after GO) and the item count (TOP)
INSERT INTO dbo.DelimitedItems(DelimitedNString)
SELECT STUFF((
        SELECT TOP 100 ',' + REPLACE(v.[name], ',', ';') --keep the delimiter out of the items
        FROM master..spt_values v
        WHERE LEN(v.[name]) > 0
        ORDER BY NEWID() --random selection and order
        FOR XML PATH('')), 1, 1, ''); --STUFF strips the leading comma
--Keep it twice in varchar and nvarchar
--Only the row just inserted still has no VARCHAR copy; the filter avoids
--rewriting every existing row again on each repetition of this batch.
UPDATE dbo.DelimitedItems
SET DelimitedString = DelimitedNString
WHERE DelimitedString IS NULL;
GO 500 --create 500 differently mixed rows
--The tests
--Each section follows the same pattern: take the start time, split all rows
--into a fresh temp table, then log label, row count and elapsed milliseconds.
DECLARE @d DATETIME2;

--Section 1: DelimitedSplit8K fed from the NVARCHAR column
SET @d = SYSUTCDATETIME();

SELECT src.ID, parts.Item, parts.ItemNumber
INTO #TEMP
FROM dbo.DelimitedItems AS src
CROSS APPLY dbo.DelimitedSplit8K(src.DelimitedNString, ',') AS parts;

INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'delimited8K with NVARCHAR(4000)',
       (SELECT COUNT(*) FROM #TEMP) AS RowCountInTemp,
       DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_NV_ms_delimitedSplit8K;

--Section 2: DelimitedSplit8K fed from the VARCHAR column
SET @d = SYSUTCDATETIME();

SELECT src.ID, parts.Item, parts.ItemNumber
INTO #TEMP2
FROM dbo.DelimitedItems AS src
CROSS APPLY dbo.DelimitedSplit8K(src.DelimitedString, ',') AS parts;

INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'delimited8K with VARCHAR(8000)',
       (SELECT COUNT(*) FROM #TEMP2) AS RowCountInTemp,
       DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_V_ms_delimitedSplit8K;

--Section 3: OPENJSON fed from the NVARCHAR column
SET @d = SYSUTCDATETIME();

SELECT src.ID, js.[Value] AS Item, js.[Key] AS ItemNumber
INTO #TEMP3
FROM dbo.DelimitedItems AS src
CROSS APPLY OPENJSON('["' + REPLACE(src.DelimitedNString, ',', '","') + '"]') AS js;

INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'OPENJSON with NVARCHAR(4000)',
       (SELECT COUNT(*) FROM #TEMP3) AS RowCountInTemp,
       DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_NV_ms_OPENJSON;

--Section 4: OPENJSON fed from the VARCHAR column
SET @d = SYSUTCDATETIME();

SELECT src.ID, js.[Value] AS Item, js.[Key] AS ItemNumber
INTO #TEMP4
FROM dbo.DelimitedItems AS src
CROSS APPLY OPENJSON('["' + REPLACE(src.DelimitedString, ',', '","') + '"]') AS js;

INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'OPENJSON with VARCHAR(8000)',
       (SELECT COUNT(*) FROM #TEMP4) AS RowCountInTemp,
       DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_V_ms_OPENJSON;
GO
--Show the collected measurements
SELECT r.ID,
       r.ResultSource,
       r.durationMS,
       r.RowsCount
FROM Results AS r;
GO
--Clean up: remove the temp tables first, then leave and drop the test database
DROP TABLE #TEMP;
DROP TABLE #TEMP2;
DROP TABLE #TEMP3;
DROP TABLE #TEMP4;
USE master;
GO
DROP DATABASE dbTest;
Risultati (durata in ms):
200 elementi in 500 righe
1220 delimited8K with NVARCHAR(4000)
274 delimited8K with VARCHAR(8000)
417 OPENJSON with NVARCHAR(4000)
443 OPENJSON with VARCHAR(8000)
100 elementi in 500 righe
421 delimited8K with NVARCHAR(4000)
140 delimited8K with VARCHAR(8000)
213 OPENJSON with NVARCHAR(4000)
212 OPENJSON with VARCHAR(8000)
100 elementi in 5 righe
10 delimited8K with NVARCHAR(4000)
5 delimited8K with VARCHAR(8000)
3 OPENJSON with NVARCHAR(4000)
4 OPENJSON with VARCHAR(8000)
5 elementi in 500 righe
32 delimited8K with NVARCHAR(4000)
30 delimited8K with VARCHAR(8000)
28 OPENJSON with NVARCHAR(4000)
24 OPENJSON with VARCHAR(8000)
--Lunghezza illimitata (possibile solo con OPENJSON)
--Senza la clausola TOP durante il riempimento della tabella
--si ottengono circa 500 elementi in 500 righe
1329 OPENJSON with NVARCHAR(4000)
1117 OPENJSON with VARCHAR(8000)
Fatti:
- la popolare funzione splitter non ama NVARCHAR
- la funzione è limitata a stringhe di al massimo 8k byte
- solo nel caso con molti elementi e molte righe in VARCHAR la funzione splitter è in vantaggio
- in tutti gli altri casi OPENJSON sembra essere più veloce, in misura maggiore o minore
- OPENJSON può gestire conteggi (quasi) illimitati
- OPENJSON richiede la v2016
- tutti stanno aspettando STRING_SPLIT con la posizione
AGGIORNAMENTO: aggiunto STRING_SPLIT al test
Nel frattempo ho rieseguito il test con altre due sezioni che usano STRING_SPLIT(). Come posizione ho dovuto restituire un valore fisso (hard-coded), poiché questa funzione non restituisce l'indice della parte.
In tutti i casi testati OPENJSON era vicino a STRING_SPLIT e spesso più veloce:
5 elementi in 1000 righe
250 delimited8K with NVARCHAR(4000)
124 delimited8K with VARCHAR(8000) --this function is best with many rows in VARCHAR
203 OPENJSON with NVARCHAR(4000)
204 OPENJSON with VARCHAR(8000)
235 STRING_SPLIT with NVARCHAR(4000)
234 STRING_SPLIT with VARCHAR(8000)
200 elementi in 30 righe
140 delimited8K with NVARCHAR(4000)
31 delimited8K with VARCHAR(8000)
47 OPENJSON with NVARCHAR(4000)
31 OPENJSON with VARCHAR(8000)
47 STRING_SPLIT with NVARCHAR(4000)
31 STRING_SPLIT with VARCHAR(8000)
100 elementi in 10.000 righe
8145 delimited8K with NVARCHAR(4000)
2806 delimited8K with VARCHAR(8000) --fast with many rows!
5112 OPENJSON with NVARCHAR(4000)
4501 OPENJSON with VARCHAR(8000)
5028 STRING_SPLIT with NVARCHAR(4000)
5126 STRING_SPLIT with VARCHAR(8000)