私は現在、次のような1分間隔で保存されているいくつかのデータを使用しています。
-- Sample data: one row per one-minute bar of an exchange rate
-- (open/high/low/close prices for that minute).
CREATE TABLE #MinuteData
(
    [Id]        INT,
    [MinuteBar] DATETIME,
    [Open]      NUMERIC(12, 6),
    [High]      NUMERIC(12, 6),
    [Low]       NUMERIC(12, 6),
    [Close]     NUMERIC(12, 6)
);

-- Ten consecutive one-minute bars, 17:00 through 17:09.
INSERT INTO #MinuteData
    ([Id], [MinuteBar], [Open], [High], [Low], [Close])
VALUES
    (1,  '2015-01-01 17:00:00', 1.557870, 1.557880, 1.557870, 1.557880),
    (2,  '2015-01-01 17:01:00', 1.557900, 1.557900, 1.557880, 1.557880),
    (3,  '2015-01-01 17:02:00', 1.557960, 1.558070, 1.557960, 1.558040),
    (4,  '2015-01-01 17:03:00', 1.558080, 1.558100, 1.558040, 1.558050),
    (5,  '2015-01-01 17:04:00', 1.558050, 1.558100, 1.558020, 1.558030),
    (6,  '2015-01-01 17:05:00', 1.558580, 1.558710, 1.557870, 1.557950),
    (7,  '2015-01-01 17:06:00', 1.557910, 1.558120, 1.557910, 1.557990),
    (8,  '2015-01-01 17:07:00', 1.557940, 1.558250, 1.557940, 1.558170),
    (9,  '2015-01-01 17:08:00', 1.558140, 1.558200, 1.558080, 1.558120),
    (10, '2015-01-01 17:09:00', 1.558110, 1.558140, 1.557970, 1.557970);

-- Show the raw one-minute rows.
SELECT
    [Id],
    [MinuteBar],
    [Open],
    [High],
    [Low],
    [Close]
FROM #MinuteData;

-- Clean up the sample table.
DROP TABLE #MinuteData;
これらの値は為替レートを追跡しており、各1分間隔(バー)には、その分の開始時点の Open 価格と終了時点の Close 価格があります。
High と Low の値は、その1分間における最高レートと最低レートを表します。
望ましい出力
このデータを5分間隔に再フォーマットして、次の出力を生成しようとしています。
MinuteBar Open Close Low High
2015-01-01 17:00:00.000 1.557870 1.558030 1.557870 1.558100
2015-01-01 17:05:00.000 1.558580 1.557970 1.557870 1.558710
これは、5分間の最初の分から Open 値を、5分間の最後の分から Close 値を取ります。
High と Low の値は、それぞれ5分間における high の最大値と low の最小値を表します。
現在の解決策
これを行うソリューション(以下)はありますが、id の値と自己結合に依存しているため、洗練されていないように感じます。また、はるかに大きなデータセットで実行する予定なので、可能であればより効率的な方法を探しています。
-- Step 1: tag every one-minute bar with the number of its 5-minute bucket
-- (whole minutes elapsed since 2015-01-01 00:00, integer-divided by 5).
SELECT
    Id,
    MinuteBar,
    [Open],
    High,
    Low,
    [Close],
    DATEDIFF(MINUTE, '2015-01-01T00:00:00', MinuteBar) / 5 AS Interval
INTO #5MinuteData
FROM #MinuteData
ORDER BY MinuteBar;

-- Step 2: collapse to one row per bucket. Low/High are plain aggregates;
-- the smallest and largest Id are kept so the opening and closing bars can be
-- looked up afterwards (this is only the first/last bar if Id increases with MinuteBar).
SELECT
    Interval,
    MIN(MinuteBar) AS MinuteBar,
    MIN(Id)        AS OpenId,
    MAX(Id)        AS CloseId,
    MIN(Low)       AS Low,
    MAX(High)      AS High
INTO #DataMinMax
FROM #5MinuteData
GROUP BY Interval;

-- Step 3: join back twice to fetch [Open] from the bucket's first bar
-- and [Close] from its last bar.
SELECT
    bucket.Interval,
    bucket.MinuteBar,
    firstBar.[Open],
    lastBar.[Close],
    bucket.Low,
    bucket.High
FROM #DataMinMax AS bucket
INNER JOIN #5MinuteData AS firstBar
    ON firstBar.Id = bucket.OpenId
INNER JOIN #5MinuteData AS lastBar
    ON lastBar.Id = bucket.CloseId;

-- Remove the intermediate temp tables.
DROP TABLE #DataMinMax;
DROP TABLE #5MinuteData;
リワークの試み
上記のクエリの代わりに、FIRST_VALUE と LAST_VALUE の使用を検討してきました。求めているものに合っていそうですが、現在行っているグループ化と組み合わせるとうまく動作しません。私が試している方法よりも良い解決策があるかもしれないので、提案を歓迎します。現在試しているのは次のクエリです:
-- Attempted rework: combine GROUP BY aggregates with FIRST_VALUE / LAST_VALUE
-- in a single SELECT, grouping bars into 5-minute buckets.
-- NOTE: this statement does not run as written. The window functions order by
-- MinuteBar, which is neither aggregated nor part of the GROUP BY expression,
-- and they have no PARTITION BY for the 5-minute bucket.
SELECT MIN(MinuteBar) MinuteBar5 ,
FIRST_VALUE([Open]) OVER (ORDER BY MinuteBar) AS Opening,
MAX(High) AS High ,
MIN(Low) AS Low ,
LAST_VALUE([Close]) OVER (ORDER BY MinuteBar) AS Closing ,
DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 AS Interval
FROM #MinuteData
GROUP BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5
このクエリを実行すると、以下のエラーが表示されます。FIRST_VALUE と LAST_VALUE の行を削除するとクエリは実行できるため、エラーはこれらの関数に関連しています。
列 '#MinuteData.MinuteBar'は、集計関数またはGROUP BY句のいずれにも含まれていないため、選択リストでは無効です。
-- Builds 5-minute OHLC bars from #MinuteData in three steps:
--   1. Bucketed      : assign each one-minute bar to its 5-minute bucket.
--   2. WithOpenClose : stamp every row with its bucket's opening and closing price.
--   3. final SELECT  : aggregate per bucket.
;WITH Bucketed AS
(
    SELECT
        MinuteBar,
        [Open],
        High,
        Low,
        [Close],
        DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5 AS Interval
    FROM #MinuteData
),
WithOpenClose AS
(
    SELECT
        MinuteBar,
        High,
        Low,
        Interval,
        -- Earliest bar in the bucket supplies the opening price.
        FIRST_VALUE([Open])  OVER (PARTITION BY Interval ORDER BY MinuteBar)      AS Opening,
        -- Ordering descending makes FIRST_VALUE return the latest bar's close.
        FIRST_VALUE([Close]) OVER (PARTITION BY Interval ORDER BY MinuteBar DESC) AS Closing
    FROM Bucketed
)
SELECT
    MIN(MinuteBar) AS MinuteBar5,
    Opening,
    MAX(High)      AS High,
    MIN(Low)       AS Low,
    Closing,
    Interval
FROM WithOpenClose
-- Opening and Closing are constant within a bucket, so adding them to the
-- GROUP BY does not split any bucket.
GROUP BY Interval, Opening, Closing
現在のソリューションに近い解決策です。間違っている箇所が2つあります。
LAST_VALUE は現在のウィンドウの最後の値を返しますが、クエリではウィンドウが指定されていません。デフォルトのウィンドウは、現在のパーティションの最初の行から現在の行までです。FIRST_VALUE を降順(DESC)で使用するか、次のようにウィンドウを明示的に指定できます。
-- Select-list fragment (not a complete statement): closing price of each 5-minute bucket.
-- The explicit frame spans the whole partition, so LAST_VALUE returns the bucket's
-- final row by MinuteBar instead of stopping at the current row.
LAST_VALUE([Close]) OVER (PARTITION BY DATEDIFF(MINUTE, '2015-01-01 00:00:00', MinuteBar) / 5
ORDER BY MinuteBar
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Closing,
一時テーブルなしでそれを行う1つの方法を次に示します。
-- Builds 5-minute OHLC bars from the one-minute rows in #MinuteData without temp tables.
-- Output: one row per 5-minute bucket with the first bar's time, the bucket's
-- open, close, lowest low and highest high.
;WITH CTEInterval AS
( -- Replaces the first temporary table (#5MinuteData): tag each bar with its bucket number.
    SELECT [Id],
           [MinuteBar],
           [Open],
           [High],
           [Low],
           [Close],
           -- Count whole minutes from a fixed anchor instead of DATEPART(MINUTE, ...):
           -- the minute-of-hour repeats every hour, so it would merge bars from
           -- different hours or days into the same bucket.
           -- Integer division truncates toward zero, so the anchor must not be later
           -- than the earliest MinuteBar in the data.
           DATEDIFF(MINUTE, '2015-01-01T00:00:00', [MinuteBar]) / 5 AS Interval
    FROM #MinuteData
), CTEOpenClose AS
( -- Replaces the second temporary table (#DataMinMax): stamp each row with its
  -- bucket's opening and closing price.
    SELECT [Id],
           [MinuteBar],
           -- Earliest bar in the bucket supplies the open.
           FIRST_VALUE([Open]) OVER (PARTITION BY Interval ORDER BY [MinuteBar]) AS [Open],
           [High],
           [Low],
           -- Descending order makes FIRST_VALUE pick the latest bar's close.
           FIRST_VALUE([Close]) OVER (PARTITION BY Interval ORDER BY [MinuteBar] DESC) AS [Close],
           Interval
    FROM CTEInterval
)
-- Final select: one row per bucket.
SELECT MIN([MinuteBar]) AS [MinuteBar],
       MIN([Open])      AS [Open],  -- every row in a bucket carries the same [Open]; MIN just picks it
       MIN([Close])     AS [Close], -- every row in a bucket carries the same [Close]; MIN just picks it
       MIN([Low])       AS [Low],
       MAX([High])      AS [High]
FROM CTEOpenClose
GROUP BY Interval
ORDER BY MIN([MinuteBar]);
結果:
MinuteBar Open Close Low High
2015-01-01 17:00:00.000 1.557870 1.558030 1.557870 1.558100
2015-01-01 17:05:00.000 1.558580 1.557970 1.557870 1.558710
-- Builds 5-minute OHLC bars by joining the one-minute rows to a generated list of
-- 5-minute windows. Output columns: minutebar, low, High, open, close.
;WITH cte AS
( -- Generates consecutive 5-minute windows [interval, nxtinterval).
  -- This could be a permanent table of intervals rather than being generated on the fly.
  -- The range is hard-coded: windows starting 17:00 through 17:50 on 2015-01-01.
    SELECT CAST('2015-01-01 17:00:00.000' AS datetime)                 AS interval,
           DATEADD(mi, 5, CAST('2015-01-01 17:00:00.000' AS datetime)) AS nxtinterval
    UNION ALL
    SELECT DATEADD(mi, 5, interval),
           DATEADD(mi, 5, nxtinterval)
    FROM cte
    WHERE interval <= '2015-01-01 17:45:00.000'
),
finalcte AS
( -- One row per one-minute bar, stamped with its window's open and close.
    SELECT m.MinuteBar AS minutebar,
           m.Low       AS low,
           m.High      AS high,
           c.interval,
           -- Order by the bar time inside each window. Ordering by the partition keys
           -- alone makes every row a peer, so the picked value would be arbitrary.
           FIRST_VALUE(m.[Close]) OVER (PARTITION BY c.interval ORDER BY m.MinuteBar DESC) AS [close],
           FIRST_VALUE(m.[Open])  OVER (PARTITION BY c.interval ORDER BY m.MinuteBar)      AS [open]
    FROM cte AS c
    INNER JOIN #MinuteData AS m
        -- Half-open range: BETWEEN is inclusive on both ends and would also place the
        -- bar at nxtinterval (e.g. 17:05) in the preceding window.
        ON  m.MinuteBar >= c.interval
        AND m.MinuteBar <  c.nxtinterval
)
SELECT MIN(minutebar) AS minutebar,
       MIN(low)       AS [low],
       MAX(high)      AS [High],
       MAX([open])    AS [open],  -- constant within a window; MAX just picks it
       MAX([close])   AS [close]  -- constant within a window; MAX just picks it
FROM finalcte
GROUP BY interval
ORDER BY MIN(minutebar);