次のデータが与えられます:
create table #histories
(
username varchar(10),
account varchar(10),
assigned date
);
insert into #histories
values
('PHIL','ACCOUNT1','2017-01-04'),
('PETER','ACCOUNT1','2017-01-15'),
('DAVE','ACCOUNT1','2017-03-04'),
('ANDY','ACCOUNT1','2017-05-06'),
('DAVE','ACCOUNT1','2017-05-07'),
('FRED','ACCOUNT1','2017-05-08'),
('JAMES','ACCOUNT1','2017-08-05'),
('DAVE','ACCOUNT2','2017-01-02'),
('PHIL','ACCOUNT2','2017-01-18'),
('JOSH','ACCOUNT2','2017-04-08'),
('JAMES','ACCOUNT2','2017-04-09'),
('DAVE','ACCOUNT2','2017-05-06'),
('PHIL','ACCOUNT2','2017-05-07') ;
...特定のユーザーがアカウントにいつ割り当てられたかを表します。
私は、毎月の最終日に特定のアカウントを誰が所有したか(割り当てられた日付は、アカウントが所有権を譲渡した日付です)を確立しようとしています欠落している月末が入力されています(おそらく私が利用できる便利なdates
テーブルから作成され、有用な列DateKey
、Date
およびLastDayOfMonth
、[courtesy of @AaronBertrand]) 1 。
望ましい結果は次のとおりです。
PETER, ACCOUNT1, 2017-01-31
PETER, ACCOUNT1, 2017-02-28
DAVE, ACCOUNT1, 2017-03-31
DAVE, ACCOUNT1, 2017-04-30
FRED, ACCOUNT1, 2017-05-31
FRED, ACCOUNT1, 2017-06-30
FRED, ACCOUNT1, 2017-07-31
JAMES, ACCOUNT1, 2017-08-31
PHIL, ACCOUNT2, 2017-01-31
PHIL, ACCOUNT2, 2017-02-28
PHIL, ACCOUNT2, 2017-03-31
JAMES, ACCOUNT2, 2017-04-30
PHIL, ACCOUNT2, 2017-05-31
これの最初の部分をウィンドウ関数で実行するのは簡単です。私が苦労している "欠落している"行を追加しています。
この問題への1つのアプローチは、次のことを行うことです。
LEAD
をエミュレートします。これにはAPPLY
またはsuqueryを使用できます。結果を確定的にするために、テストデータを少し変更しました。インデックスも追加しました:
create table #histories
(
username varchar(10),
account varchar(10),
assigned date
);
insert into #histories
values
('PHIL','ACCOUNT1','2017-01-04'),
('PETER','ACCOUNT1','2017-01-15'),
('DAVE','ACCOUNT1','2017-03-04'),
('ANDY','ACCOUNT1','2017-05-06'),
('DAVE','ACCOUNT1','2017-05-07'),
('FRED','ACCOUNT1','2017-05-08'),
('JAMES','ACCOUNT1','2017-08-05'),
('DAVE','ACCOUNT2','2017-01-02'),
('PHIL','ACCOUNT2','2017-01-18'),
('JOSH','ACCOUNT2','2017-04-08'), -- changed this date to have deterministic results
('JAMES','ACCOUNT2','2017-04-09'),
('DAVE','ACCOUNT2','2017-05-06'),
('PHIL','ACCOUNT2','2017-05-07') ;
-- make life easy
create index gotta_go_fast ON #histories (account, assigned);
これは、これまでで最も怠惰な日付ディメンションテーブルです。
create table #date_dim_months_only (
month_date date,
primary key (month_date)
);
-- put 2500 month ends into table
INSERT INTO #date_dim_months_only WITH (TABLOCK)
SELECT DATEADD(DAY, -1, DATEADD(MONTH, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)), '20000101'))
FROM master..spt_values;
ステップ1では、LEAD
をエミュレートする方法がたくさんあります。これが1つの方法です。
SELECT
h1.username
, h1.account
, h1.assigned
, next_date.assigned
FROM #histories h1
OUTER APPLY (
SELECT TOP 1 h2.assigned
FROM #histories h2
WHERE h1.account = h2.account
AND h1.assigned < h2.assigned
ORDER BY h2.assigned ASC
) next_date;
ステップ2では、NULL値を別の値に変更する必要があります。各アカウントの最終月を含めたいので、開始日に1か月を追加するだけで十分です。
ISNULL(next_date.assigned, DATEADD(MONTH, 1, h1.assigned))
ステップ3では、日付ディメンションテーブルに結合できます。ディメンションテーブルの列は、結果セットに必要な列とまったく同じです。
INNER JOIN #date_dim_months_only dd ON
dd.month_date >= h1.assigned AND
dd.month_date < ISNULL(next_date.assigned, DATEADD(MONTH, 1, h1.assigned))
まとめると、クエリが気に入らなかった。 OUTER APPLY
とINNER JOIN
を組み合わせると、結合順序に問題が発生する可能性があります。必要な結合順序を取得するには、サブクエリを使用して書き直します。
SELECT
hist.username
, hist.account
, dd.month_date
FROM
(
SELECT
h1.username
, h1.account
, h1.assigned
, ISNULL(
(SELECT TOP 1 h2.assigned
FROM #histories h2
WHERE h1.account = h2.account
AND h1.assigned < h2.assigned
ORDER BY h2.assigned ASC
)
, DATEADD(MONTH, 1, h1.assigned)
) next_assigned
FROM #histories h1
) hist
INNER JOIN #date_dim_months_only dd ON
dd.month_date >= hist.assigned AND
dd.month_date < hist.next_assigned;
データの量がわからないので、問題にはならないかもしれません。しかし、計画は私がそれをどのようにしたいかに見えます:
結果はあなたのものと一致します:
╔══════════╦══════════╦════════════╗
║ username ║ account ║ month_date ║
╠══════════╬══════════╬════════════╣
║ PETER ║ ACCOUNT1 ║ 2017-01-31 ║
║ PETER ║ ACCOUNT1 ║ 2017-02-28 ║
║ DAVE ║ ACCOUNT1 ║ 2017-03-31 ║
║ DAVE ║ ACCOUNT1 ║ 2017-04-30 ║
║ FRED ║ ACCOUNT1 ║ 2017-05-31 ║
║ FRED ║ ACCOUNT1 ║ 2017-06-30 ║
║ FRED ║ ACCOUNT1 ║ 2017-07-31 ║
║ JAMES ║ ACCOUNT1 ║ 2017-08-31 ║
║ PHIL ║ ACCOUNT2 ║ 2017-01-31 ║
║ PHIL ║ ACCOUNT2 ║ 2017-02-28 ║
║ PHIL ║ ACCOUNT2 ║ 2017-03-31 ║
║ JAMES ║ ACCOUNT2 ║ 2017-04-30 ║
║ PHIL ║ ACCOUNT2 ║ 2017-05-31 ║
╚══════════╩══════════╩════════════╝
ここでは、カレンダーテーブルを使用していませんが、自然数テーブルnums.dbo.numsを使用しています(そうでない場合でも簡単に生成できます)
あなたのデータにはこれらの2つの行が含まれているので、私はあなたのものと少し異なる答えがあります( 'JOSH' <-> 'JAMES'):
('JOSH','ACCOUNT2','2017-04-09'),
('JAMES','ACCOUNT2','2017-04-09'),
同じアカウントと割り当てられた日付を使用していて、どちらを採用する必要があるかを正確に指定していませんでした。
declare @eom table(account varchar(10), dt date);
with acc_mm AS
(
select account, min(assigned) as min_dt, max(assigned) as max_dt
from #histories
group by account
),
acc_mm1 AS
(
select account,
dateadd(month, datediff(month, '19991231', min_dt), '19991231') as start_dt,
dateadd(month, datediff(month, '19991231', max_dt), '19991231') as end_dt
from acc_mm
)
insert into @eom (account, dt)
select account, dateadd(month, n - 1, start_dt)
from acc_mm1
join nums.dbo.nums
on n - 1 <= datediff(month, start_dt, end_dt);
select eom.dt, eom.account, a.username
from @eom eom
cross apply(select top 1 *
from #histories h
where eom.account = h.account
and h.assigned <= eom.dt
order by h.assigned desc) a
order by eom.account, eom.dt;
勝利のためのトライアングル参加!
SELECT account,EndOfMonth,username
FROM (
SELECT Ends.*, h.*
,ROW_NUMBER() OVER (PARTITION BY h.account,Ends.EndOfMonth ORDER BY h.assigned DESC) AS RowNumber
FROM (
SELECT [Year],[Month],MAX(DATE) AS EndOfMonth
FROM #dim
GROUP BY [Year],[Month]
) Ends
CROSS JOIN (
SELECT account, MAX(assigned) AS MaxAssigned
FROM #histories
GROUP BY account
) ac
JOIN #histories h ON h.account = ac.account
AND Year(h.assigned) = ends.[Year]
AND Month(h.assigned) <= ends.[Month] --triangle join for the win!
AND EndOfMonth < DATEADD(month, 1, Maxassigned)
) Results
WHERE RowNumber = 1
ORDER BY account,EndOfMonth;
結果は次のとおりです。
account EndOfMonth username
ACCOUNT1 2017-01-31 PETER
ACCOUNT1 2017-02-28 PETER
ACCOUNT1 2017-03-31 DAVE
ACCOUNT1 2017-04-30 DAVE
ACCOUNT1 2017-05-31 FRED
ACCOUNT1 2017-06-30 FRED
ACCOUNT1 2017-07-31 FRED
ACCOUNT1 2017-08-31 JAMES
ACCOUNT2 2017-01-31 PHIL
ACCOUNT2 2017-02-28 PHIL
ACCOUNT2 2017-03-31 PHIL
ACCOUNT2 2017-04-30 JAMES
ACCOUNT2 2017-05-31 PHIL
I/OおよびTIME統計(論理読み取り後にすべてのゼロ値を切り捨て):
(13 row(s) affected)
Table 'Worktable'. Scan count 3, logical reads 35.
Table 'Workfile'. Scan count 0, logical reads 0.
Table '#dim'. Scan count 1, logical reads 4.
Table '#histories'. Scan count 1, logical reads 1.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 3 ms.
クエリを実行して必要な 'tempテーブルを作成し、T-SQLステートメントをテストします。
IF OBJECT_ID('tempdb..#histories') IS NOT NULL
DROP TABLE #histories
CREATE TABLE #histories (
username VARCHAR(10)
,account VARCHAR(10)
,assigned DATE
);
INSERT INTO #histories
VALUES
('PHIL','ACCOUNT1','2017-01-04'),
('PETER','ACCOUNT1','2017-01-15'),
('DAVE','ACCOUNT1','2017-03-04'),
('ANDY','ACCOUNT1','2017-05-06'),
('DAVE','ACCOUNT1','2017-05-07'),
('FRED','ACCOUNT1','2017-05-08'),
('JAMES','ACCOUNT1','2017-08-05'),
('DAVE','ACCOUNT2','2017-01-02'),
('PHIL','ACCOUNT2','2017-01-18'),
('JOSH','ACCOUNT2','2017-04-08'),
('JAMES','ACCOUNT2','2017-04-09'),
('DAVE','ACCOUNT2','2017-05-06'),
('PHIL','ACCOUNT2','2017-05-07');
DECLARE @StartDate DATE = '20170101'
,@NumberOfYears INT = 2;
-- prevent set or regional settings from interfering with
-- interpretation of dates / literals
SET DATEFIRST 7;
SET DATEFORMAT mdy;
SET LANGUAGE US_ENGLISH;
DECLARE @CutoffDate DATE = DATEADD(YEAR, @NumberOfYears, @StartDate);
-- this is just a holding table for intermediate calculations:
IF OBJECT_ID('tempdb..#dim') IS NOT NULL
DROP TABLE #dim
CREATE TABLE #dim (
[date] DATE PRIMARY KEY
,[day] AS DATEPART(DAY, [date])
,[month] AS DATEPART(MONTH, [date])
,FirstOfMonth AS CONVERT(DATE, DATEADD(MONTH, DATEDIFF(MONTH, 0, [date]), 0))
,[MonthName] AS DATENAME(MONTH, [date])
,[week] AS DATEPART(WEEK, [date])
,[ISOweek] AS DATEPART(ISO_WEEK, [date])
,[DayOfWeek] AS DATEPART(WEEKDAY, [date])
,[quarter] AS DATEPART(QUARTER, [date])
,[year] AS DATEPART(YEAR, [date])
,FirstOfYear AS CONVERT(DATE, DATEADD(YEAR, DATEDIFF(YEAR, 0, [date]), 0))
,Style112 AS CONVERT(CHAR(8), [date], 112)
,Style101 AS CONVERT(CHAR(10), [date], 101)
);
-- use the catalog views to generate as many rows as we need
INSERT #dim ([date])
SELECT d
FROM (
SELECT d = DATEADD(DAY, rn - 1, @StartDate)
FROM (
SELECT TOP (DATEDIFF(DAY, @StartDate, @CutoffDate)) rn = ROW_NUMBER() OVER (
ORDER BY s1.[object_id]
)
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2
-- on my system this would support > 5 million days
ORDER BY s1.[object_id]
) AS x
) AS y;
/* The actual SELECT statement to get the results we want! */
SET STATISTICS IO, TIME ON;
SELECT account,EndOfMonth,username
FROM (
SELECT Ends.*, h.*
,ROW_NUMBER() OVER (PARTITION BY h.account,Ends.EndOfMonth ORDER BY h.assigned DESC) AS RowNumber
FROM (
SELECT [Year],[Month],MAX(DATE) AS EndOfMonth
FROM #dim
GROUP BY [Year],[Month]
) Ends
CROSS JOIN (
SELECT account, MAX(assigned) AS MaxAssigned
FROM #histories
GROUP BY account
) ac
JOIN #histories h ON h.account = ac.account
AND Year(h.assigned) = ends.[Year]
AND Month(h.assigned) <= ends.[Month] --triangle join for the win!
AND EndOfMonth < DATEADD(month, 1, Maxassigned)
) Results
WHERE RowNumber = 1
ORDER BY account,EndOfMonth;
SET STATISTICS IO, TIME OFF;
--IF OBJECT_ID('tempdb..#histories') IS NOT NULL DROP TABLE #histories
--IF OBJECT_ID('tempdb..#dim') IS NOT NULL DROP TABLE #dim
これは決してきれいに見えるソリューションではありませんが、あなたが探している結果を提供しているようです(他の人があなたのために素晴らしい、きれいな、完全に最適化されたクエリを持っていると確信しています)。
create table #histories
(
username varchar(10),
account varchar(10),
assigned date
);
insert into #histories
values
('PHIL','ACCOUNT1','2017-01-04'),
('PETER','ACCOUNT1','2017-01-15'),
('DAVE','ACCOUNT1','2017-03-04'),
('ANDY','ACCOUNT1','2017-05-06'),
('DAVE','ACCOUNT1','2017-05-07'),
('FRED','ACCOUNT1','2017-05-08'),
('JAMES','ACCOUNT1','2017-08-05'),
('DAVE','ACCOUNT2','2017-01-02'),
('PHIL','ACCOUNT2','2017-01-18'),
('JOSH','ACCOUNT2','2017-04-09'),
('JAMES','ACCOUNT2','2017-04-09'),
('DAVE','ACCOUNT2','2017-05-06'),
('PHIL','ACCOUNT2','2017-05-07') ;
IF (SELECT OBJECT_ID(N'tempdb..#IncompleteResults')) IS NOT NULL
DROP TABLE #IncompleteResults;
DECLARE @EOMTable TABLE ( EndOfMonth DATE );
DECLARE @DateToWrite DATE = '2017-01-31';
WHILE @DateToWrite < '2017-10-31'
BEGIN
INSERT INTO @EOMTable
( EndOfMonth )
SELECT @DateToWrite;
SELECT @DateToWrite = EOMONTH(DATEADD(MONTH, 1, @DateToWrite));
END
;
WITH cteAccountsByMonth
AS ( SELECT EndOfMonth ,
account
FROM @EOMTable e
CROSS JOIN ( SELECT DISTINCT
account
FROM #histories
) AS h
),
cteHistories
AS ( SELECT username ,
account ,
ROW_NUMBER() OVER ( PARTITION BY ( CAST(DATEPART(YEAR,
assigned) AS CHAR(4))
+ ( RIGHT('00'
+ CAST(DATEPART(MONTH,
assigned) AS VARCHAR(10)),
2) ) ), account ORDER BY assigned DESC ) AS rownum ,
CAST(DATEPART(YEAR, assigned) AS CHAR(4)) + RIGHT('00'
+ CAST(DATEPART(MONTH,
assigned) AS VARCHAR(10)),
2) AS PartialDate ,
assigned ,
EOMONTH(assigned) AS EndofMonth
FROM #histories
)
SELECT username ,
e.EndOfMonth ,
e.account
INTO #IncompleteResults
FROM cteAccountsByMonth e
LEFT JOIN cteHistories c ON e.EndOfMonth = c.EndofMonth
AND c.account = e.account
AND c.rownum = 1
SELECT CASE WHEN username IS NULL
THEN ( SELECT username
FROM #IncompleteResults i2
WHERE username IS NOT NULL
AND i.account = i2.account
AND i2.EndOfMonth = ( SELECT MAX(EndOfMonth)
FROM #IncompleteResults i3
WHERE i3.EndOfMonth < i.EndOfMonth
AND i3.account = i.account
AND i3.username IS NOT NULL
)
)
ELSE username
END AS username ,
EndOfMonth ,
account
FROM #IncompleteResults i
ORDER BY account ,
i.EndOfMonth;
質問でも触れているように、Aaron Bertrandの date dimension table を使用しました(これは、このようなシナリオに非常に便利なテーブルです)。次のコードを記述しました。
次のコードを使用して、EndOfMonth
列を#dim
テーブルに追加しました(FirstOfMonth
列の直後)。
EndOfMonth as dateadd(s,-1,dateadd(mm, datediff(m,0,[date])+1,0)),
そして解決策:
if object_id('tempdb..#temp') is not null drop table #temp
create table #temp (nr int, username varchar(100), account varchar(100), eom date)
;with lastassignedpermonth as
(
select
month(assigned) month
, account
, max(assigned) assigned
from
#histories
group by month(assigned), account
)
insert into #temp
select
distinct row_number() over (order by d.account, d.eom) nr
, h.username
, d.account
, d.eom
from (
select distinct month, cast(d.endofmonth as date) eom, t.account
from #dim d cross apply (select distinct account from #histories) t
) d
left join lastassignedpermonth l on d.month = l.month and l.assigned <= d.eom and d.account = l.account
left join #histories h on h.assigned = l.assigned and h.account = l.account
where d.eom <= dateadd(s,-1,dateadd(mm, datediff(m,0,getdate())+1,0)) -- end of current month
order by d.account, eom
-- This could have been done in one single statement with the lead() function but that is available as of SQL Server 2012
select case when t.username is null then (select username from #temp where nr = previous_username.nr) else t.username end as username, t.account, t.eom
from #temp as t cross apply (
select max(nr) nr
from #temp as t1
where t1.nr < t.nr and t1.username is not null
) as previous_username
/*
Note: You get twice JAMES and JOSH for April/ACCOUNT2, because apparently they are both assigned on the same date(2017-04-09)...
I guess your data should be cleaned up of overlapping dates.
*/