2012-04-11 36 views
8

使用下面的數據,我想根據分組列值的變化來對數據進行分組:

-- Sample price history: one row per product per calendar day.
-- Product 1 is priced 1 on 01-04 Jan 2012, 2 on 05-08 Jan, and 1 again on 09-12 Jan,
-- so the same price appears in two separate runs.
CREATE TABLE #ph (
    product int,
    [date]  date,
    price   int
);

INSERT INTO #ph (product, [date], price)
VALUES
    (1, '20120101', 1),
    (1, '20120102', 1),
    (1, '20120103', 1),
    (1, '20120104', 1),
    (1, '20120105', 2),
    (1, '20120106', 2),
    (1, '20120107', 2),
    (1, '20120108', 2),
    (1, '20120109', 1),
    (1, '20120110', 1),
    (1, '20120111', 1),
    (1, '20120112', 1);

我想產生以下輸出:

product | date_from | date_to | price 
    1  | 20120101 | 20120105 | 1 
    1  | 20120105 | 20120109 | 2 
    1  | 20120109 | 20120112 | 1 

如果我按價格分組並顯示最小和最大日期,就會得到以下結果,這不是我想要的(注意日期重疊)。

product | date_from | date_to | price 
    1  | 20120101 | 20120112 | 1 
    1  | 20120105 | 20120108 | 2 

所以基本上我想做的是:以產品和價格作爲分組列,根據數據的階躍變化來分組。

達到此目的的最簡潔方法是什麼?

+4

供參考:這是所謂「間隙與島嶼」(gaps and islands)問題的一個實例。 – AakashM 2012-04-11 16:32:04

+0

@AakashM會看一看,我試過搜索,但沒有這個問題的明確定義。謝謝 – MrEdmundo 2012-04-11 16:34:21

+0

np。有一個'數字表'(在這種情況下,'日期表')將非常有幫助。 – AakashM 2012-04-11 16:36:08

回答

18

解決此類問題有一種(或多或少)衆所周知的技術,它用到兩個 ROW_NUMBER() 調用,就像這樣:

-- Gaps-and-islands via the difference of two row numbers.
--   seq_in_product numbers every row of a product by date;
--   seq_in_price   numbers the rows of each (product, price) pair by date.
-- While the price stays the same on consecutive rows both counters advance together,
-- so their difference is constant within one run and, combined with (product, price),
-- identifies that run.
WITH numbered AS (
    SELECT
        ph.product,
        ph.[date],
        ph.price,
        ROW_NUMBER() OVER (PARTITION BY ph.product ORDER BY ph.[date]) AS seq_in_product,
        ROW_NUMBER() OVER (PARTITION BY ph.product, ph.price ORDER BY ph.[date]) AS seq_in_price
    FROM #ph AS ph
),
islands AS (
    SELECT
        n.product,
        n.[date],
        n.price,
        n.seq_in_product - n.seq_in_price AS island_id
    FROM numbered AS n
)
-- Collapse each run to a single row holding its first and last date.
SELECT
    i.product,
    MIN(i.[date]) AS date_from,
    MAX(i.[date]) AS date_to,
    i.price
FROM islands AS i
GROUP BY
    i.product,
    i.price,
    i.island_id
ORDER BY
    i.product,
    date_from

輸出:

product date_from date_to  price 
------- ---------- ------------- ----- 
1  2012-01-01 2012-01-04  1  
1  2012-01-05 2012-01-08  2  
1  2012-01-09 2012-01-12  1  
+0

謝謝,我剛纔看了一下我最終實現的內容,它是一樣的,但是我在兩個不同的CTE中做了它,我沒有想到在一箇中使用減法。謝謝。 – MrEdmundo 2012-04-30 07:24:52

0

我想出的一個比較「乾淨」的解決方案是:

-- Returns one row per run of consecutive days on which a product kept the same price.
--   date_from : first day of the run.
--   date_to   : first day of the following run (the day the price changed); for the
--               product's final run, which has no successor, the product's last
--               recorded date is used instead.
-- Fix: the previous version anti-joined on the NEXT row (row + 1), so each surviving
-- row was the LAST day of a run but was reported as date_from, and the final run's
-- date_to came back NULL.
;WITH cte_sort (product, [date], price, row_no, last_date) AS (
    SELECT
        product,
        [date],
        price,
        -- Dense per-product sequence, so "previous row" works even if dates have gaps.
        ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC),
        MAX([date]) OVER (PARTITION BY product)
    FROM #ph
)
SELECT
    a.product,
    a.[date] AS date_from,
    COALESCE(c.[date], a.last_date) AS date_to,
    a.price
FROM cte_sort AS a
-- Anti-join: keep only rows whose immediately preceding row (same product)
-- does NOT carry the same price, i.e. the first row of each run.
LEFT JOIN cte_sort AS b
    ON b.product = a.product
   AND b.row_no = a.row_no - 1
   AND b.price = a.price
-- First later row of the same product with a different price = start of the next run.
OUTER APPLY (
    SELECT TOP (1) z.[date]
    FROM cte_sort AS z
    WHERE z.product = a.product
      AND z.row_no > a.row_no
      AND z.price <> a.price
    ORDER BY z.row_no
) AS c
WHERE b.row_no IS NULL
ORDER BY
    a.product,
    a.[date]

我使用了帶 row_number 的 CTE,因爲這樣就不必像使用 dateadd 之類的函數時那樣擔心是否有日期缺失。顯然,只有在需要 date_to 列時(我需要)才需要 outer apply。

這個解決方案確實解決了我的問題,不過要讓它在我 500 萬行的表上快速執行,還有一點困難。

2

我是新來這個論壇,所以希望我的貢獻是有幫助的。

如果你真的不想使用CTE(儘管我認爲這可能是最好的方法),你可以使用基於集合的代碼獲得解決方案。您將需要測試此代碼的性能!

我添加了一個額外的臨時表,這樣就可以爲每條記錄使用一個唯一的標識符,不過我猜你的源表中已經有這樣的列了。下面是這個臨時表。

-- Builds #phwithId: a copy of #ph with a per-product running number (SaleId) ordered
-- by date, so adjacent rows of a product can be paired with SaleId +/- 1.
-- NOTE: SaleId restarts at 1 for every product; it is unique only together with ProductID.

-- OBJECT_ID resolves the temp table for the current session only. The previous
-- LIKE '#phwithId%' probe on tempdb.sys.tables also matched other sessions' tables,
-- in which case the DROP below would fail.
IF OBJECT_ID('tempdb..#phwithId') IS NOT NULL
    DROP TABLE #phwithId;

CREATE TABLE #phwithId
(
    SaleId    INT,
    ProductID INT,
    Price     MONEY,
    SaleDate  DATE
);

-- Explicit column list keeps the mapping obvious; the source is the #ph temp table
-- (the previous version read from a permanent table named ph).
INSERT INTO #phwithId (SaleId, ProductID, Price, SaleDate)
SELECT
    ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC),
    product,
    price,
    [date]
FROM #ph;

現在Select語句

-- Returns one row per run of consecutive rows with an unchanged price, without CTEs.
--   da1: rows that START a run (no previous row for the product, or previous price differs).
--   da2: rows that END a run   (no next row for the product, or next price differs).
-- Both sets are numbered per product in date order; the n-th start pairs with the n-th end.
-- Fix: SaleId restarts at 1 for every product, so every join must also match on ProductId.
-- Without it, rows of different products were compared and paired with each other.
SELECT
    da1.ProductId,
    da1.date_from,
    da2.date_to,
    da1.Price
FROM
    (
        SELECT
            dfr.ProductId,
            ROW_NUMBER() OVER (PARTITION BY dfr.ProductId ORDER BY dfr.ChangeDate) AS rowno1,
            dfr.ChangeDate AS date_from,
            dfr.Price
        FROM
            (
                SELECT
                    sl1.ProductId,
                    sl1.SaleDate AS ChangeDate,
                    sl1.Price
                FROM #phwithId AS sl1
                -- sl2 is the previous row of the same product.
                LEFT JOIN #phwithId AS sl2
                    ON sl2.ProductId = sl1.ProductId
                   AND sl2.SaleId = sl1.SaleId - 1
                WHERE sl1.Price <> sl2.Price
                   OR sl2.Price IS NULL
            ) AS dfr
    ) AS da1
LEFT JOIN
    (
        SELECT
            dto.ProductId,
            ROW_NUMBER() OVER (PARTITION BY dto.ProductId ORDER BY dto.ChangeDate) AS rowno2,
            dto.ChangeDate AS date_to
        FROM
            (
                SELECT
                    sl1.ProductId,
                    sl1.SaleDate AS ChangeDate
                FROM #phwithId AS sl1
                -- sl3 is the next row of the same product.
                LEFT JOIN #phwithId AS sl3
                    ON sl3.ProductId = sl1.ProductId
                   AND sl3.SaleId = sl1.SaleId + 1
                WHERE sl1.Price <> sl3.Price
                   OR sl3.Price IS NULL
            ) AS dto
    ) AS da2
    ON da2.ProductId = da1.ProductId
   AND da2.rowno2 = da1.rowno1
ORDER BY
    da1.ProductId,
    da1.date_from

其原理是:把數據源與偏移 1 條記錄(+ 或 -)的自身相連接,就能找出價格區間何時發生變化;之後只需把每個區間的開始日期和結束日期合併成一條記錄即可。

總的來說這種寫法相當繁瑣,我也不確定它能否提供更好的性能,但我喜歡挑戰。

-1
-- Returns the column names of the given table as one string, in ordinal order,
-- each name followed by a three-space separator (so the result ends with the separator).
--   @table_name : table (or view) name as listed in INFORMATION_SCHEMA.COLUMNS.
--                 The schema is not part of the lookup, so same-named tables in
--                 different schemas contribute to the same result.
--   Returns     : nvarchar(4000), truncated to 4000 characters; NULL when no column matches.
-- Changes from the previous version:
--   * The string is built with an ordered FOR XML PATH aggregation instead of
--     "SELECT @str = ... + @str ... ORDER BY", whose result is not guaranteed.
--   * @table_name is sysname (nvarchar(128)), so long names are no longer cut at 55 characters.
--   * INFORMATION_SCHEMA identifiers are upper-case so the function also compiles
--     in databases with a case-sensitive collation.
CREATE FUNCTION [dbo].[AF_TableColumns] (@table_name sysname)
RETURNS nvarchar(4000)
AS
BEGIN
    DECLARE @str nvarchar(4000);

    SET @str = CAST(
        (
            SELECT CAST(RTRIM(LTRIM(c.COLUMN_NAME)) AS nvarchar(500)) + N'   '
            FROM INFORMATION_SCHEMA.COLUMNS AS c
            WHERE c.TABLE_NAME = @table_name
            -- Same de-duplication as before: one entry per (table, column, position).
            GROUP BY c.TABLE_NAME, c.COLUMN_NAME, c.ORDINAL_POSITION
            ORDER BY c.ORDINAL_POSITION
            -- TYPE + value() returns the text without XML entity escaping.
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)')
        AS nvarchar(4000)
    );

    RETURN @str;
END

--select dbo.AF_TableColumns('YourTable') Select * from YourTable