2016-08-30 49 views
1

標題：PostgreSQL——依鍵集合併與拆分兩個表中的日期範圍

我想把兩個結構相同的表中的多個日期範圍合併起來，這些範圍所帶的數據可能相同，也可能不同。（PostgreSQL 9.*）

表結構:

-- Activity intervals from the first source: one row per (start_time, end_time)
-- span with its activity type and owning user.
-- The id default reads from first_activities_id_seq, which is not created here
-- and must already exist when this statement runs.
CREATE TABLE first_activities (
    id            integer NOT NULL
                  DEFAULT nextval('first_activities_id_seq'::regclass)
                  PRIMARY KEY,
    start_time    timestamp with time zone,
    end_time      timestamp with time zone,
    activity_type smallint,
    user_id       integer
)
WITH (OIDS = FALSE);

-- Activity intervals from the second source; same column layout as
-- first_activities so the two tables can be combined with UNION.
-- The id default reads from second_activities_id_seq, which is not created here
-- and must already exist when this statement runs.
CREATE TABLE second_activities (
    id            integer NOT NULL
                  DEFAULT nextval('second_activities_id_seq'::regclass)
                  PRIMARY KEY,
    start_time    timestamp with time zone,
    end_time      timestamp with time zone,
    activity_type smallint,
    user_id       integer
)
WITH (OIDS = FALSE);

數據在第一個表:

-- Sample rows for first_activities.
-- id is left out of the column list so the column default (the sequence)
-- assigns it; an explicit NULL would bypass the default and violate NOT NULL.
-- The timestamp literals carry no zone, so they are read in the session's
-- TimeZone setting.
INSERT INTO first_activities (start_time, end_time, activity_type, user_id) VALUES
('2014-10-31 01:00:00', '2014-10-31 02:00:00', 3, 1),
('2014-10-31 02:00:00', '2014-10-31 03:00:00', 4, 1),
('2014-10-31 03:00:00', '2014-10-31 04:00:00', 2, 1),
('2014-10-31 04:30:00', '2014-10-31 05:00:00', 3, 1),
('2014-10-31 05:30:00', '2014-11-01 06:00:00', 4, 1),
('2014-11-01 06:30:00', '2014-11-01 07:00:00', 2, 1),
('2014-11-01 07:30:00', '2014-11-01 08:00:00', 1, 1),
('2014-11-01 08:00:00', '2014-11-01 09:00:00', 3, 1),
('2014-11-01 09:00:00', '2014-11-02 10:00:00', 4, 1),
('2014-08-27 10:00:00', '2014-08-27 11:00:00', 2, 1),
('2014-08-27 11:00:00', '2014-08-27 12:00:00', 1, 1);

數據在第二表中:

-- Sample rows for second_activities.
-- id is left out of the column list so the column default (the sequence)
-- assigns it; an explicit NULL would bypass the default and violate NOT NULL.
-- The timestamp literals carry no zone, so they are read in the session's
-- TimeZone setting.
INSERT INTO second_activities (start_time, end_time, activity_type, user_id) VALUES
('2014-10-31 01:00:00', '2014-10-31 02:00:00', 3, 1),
('2014-10-31 02:00:00', '2014-10-31 03:00:00', 4, 1),

-- Difference from first table
('2014-10-31 03:30:00', '2014-10-31 04:00:00', 1, 1),
('2014-10-31 04:25:00', '2014-10-31 04:35:00', 3, 1),
('2014-10-31 04:45:00', '2014-10-31 05:35:00', 3, 1),
-- End of difference from first table

('2014-08-27 10:00:00', '2014-08-27 11:00:00', 2, 1),
('2014-08-27 11:00:00', '2014-08-27 12:00:00', 1, 1);

如何對下面這個查詢得到的結果集進行過濾:

-- Stack every row of both activity tables (duplicates are kept) and list
-- them by ascending start time.
SELECT id, start_time, end_time, activity_type, user_id
FROM first_activities
UNION ALL
SELECT id, start_time, end_time, activity_type, user_id
FROM second_activities
ORDER BY start_time;

從而得到下面的最終結果集?

最終結果:

-- Merge overlapping rows that have the same user_id and activity_type into one range,
-- and split rows whose ranges intersect but whose user_id / activity_type differ

-- start_time    end_time  type user_id 
'2014-10-31 01:00:00', '2014-10-31 02:00:00', '3', '1'); 
'2014-10-31 02:00:00', '2014-10-31 03:00:00', '4', '1'); 

-- data is not merged here: the rows are split at the range intersection
'2014-10-31 03:00:00', '2014-10-31 03:30:00', '2', '1'); -- from first table 
'2014-10-31 03:30:00', '2014-10-31 04:00:00', '1', '1'); -- from second table 

-- data merged by same user_id and activity_type 
'2014-10-31 04:25:00', '2014-10-31 05:35:00', '3', '1'); 

'2014-10-31 05:30:00', '2014-11-01 06:00:00', '4', '1'); 
'2014-11-01 06:30:00', '2014-11-01 07:00:00', '2', '1'); 
'2014-11-01 07:30:00', '2014-11-01 08:00:00', '1', '1'); 
'2014-11-01 08:00:00', '2014-11-01 09:00:00', '3', '1'); 
'2014-11-01 09:00:00', '2014-11-02 10:00:00', '4', '1'); 
'2014-08-27 10:00:00', '2014-08-27 11:00:00', '2', '1'); 
'2014-08-27 11:00:00', '2014-08-27 12:00:00', '1', '1'); 

回答

0

這個問題可以簡化爲:如何把一組相鄰(或重疊)的範圍合併(壓縮)成一個範圍。我前一陣子也處理過這個問題,發現用純SQL來做有點複雜。在plpgsql代碼中用循環可以簡單地解決,不過我還找到了一個使用自定義聚合的通用解法。

函數compact_ranges(anyrange, anyrange)在兩個範圍相鄰(或重疊)時返回它們的並集,否則返回第二個範圍:

-- State-transition step for merging ranges.
--   $1: range accumulated so far (NULL before the first input)
--   $2: next range
-- Returns the union of $1 and $2 when they overlap (&&) or are adjacent (-|-);
-- otherwise returns $2, i.e. a new run starts at $2.
-- A NULL $2 leaves the accumulated range unchanged instead of discarding it.
-- When $1 is NULL both tests yield NULL, so the CASE falls through to $2.
-- Declared immutable: it uses only immutable range operators.
create or replace function compact_ranges(anyrange, anyrange)
returns anyrange
language sql
immutable
as $$
    select case
        when $2 is null then $1
        when $1 && $2 or $1 -|- $2 then $1 + $2
        else $2
    end
$$;

-- Aggregate over ranges whose state is a single range advanced by
-- compact_ranges. No initial condition is given, so the state starts as NULL.
-- The result depends on input order; use it with an ORDER BY.
CREATE AGGREGATE compact_ranges_agg (anyrange) (
    STYPE = anyrange,
    SFUNC = compact_ranges
);

這個聚合的適用範圍很窄,應當像下面的例子那樣,作爲累進式的窗口函數來調用:

-- Demo of compact_ranges_agg as a running window aggregate.
with test(rng) as (
    values
        ('[ 1, 2)'::int4range),
        ('[ 3, 7)'),   -- group 1
        ('[ 5, 10)'),  -- group 1
        ('[ 6, 8)'),   -- group 1
        ('[11, 17)'),  -- group 2
        ('[12, 16)'),  -- group 2
        ('[15, 16)'),  -- group 2
        ('[18, 19)')
),
-- Running merge: each row carries the range accumulated up to that row.
running as (
    select compact_ranges_agg(rng) over (order by rng) as new_rng
    from test
)
-- Keep one row per distinct lower bound; the descending sort on new_rng
-- makes that the widest accumulated range.
select distinct on (lower(new_rng))
    new_rng
from running
order by lower(new_rng), new_rng desc;

new_rng 
--------- 
[1,2) 
[3,10) 
[11,17) 
[18,19) 
(4 rows) 

你可以用同樣的方式把它用在你的表上:

-- Merge and split the activity intervals of both tables.
--   merged    : rows of both tables as tstzrange values. UNION (not UNION ALL)
--               collapses rows that are identical in both tables.
--   compacted : per (user_id, activity_type), overlapping or adjacent ranges
--               folded into one; DISTINCT ON keeps the widest accumulated
--               range for each lower bound.
--   final     : an interval that runs past the start of the same user's next
--               interval is cut at that start.
with merged as (
    select tstzrange(start_time, end_time) as rng, activity_type, user_id
    from first_activities
    union
    select tstzrange(start_time, end_time) as rng, activity_type, user_id
    from second_activities
),
compacted as (
    select distinct on (user_id, activity_type, lower(new_rng))
        lower(new_rng) as start_time,
        upper(new_rng) as end_time,
        activity_type,
        user_id
    from (
        select
            user_id,
            activity_type,
            compact_ranges_agg(rng) over (
                partition by user_id, activity_type
                order by rng
            ) as new_rng
        from merged
    ) s
    order by user_id, activity_type, lower(new_rng), new_rng desc
)
select
    start_time,
    -- lead() is NULL on a user's last row, so the CASE keeps end_time there.
    case
        when end_time > lead(start_time) over w then lead(start_time) over w
        else end_time
    end as end_time,
    activity_type,
    user_id
from compacted
-- Partitioned by user_id so one user's interval is never cut by another user's.
window w as (partition by user_id order by start_time)
order by start_time, user_id;

結果:

 start_time  |  end_time  | activity_type | user_id 
------------------------+------------------------+---------------+--------- 
2014-08-27 10:00:00+02 | 2014-08-27 11:00:00+02 |    2 |  1 
2014-08-27 11:00:00+02 | 2014-08-27 12:00:00+02 |    1 |  1 
2014-10-31 01:00:00+01 | 2014-10-31 02:00:00+01 |    3 |  1 
2014-10-31 02:00:00+01 | 2014-10-31 03:00:00+01 |    4 |  1 
2014-10-31 03:00:00+01 | 2014-10-31 03:30:00+01 |    2 |  1 
2014-10-31 03:30:00+01 | 2014-10-31 04:00:00+01 |    1 |  1 
2014-10-31 04:25:00+01 | 2014-10-31 05:30:00+01 |    3 |  1 
2014-10-31 05:30:00+01 | 2014-11-01 06:00:00+01 |    4 |  1 
2014-11-01 06:30:00+01 | 2014-11-01 07:00:00+01 |    2 |  1 
2014-11-01 07:30:00+01 | 2014-11-01 08:00:00+01 |    1 |  1 
2014-11-01 08:00:00+01 | 2014-11-01 09:00:00+01 |    3 |  1 
2014-11-01 09:00:00+01 | 2014-11-02 10:00:00+01 |    4 |  1 
(12 rows) 
+0

謝謝!我要如何拆分有交集的部分?例如把 2014-10-31 03:00:00+01 | 2014-10-31 04:00:00+01 變成 2014-10-31 03:30:00+01 | 2014-10-31 04:00:00+01? – Dronnikkl

+1

拆分比較容易,我又加了一個步驟。 – klin