2016-06-12 84 views
0

SQL從更改日誌恢復歷史數據

有一個表A。表A中的一行看起來如下:

+----+---------+---------+---------+------------+------------+------------------+------------------+ 
| id | value_a | value_b | value_c | created_on | created_by | last_modified_on | last_modified_by | 
+----+---------+---------+---------+------------+------------+------------------+------------------+ 
| 42 | x  | y  | z  | 2016-04-01 | Maria  | 2016-05-01  | Jim    | 
+----+---------+---------+---------+------------+------------+------------------+------------------+ 

所以,表A只包含最新的值。

還有一個表changelog。它存儲有關表A的所有更改/更新。 changelog記錄表A看起來像以下:

+-----+-----------+--------+---------+-----------+-----------------------------------------+------------+------------+ 
| id | object_id | action | field | old_value |    new_value    | created_on | created_by | 
+-----+-----------+--------+---------+-----------+-----------------------------------------+------------+------------+ 
| 234 |  42 | insert | NULL | NULL  | {value_a: xx, value_b: yy, value_c: zz} | 2016-04-01 | Maria  | 
| 456 |  42 | update | value_a | xx  | x          | 2016-04-05 | Bob  | 
| 467 |  42 | update | value_b | yy  | y          | 2016-05-01 | Jim  | 
| 678 |  42 | update | value_c | zz  | z          | 2016-05-01 | Jim  | 
+-----+-----------+--------+---------+-----------+-----------------------------------------+------------+------------+ 

我需要創建一個historical_A表,其中這條特定記錄對應的內容將類似於如下:

+----+---------+---------+---------+------------+------------+------------+--------------+ 
| id | value_a | value_b | value_c | valid_from | created_by | valid_to | modified_by | 
+----+---------+---------+---------+------------+------------+------------+--------------+ 
| 42 | xx  | yy  | zz  | 2016-04-01 | Maria  | 2016-04-05 | Bob   | 
| 42 | x  | yy  | zz  | 2016-04-05 | Bob  | 2016-05-01 | Jim   | 
| 42 | x  | y  | z  | 2016-05-01 | Jim  |   |    | 
+----+---------+---------+---------+------------+------------+------------+--------------+ 

表A大約有1 500 000行,A的changelog表約有270000行。

目前我正在使用SQL和Python腳本進行初始轉換(加載)。基本上,我先爲初始行生成插入語句(通過解析json),然後按changelog表的created_on列的順序,爲其後的每次更改生成插入語句。目前處理A表的1000行大約需要3分鐘。因此,我正在並行(x10)執行腳本,以便更快地得到結果。

我懷疑Sql + Python腳本不是解決問題的最佳方案。對於所提出的問題,是否有純粹的SQL解決方案? 這些問題是否有任何既定的最佳實踐?

+0

@Dennis 也可以到MySQL論壇上提問。 – Merlin

+0

@Merlin 只是想給你一個網址 :) 謝謝,我會去做的。這似乎是一個很典型的問題,但我沒有找到一個「適當的」解決方案 :) – Dennis

+0

或許值得從當前狀態向後推算,從而避免拆分insert行的new_value這件麻煩事——insert行只需提供日期,而按相反順序累積的更改即可還原插入時的數據。 –

回答

-1

不幸的是我的MySQL機器壞了,所以我是在SQL Server中做的,但我認爲代碼中不存在兼容性問題。我很想知道它對你是否有用,以及它的性能如何。您可能需要添加索引來提升性能。 - SQL Restoring historical data from the changelog

/* 
create table a 
(id int, value_a varchar(20), value_b varchar(20), value_c varchar(20), 
created_on date, created_by varchar(20), last_modified_on date, last_modified_by varchar(20)); 

create table changelog 
(id int, object_id int, action varchar(20), field varchar(20) , old_value varchar(20), new_value varchar(50), created_on date, created_by varchar(20)); 

create table history_work 
(changeid int,objectid int, value_a varchar(20), value_b varchar(20), value_c varchar(20), value_a_new varchar(20), value_b_new varchar(20), value_c_new varchar(20), 
created_on date, created_by varchar(20), last_modified_on date, last_modified_by varchar(20)); 

CREATE TABLE `history` (
    `changeid` INT(11) NULL DEFAULT NULL, 
    `objectid` INT(11) NULL DEFAULT NULL, 
    `value_a` VARCHAR(20) NULL DEFAULT NULL, 
    `value_b` VARCHAR(20) NULL DEFAULT NULL, 
    `value_c` VARCHAR(20) NULL DEFAULT NULL, 
    `valid_from` DATE NULL DEFAULT NULL, 
    `created_by` VARCHAR(20) NULL DEFAULT NULL, 
    `valid_to` DATE NULL DEFAULT NULL, 
    `last_modified_by` VARCHAR(20) NULL DEFAULT NULL 
) 
COLLATE='latin1_swedish_ci' 
ENGINE=InnoDB;

drop table if exists t; 
CREATE TABLE `t` (
    `changeid` INT(11) NULL DEFAULT NULL, 
    `objectid` INT(11) NULL DEFAULT NULL, 
    `value_a` VARCHAR(20) NULL DEFAULT NULL, 
    `value_b` VARCHAR(20) NULL DEFAULT NULL, 
    `value_c` VARCHAR(20) NULL DEFAULT NULL, 
    `value_a_new` VARCHAR(20) NULL DEFAULT NULL, 
    `value_b_new` VARCHAR(20) NULL DEFAULT NULL, 
    `value_c_new` VARCHAR(20) NULL DEFAULT NULL, 
    `created_on` DATE NULL DEFAULT NULL, 
    `created_by` VARCHAR(20) NULL DEFAULT NULL, 
    `last_modified_on` DATE NULL DEFAULT NULL, 
    `last_modified_by` VARCHAR(20) NULL DEFAULT NULL 
) 
COLLATE='latin1_swedish_ci' 
ENGINE=InnoDB 
; 
; 
expected result 
+----+---------+---------+---------+------------+------------+------------+--------------+ 
| id | value_a | value_b | value_c | valid_from | created_by | valid_to | modified_by | 
+----+---------+---------+---------+------------+------------+------------+--------------+ 
| 42 | xx  | yy  | zz  | 2016-04-01 | Maria  | 2016-04-05 | Bob   | 
| 42 | x  | yy  | zz  | 2016-04-05 | Bob  | 2016-05-01 | Jim   | 
| 42 | x  | y  | z  | 2016-05-01 | Jim  |   |    | 
+----+---------+---------+---------+------------+------------+------------+--------------+ 

*/ 

/* Reset the fixture tables and load the sample rows from the question.
   Column lists are spelled out so the inserts do not depend on the physical
   column order of a and changelog. */
truncate table a;
truncate table changelog;
truncate table history_work;

insert into a
    (id, value_a, value_b, value_c, created_on, created_by, last_modified_on, last_modified_by)
values
    (42, 'x', 'y', 'z', '2016-04-01', 'Maria', '2016-05-01', 'Jim');

insert into changelog
    (id, object_id, action, field, old_value, new_value, created_on, created_by)
values
    (234, 42, 'insert', null,      null, '{value_a: xx, value_b: yy, value_c: zz}', '2016-04-01', 'Maria'),
    (456, 42, 'update', 'value_a', 'xx', 'x',                                       '2016-04-05', 'Bob'),
    (467, 42, 'update', 'value_b', 'yy', 'y',                                       '2016-05-01', 'Jim'),
    (678, 42, 'update', 'value_c', 'zz', 'z',                                       '2016-05-01', 'Jim');

/* Placeholder "insert" record: one history_work row per row of a, tagged with
   changeid 0. Only the audit columns are copied; value_a, value_b and value_c
   are deliberately left out of the column list and therefore start as NULL. */
insert into history_work
    (changeid,
     objectid,
     created_on,
     created_by,
     last_modified_on,
     last_modified_by)
select
    0,
    src.id,
    src.created_on,
    src.created_by,
    src.last_modified_on,
    src.last_modified_by
from a src;
/* 
insert into history_work 
(changeid ,objectid , value_a , value_b , value_c, created_on , created_by, last_modified_on,last_modified_by) 
select 
999,id , value_a , value_b , value_c, created_on, created_by, last_modified_on, last_modified_by 
from a 
*/ 

/* One history_work row per non-insert changelog entry.
   The changelog stores a single (field, old_value, new_value) triple per row;
   it is pivoted here so that only the column named by field receives the old
   value (value_x) and the new value (value_x_new); the other value columns
   stay NULL. The change's created_on / created_by are written to both the
   created_* and the last_modified_* columns. */
insert into history_work
    (changeid,
     objectid,
     value_a,
     value_b,
     value_c,
     value_a_new,
     value_b_new,
     value_c_new,
     created_on,
     created_by,
     last_modified_on,
     last_modified_by)
select
    cl.id,
    cl.object_id,
    case when cl.field = 'value_a' then cl.old_value end,
    case when cl.field = 'value_b' then cl.old_value end,
    case when cl.field = 'value_c' then cl.old_value end,
    case when cl.field = 'value_a' then cl.new_value end,
    case when cl.field = 'value_b' then cl.new_value end,
    case when cl.field = 'value_c' then cl.new_value end,
    cl.created_on,
    cl.created_by,
    cl.created_on,
    cl.created_by
from changelog cl
where cl.action <> 'insert';

/* Derive Insert values from first old_value.

   Scratch table t gets one row (changeid 0) per object found in history_work.
   For each value column the insert-time value is:
     - the old value carried by the object's lowest-changeid history_work row
       that has a non-NULL old value for that column, when the changelog holds
       at least one update of that field;
     - otherwise the current value in a. A field with no update entries has
       kept the same value since the insert, so without this fallback it
       would stay NULL in every derived history row.

   NOTE(review): "lowest changeid = earliest change" assumes changelog ids grow
   chronologically - confirm. An update whose old_value is NULL is still
   skipped by the "is not null" filter, as before. */
truncate table t;

insert into t
    (changeid, objectid)
select distinct
    0,
    objectid
from history_work;

update t
left join a cur
    on cur.id = t.objectid
set
    t.value_a = case
        when exists (select 1
                     from changelog c
                     where c.object_id = t.objectid
                       and c.action <> 'insert'
                       and c.field = 'value_a')
        then (select hw.value_a
              from history_work hw
              where hw.objectid = t.objectid
                and hw.changeid = (select min(first_chg.changeid)
                                   from history_work first_chg
                                   where first_chg.objectid = hw.objectid
                                     and first_chg.value_a is not null))
        else cur.value_a
    end,
    t.value_b = case
        when exists (select 1
                     from changelog c
                     where c.object_id = t.objectid
                       and c.action <> 'insert'
                       and c.field = 'value_b')
        then (select hw.value_b
              from history_work hw
              where hw.objectid = t.objectid
                and hw.changeid = (select min(first_chg.changeid)
                                   from history_work first_chg
                                   where first_chg.objectid = hw.objectid
                                     and first_chg.value_b is not null))
        else cur.value_b
    end,
    t.value_c = case
        when exists (select 1
                     from changelog c
                     where c.object_id = t.objectid
                       and c.action <> 'insert'
                       and c.field = 'value_c')
        then (select hw.value_c
              from history_work hw
              where hw.objectid = t.objectid
                and hw.changeid = (select min(first_chg.changeid)
                                   from history_work first_chg
                                   where first_chg.objectid = hw.objectid
                                     and first_chg.value_c is not null))
        else cur.value_c
    end;


/* Copy the values held in scratch table t onto the placeholder row
   (changeid 0) of the same object in history_work. */
update history_work hw
inner join t seed
    on seed.objectid = hw.objectid
set
    hw.value_a = seed.value_a,
    hw.value_b = seed.value_b,
    hw.value_c = seed.value_c
where hw.changeid = 0;

#select  * from history_work; 

/* Get Changes: wherever a row carries a new value for a column, that new value
   replaces what is currently stored in the column; columns without a new
   value keep their current content. Done in one pass over the rows that
   carry at least one new value. */
update history_work
set
    value_a = coalesce(value_a_new, value_a),
    value_b = coalesce(value_b_new, value_b),
    value_c = coalesce(value_c_new, value_c)
where value_a_new is not null
   or value_b_new is not null
   or value_c_new is not null;

/* Downfill and create final table.

   Rows kept: the placeholder row (changeid 0) of every object, plus, for each
   (created_on, created_by, object_id) group in changelog, the row with the
   highest changelog id, so several field updates sharing date, author and
   object collapse into one history row.

   Downfill: each value column is read from the same object's history_work row
   with the highest changeid <= the current row's changeid that has a
   non-NULL value in that column.

   The value lookups are correlated on objectid as well as changeid. changeid 0
   exists once per object, so a lookup on changeid alone matches several rows
   as soon as more than one object is loaded and the scalar subquery fails. */
truncate table history;

insert into history
    (changeid,
     objectid,
     value_a,
     value_b,
     value_c,
     valid_from,
     created_by,
     valid_to,
     last_modified_by)
select
    h.changeid,
    h.objectid,
    (select src.value_a
     from history_work src
     where src.objectid = h.objectid
       and src.changeid = (select max(prev.changeid)
                           from history_work prev
                           where prev.objectid = h.objectid
                             and prev.value_a is not null
                             and prev.changeid <= h.changeid)) as value_a,
    (select src.value_b
     from history_work src
     where src.objectid = h.objectid
       and src.changeid = (select max(prev.changeid)
                           from history_work prev
                           where prev.objectid = h.objectid
                             and prev.value_b is not null
                             and prev.changeid <= h.changeid)) as value_b,
    (select src.value_c
     from history_work src
     where src.objectid = h.objectid
       and src.changeid = (select max(prev.changeid)
                           from history_work prev
                           where prev.objectid = h.objectid
                             and prev.value_c is not null
                             and prev.changeid <= h.changeid)) as value_c,
    h.created_on,
    h.created_by,
    h.last_modified_on,
    h.last_modified_by
from history_work h
where h.changeid = 0
   or h.changeid in (select s.maxid
                     from (select max(c.id) as maxid
                           from changelog c
                           group by c.created_on, c.created_by, c.object_id) s)
order by h.changeid;

/* Refill scratch table t with a copy of history. Column mapping:
   history.valid_from -> t.created_on, history.valid_to -> t.last_modified_on;
   every other column keeps its name. */
truncate table t;

insert into t
    (changeid,
     objectid,
     value_a,
     value_b,
     value_c,
     created_on,
     created_by,
     last_modified_on,
     last_modified_by)
select
    hist.changeid,
    hist.objectid,
    hist.value_a,
    hist.value_b,
    hist.value_c,
    hist.valid_from,
    hist.created_by,
    hist.valid_to,
    hist.last_modified_by
from history hist;

/* Close each history row with data from the next row of the SAME object:
   valid_to         <- created_on of the t row with the lowest changeid greater
                       than this row's changeid,
   last_modified_by <- created_by of that same t row.
   The newest row of an object has no successor, so both columns become NULL.
   Every row of history is updated on purpose (no WHERE clause).

   Both subquery levels are tied to h.objectid. Correlating the inner min() to
   the outer t row instead lets rows of other objects satisfy the predicate,
   which yields a multi-row scalar subquery once several objects are loaded. */
update history h
set
    h.valid_to = (select nxt.created_on
                  from t nxt
                  where nxt.objectid = h.objectid
                    and nxt.changeid = (select min(later.changeid)
                                        from t later
                                        where later.objectid = h.objectid
                                          and later.changeid > h.changeid)),
    h.last_modified_by = (select nxt.created_by
                          from t nxt
                          where nxt.objectid = h.objectid
                            and nxt.changeid = (select min(later.changeid)
                                                from t later
                                                where later.objectid = h.objectid
                                                  and later.changeid > h.changeid))
;
/* Show the resulting history table (all columns, in table order). */
select
    changeid,
    objectid,
    value_a,
    value_b,
    value_c,
    valid_from,
    created_by,
    valid_to,
    last_modified_by
from history;
+0

謝謝你。我非常感謝這個!今天晚上我會回覆你。在這個問題上真的需要更有經驗的人的洞察力 – Dennis

+0

卡在「Derive Insert values from first old_value」階段。報錯:「你無法在FROM子句中指定目標表'history_work'進行更新」 – Dennis

+0

哎呀,這是一個兼容性問題,等我能用回MySQL時會解決它。 –