2011-02-03 44 views
1

用 T-SQL 計算一些統計數據

我有一些文件,它們可能屬於多個類別,並且可以包含多個標記(單詞):

-- Token table: Id is the primary key, Text holds the token's text and may be NULL.
CREATE TABLE Tokens
(
    Id   INT           NOT NULL PRIMARY KEY,
    Text NVARCHAR(255) NULL
)

-- Link table: each row associates a document id, a class id and a token id.
-- The three *Fk columns are nullable and no foreign-key constraints are declared.
CREATE TABLE DocumentClassTokens
(
    Id         INT NOT NULL PRIMARY KEY,
    DocumentFk INT NULL,
    ClassFk    INT NULL,
    TokenFk    INT NULL
)

我想確定這些統計數據(對於給定類的所有令牌):

  • A = 包含該令牌且屬於該類的不同文件的數量
  • B = 包含該令牌且不屬於該類的不同文件的數量
  • C = 不包含該令牌且屬於該類的不同文件的數量
  • D = 不包含該令牌且不屬於該類的不同文件的數量

我目前使用的是下面的查詢,但結果看起來不正確(不過我很確定 A 和 B 的計算是正確的):

-- Per-token document statistics for the class whose text is 'bla'.
-- For every row of dbo.Tokens the result holds:
--   A = distinct documents that contain the token and belong to the class
--   B = distinct documents that contain the token and do not belong to the class
--   C = distinct documents that do not contain the token and belong to the class
--   D = distinct documents that do not contain the token and do not belong to the class
-- A document belongs to the class when at least one of its rows has ClassFk = @class,
-- so every document falls into exactly one bucket and A + B + C + D equals the
-- number of distinct documents for every token.
-- "Does not contain" is computed as a complement (class total minus A / B): an
-- inequality join such as Tokens.id != TokenFk matches the rows of every *other*
-- token and therefore counts the wrong documents.
-- Counts are keyed by token id, so tokens sharing the same text are not merged.
-- NOTE(review): dbo.Classes is not shown here; it is assumed to expose id and text.
declare @class int; 

select @class = id from dbo.Classes where text = 'bla' 

;with DocClass as 
(
    -- One row per document; InClass = 1 when any row of the document carries @class.
    select 
     DocumentFk, 
     max(case when ClassFk = @class then 1 else 0 end) as InClass 
    from dbo.DocumentClassTokens 
    where DocumentFk is not null 
    group by DocumentFk 
) 
,Totals as 
(
    -- Number of documents inside / outside the class; sum() over no rows is NULL, hence coalesce.
    select 
     coalesce(sum(InClass), 0) as ClassDocs, 
     coalesce(sum(1 - InClass), 0) as NonClassDocs 
    from DocClass 
) 
,TokenDocs as 
(
    -- Per token: distinct documents containing it, split by class membership of the document.
    select 
     dct.TokenFk, 
     count(distinct case when dc.InClass = 1 then dct.DocumentFk end) as A, 
     count(distinct case when dc.InClass = 0 then dct.DocumentFk end) as B 
    from dbo.DocumentClassTokens as dct 
    inner join DocClass as dc on dc.DocumentFk = dct.DocumentFk 
    group by dct.TokenFk 
) 
select 
    coalesce(td.A, 0) as A, 
    coalesce(td.B, 0) as B, 
    tot.ClassDocs - coalesce(td.A, 0) as C, 
    tot.NonClassDocs - coalesce(td.B, 0) as D, 
    t.Text, 
    t.id 
from dbo.Tokens as t 
cross join Totals as tot 
-- left join keeps tokens that occur in no document: A = B = 0, C and D are the totals.
left outer join TokenDocs as td on td.TokenFk = t.id 
order by t.text 

任何反饋將非常感激。非常感謝!

最良好的祝願,

Christian

PS:

一些測試數據:

-- Test fixture: recreates Tokens and DocumentClassTokens in database "play" and loads sample rows.
-- The drops are guarded with OBJECT_ID so the script also runs when the tables do not exist yet.
use play; 

if object_id('Tokens', 'U') is not null 
    drop table Tokens 
create table Tokens 
(
    Id INT not null, 
    Text NVARCHAR(255) null, 
    primary key (Id) 
) 

insert into Tokens (id, text) values (1,'1') 
insert into Tokens (id, text) values (2,'2') 

if object_id('DocumentClassTokens', 'U') is not null 
    drop table DocumentClassTokens 
create table DocumentClassTokens (
     Id INT not null, 
     DocumentFk INT null, 
     ClassFk INT null, 
     TokenFk INT null, 
     primary key (Id) 
    ) 

-- Column order: Id, DocumentFk, ClassFk, TokenFk.
-- Document 2 has rows in class 1 and class 2; the last row uses TokenFk 3,
-- which has no matching row in Tokens.
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (1,1,1,1) 
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (2,1,1,2) 
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (3,2,1,1) 
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (4,2,2,1) 
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (5,3,2,1) 
insert into DocumentClassTokens (Id,documentfk,ClassFk,TokenFk) values (6,3,2,3) 
+0

在您的描述中,您談到文檔包含或不包含(某個?)令牌,以及屬於或不屬於(同樣,某個?)類。而在您給出的腳本中,您明確聲明了 `@class` 參數,因此它肯定是一個*確定的*類,將用來進行比較。按照您的描述,腳本中是否也應該聲明一個 `@token` 參數?還是說您的意思是「包含*令牌*」(即任意令牌)?請澄清它應該是什麼。 – 2011-02-04 08:28:30

+0

@ Andriy M感謝您的回覆。該腳本應該返回一個包含四列記號A,B,C,D的表格。所以,我想獲得給定類別的所有記號的統計信息。 – cs0815 2011-02-04 08:55:30

回答

1

你的問題現在似乎更加清晰了,如果我沒有忽視任何東西,那麼這裏就是你可以嘗試對您的數據運行查詢。

DECLARE @class int;
SET @class = 1;

-- Per-token A/B/C/D counts. The class test is applied to each row's own ClassFk,
-- so a document with rows in several classes can be counted on both the
-- "class" and the "non-class" side.
WITH by_token AS (
    -- Distinct documents per token, split by whether the row's ClassFk equals @class.
    SELECT
        dct.TokenFk,
        COUNT(DISTINCT CASE WHEN dct.ClassFk = @class THEN dct.DocumentFk END) AS TokenClassDocs,
        COUNT(DISTINCT CASE WHEN dct.ClassFk = @class THEN NULL ELSE dct.DocumentFk END) AS TokenNonClassDocs
    FROM DocumentClassTokens AS dct
    GROUP BY dct.TokenFk
),
totals AS (
    -- The same split over the whole table, independent of token.
    SELECT
        COUNT(DISTINCT CASE WHEN ClassFk = @class THEN DocumentFk END) AS TotalClassDocs,
        COUNT(DISTINCT CASE WHEN ClassFk = @class THEN NULL ELSE DocumentFk END) AS TotalNonClassDocs
    FROM DocumentClassTokens
)
SELECT
    bt.TokenFk,
    bt.TokenClassDocs                         AS A,
    bt.TokenNonClassDocs                      AS B,
    tt.TotalClassDocs - bt.TokenClassDocs     AS C,
    tt.TotalNonClassDocs - bt.TokenNonClassDocs AS D
FROM by_token AS bt
CROSS JOIN totals AS tt

請讓我們知道它是否正確。


EDIT

上述解決方案是錯誤的。下面是修正後的版本,它看起來是正確的,只是我不像喜歡那個錯誤版本那樣喜歡它(多麼諷刺……)。

DECLARE @class int;
SET @class = 1;

-- Per-token A/B/C/D counts with class membership decided per document:
-- a document is "in the class" when it appears in class_docs.
WITH class_docs AS (
    -- Documents that have at least one row with ClassFk = @class.
    SELECT DISTINCT DocumentFk
    FROM DocumentClassTokens
    WHERE ClassFk = @class
),
tagged AS (
    -- Every row, with ClassDocumentFk set only when its document is in the class.
    SELECT
        dct.TokenFk,
        dct.DocumentFk,
        cd.DocumentFk AS ClassDocumentFk
    FROM DocumentClassTokens AS dct
    LEFT JOIN class_docs AS cd
        ON dct.DocumentFk = cd.DocumentFk
),
by_token AS (
    SELECT
        TokenFk,
        COUNT(DISTINCT ClassDocumentFk) AS TokenClassDocs,
        COUNT(DISTINCT CASE WHEN ClassDocumentFk IS NULL THEN DocumentFk END) AS TokenNonClassDocs
    FROM tagged
    GROUP BY TokenFk
),
totals AS (
    SELECT
        COUNT(DISTINCT ClassDocumentFk) AS TotalClassDocs,
        COUNT(DISTINCT CASE WHEN ClassDocumentFk IS NULL THEN DocumentFk END) AS TotalNonClassDocs
    FROM tagged
)
SELECT
    bt.TokenFk,
    bt.TokenClassDocs                         AS A,
    bt.TokenNonClassDocs                      AS B,
    tt.TotalClassDocs - bt.TokenClassDocs     AS C,
    tt.TotalNonClassDocs - bt.TokenNonClassDocs AS D
FROM by_token AS bt
CROSS JOIN totals AS tt

注:我想我現在明白可以如何檢查數據是否有誤了:每一行(即每個令牌)的 A、B、C、D 之和必須等於文件總數。這並不令人意外,因爲每個文件都恰好滿足所考察的 4 種情況中的 1 種。如果某行的合計與文件總數不同,那麼該行中的某些數字肯定是錯誤的。

1

根據你的描述,這似乎就是你想要的。不過看了你的代碼之後,我不太確定。

編輯1使用列而不是行和@ClassID作爲過濾器。

declare @ClassID int 
set @ClassID = 1 

-- Single-row summary over all documents for class @ClassID:
--   A = documents with a token and in the class
--   B = documents with a token and not in the class
--   C = documents without any token and in the class
--   D = documents without any token and not in the class
-- The cte has one row per document. TokenFk is NULL only when every row of the
-- document has a NULL TokenFk; ClassFk is NULL when no row of the document has
-- ClassFk = @ClassID. The class test lives inside the aggregate instead of a
-- WHERE filter: filtering on ClassFk = @ClassID beforehand would discard all
-- documents outside the class and leave B and D permanently 0.
;with cte(DocumentFk, TokenFk, ClassFk) as 
(
    select 
     DocumentFk, 
     max(TokenFK), 
     max(case when ClassFk = @ClassID then ClassFk end) 
    from DocumentClassTokens 
    group by DocumentFK 
) 
select 
    -- count() skips the NULLs produced by a non-matching case, and yields 0 on empty input.
    count(case when TokenFk is not null and ClassFk is not null then 1 end) as A, 
    count(case when TokenFk is not null and ClassFk is null then 1 end) as B, 
    count(case when TokenFk is null and ClassFk is not null then 1 end) as C, 
    count(case when TokenFk is null and ClassFk is null then 1 end) as D 
from cte 
+0

@Mikael Eriksson:AFAIK,聚合總是返回一個值。您應該用零比較替換NULL檢查。 (儘管我還沒有分析腳本的邏輯。) – 2011-02-04 08:00:25