2010-05-10 28 views
3

我有以下 click_log 表,用來記錄某些 URL 的命中(hit),想用 SQL SELECT 進行查詢:

site ip ua direction hit_time 
----------------------------------------------------- 
1  127.0.0.1  1   2010/01/01 00:00:00 

2  127.0.0.1  1   2010/01/01 00:01:00 

3  127.0.0.1  0   2010/01/01 00:10:00 

.... ......... 

我想選出傳入的命中(direction = 1),按網站分組,並且符合以下條件:

  • 從相同的IP和瀏覽器
  • 在10分鐘內彼此登錄
  • 10分鐘內發生4次以上。

我不確定上面是否足夠清楚。英語不是我的第一語言。讓我試着用一個例子來解釋。

如果網站1從該ip和瀏覽器獲得第一個唯一命中後10分鐘內從相同的ip和瀏覽器獲得5次命中,我希望它被包括在選擇中。

基本上我試圖找到濫用者。

+0

您正在運行哪個sql版本? – 2010-05-10 08:31:13

+0

微軟SQL Server 2008 – nLL 2010-05-10 08:39:30

回答

2

我認爲這正是你所需要的。我也包含了一些示例數據。

-- Scratch table holding the sample click log used by this example.
-- NOTE(review): the question's table also has a `ua` column; this sample omits it.
Create Table #t
(
    [Site]    int,
    IP        varchar(20),
    Direction int,         -- 1 marks an incoming hit (per the question)
    Hit_Time  datetime
)

-- Explicit column list so the insert keeps working if the table gains or
-- reorders columns; one multi-row VALUES instead of eight single-row inserts.
Insert Into #t ([Site], IP, Direction, Hit_Time)
Values
    -- Site 1: four incoming hits within four minutes
    (1, '127.0.0.1', 1, '2010-01-01 00:00:00'),
    (1, '127.0.0.1', 1, '2010-01-01 00:01:00'),
    (1, '127.0.0.1', 1, '2010-01-01 00:03:00'),
    (1, '127.0.0.1', 1, '2010-01-01 00:04:00'),
    -- Site 2: only three incoming hits (the 00:03 row has Direction = 0)
    (2, '127.0.0.2', 1, '2010-01-01 00:00:00'),
    (2, '127.0.0.2', 1, '2010-01-01 00:01:00'),
    (2, '127.0.0.2', 0, '2010-01-01 00:03:00'),
    (2, '127.0.0.2', 1, '2010-01-01 00:04:00')


-- List every site that received 4 or more incoming hits (Direction = 1) from
-- the same IP within a 10-minute window.
--
-- Each incoming hit anchors its own window [Hit_Time, Hit_Time + 10 minutes]
-- and the hits of the same site/IP inside that window are counted. Comparing
-- only the first and last hit of a site/IP would miss a burst whenever that
-- site/IP also has older or newer hits more than 10 minutes away.
Select Distinct h.Site
From #t As h
Where h.Direction = 1
  And (
        Select Count(*)
        From #t As w
        Where w.Site = h.Site
          And w.IP = h.IP
          And w.Direction = 1
          And w.Hit_Time >= h.Hit_Time
          And w.Hit_Time <= DateAdd(minute, 10, h.Hit_Time)
      ) >= 4

-- Clean up the scratch table.
Drop Table #t
+0

謝謝,這看起來不錯,但它是按網站和 IP 分組的,所以同一個網站會被列出不止一次。除此之外,它似乎工作正常 – nLL 2010-05-10 08:59:04

+0

啊我不知道你是否也需要該網站。您只需從Select&Group By子句中刪除Site來解決此問題。 – codingbadger 2010-05-10 09:12:24

+0

不,我只需要列出網站,基本上我需要找到被濫用的網站,這樣我就可以再去查看細節。當我從 select 和 group by 中刪除 ip 時,結果中也包含了只有單次命中的記錄 – nLL 2010-05-10 09:20:24

0

這樣寫如何:

-- For every incoming hit (DIRECTION = 1), count the incoming hits from the same
-- IP whose HIT_TIME is within 10 minutes before or after it, and keep the rows
-- where that count exceeds 4.
--
-- The count is computed in a derived table because a select-list alias
-- (CLICK_COUNT) cannot be referenced in the WHERE clause of the same SELECT.
-- NOTE(review): the match is on IP only (not site or ua) and one IP appears
-- once per qualifying hit, as in the original query.
SELECT hit_counts.IP, hit_counts.CLICK_COUNT
FROM (
    SELECT CL.IP,
           (SELECT COUNT(*)
            FROM Click_Log
            WHERE Click_Log.IP = CL.IP
              AND Click_Log.DIRECTION = 1
              AND DATEDIFF(MINUTE, Click_Log.HIT_TIME, CL.HIT_TIME)
                  BETWEEN -10 AND 10) AS CLICK_COUNT
    FROM Click_Log CL
    WHERE CL.DIRECTION = 1
) AS hit_counts
WHERE hit_counts.CLICK_COUNT > 4
0
-- Report every (site, ip, ua) combination that produced 5 or more incoming
-- hits inside some 10-minute window, busiest first.
;WITH rankings AS (
    -- Incoming hits only. groupId identifies one (site, ip, ua) combination;
    -- sequence numbers that combination's hits in hit_time order.
    SELECT [site], ip, ua, hit_time,
     DENSE_RANK() OVER(ORDER BY [site], ip, ua) groupId,
     ROW_NUMBER() OVER(PARTITION BY [site], ip, ua ORDER BY hit_time) sequence
    FROM Hits
    WHERE direction = 1),
periods AS (
    -- For each hit: the number of hits in the 10 minutes ending at that hit,
    -- including the hit itself. COUNT(r2.sequence) ignores the all-NULL row the
    -- LEFT JOIN yields when there is no earlier hit (COUNT(*) would count it
    -- as 1), and the + 1 adds the current hit. Earlier hits carrying the same
    -- timestamp are included because only the sequence has to be lower.
    SELECT r.groupId, r.sequence, COUNT(r2.sequence) + 1 hitCount
    FROM rankings r
    LEFT OUTER JOIN rankings r2
     ON r2.groupId = r.groupId and r2.sequence < r.sequence
     AND r2.hit_time >= DATEADD(second, -10*60, r.hit_time)
    GROUP BY r.groupId, r.sequence
),
groups AS (
    -- Busiest 10-minute window per combination.
    SELECT p.groupId, MAX(p.hitCount) maxHitCount
    FROM periods p
    GROUP BY p.groupId
)
SELECT DISTINCT r.[site], r.ip, r.ua, g.maxHitCount
FROM rankings r
INNER JOIN groups g ON g.groupId = r.groupId
WHERE g.maxHitCount >= 5
ORDER BY maxHitCount DESC
0

爲了回應提問者的評論,我補充了這個答案。

我用下面的測試數據:

-- Test table holding the sample click log used by this answer.
-- NOTE(review): the question's table also has a `ua` column; this sample omits it.
Create Table dbo.Temp
(
    [Site]    int,
    IP        varchar(20),
    Direction int,         -- 1 marks an incoming hit (per the question)
    Hit_Time  datetime
)

-- Explicit column list so the insert keeps working if the table gains or
-- reorders columns; one multi-row VALUES instead of nine single-row inserts.
Insert Into dbo.Temp ([Site], IP, Direction, Hit_Time)
Values
    -- Site 1: four incoming hits within four minutes
    (1, '127.0.0.1', 1, '2010-01-01 00:00:00'),
    (1, '127.0.0.1', 1, '2010-01-01 00:01:00'),
    (1, '127.0.0.1', 1, '2010-01-01 00:03:00'),
    (1, '127.0.0.1', 1, '2010-01-01 00:04:00'),
    -- Site 2: one isolated hit, then four hits within four minutes half an hour later
    (2, '127.0.0.2', 1, '2010-01-01 15:00:00'),
    (2, '127.0.0.2', 1, '2010-01-01 15:31:00'),
    (2, '127.0.0.2', 1, '2010-01-01 15:32:00'),
    (2, '127.0.0.2', 1, '2010-01-01 15:33:00'),
    (2, '127.0.0.2', 1, '2010-01-01 15:34:00')

首先,你需要創建一個函數來完成這項工作:

-- dbo.fn_CheckSuspectActivity
-- Returns 1 when the given site/IP has at least @MaxCount hits with the given
-- @Direction inside an @Interval-minute window that starts at one of its hits
-- between @MinDate and @MaxDate (both inclusive); otherwise returns 0.
--
-- Parameters:
--   @Site, @IP   site number and IP address to inspect
--   @MinDate     earliest Hit_Time taken into account
--   @MaxDate     latest Hit_Time taken into account
--   @Direction   only rows with this Direction value are counted
--   @Interval    window length in minutes
--   @MaxCount    number of hits inside one window that marks the IP as suspect
--
-- Every qualifying hit anchors its own window, so a burst is found wherever it
-- falls in the range. Stepping through fixed windows that start at @MinDate
-- would miss a burst that straddles two consecutive windows.
Create Function dbo.fn_CheckSuspectActivity (@Site int, @IP varchar(20), @MinDate datetime,
                @MaxDate datetime, @Direction int, @Interval int,
                @MaxCount int)

                returns int
     as begin
     Declare @IsSuspect int

     Set @IsSuspect = 0

     If Exists (
          Select 1
          From dbo.Temp As first_hit
          Where first_hit.Site = @Site
          And first_hit.IP = @IP
          And first_hit.Direction = @Direction
          And first_hit.Hit_Time >= @MinDate
          And first_hit.Hit_Time <= @MaxDate
          And (
               -- Hits of the same site/IP/direction in the window opened by first_hit,
               -- never looking past @MaxDate
               Select COUNT(*)
               From dbo.Temp As window_hit
               Where window_hit.Site = @Site
               And window_hit.IP = @IP
               And window_hit.Direction = @Direction
               And window_hit.Hit_Time >= first_hit.Hit_Time
               And window_hit.Hit_Time <= DATEADD(minute, @Interval, first_hit.Hit_Time)
               And window_hit.Hit_Time <= @MaxDate
              ) >= @MaxCount
          )
       begin
         -- Hit count reached inside at least one window
         Set @IsSuspect = 1
       end

return @IsSuspect

End
Go

那麼這個select語句應該給你正確的答案:

-- List each site that has at least one suspect IP.
-- DISTINCT keeps a site from being repeated once per suspect IP.
With Qry as
(
    -- First and last incoming hit per site/IP. Only Direction = 1 rows are
    -- used, matching the direction passed to the function below.
    Select Site,
         IP,
         MIN(Hit_Time) as MinTime,
         MAX(Hit_Time) as MaxTime

    From dbo.Temp
    Where Direction = 1
    Group By Site, IP
)

Select Distinct Site
From Qry
Where dbo.fn_CheckSuspectActivity(Site, IP, MinTime, MaxTime, 1, 10, 4) = 1
-- function params are as follows: Site Number, IP Address, FirstTimeLogged,
--         LastTimeLogged, Direction, IntervalToCheck, MaxOccurences

如果第一次和最後一次命中的時間相隔不超過10分鐘,它會檢查在這段時間內是否已超過命中次數上限。如果第一次和最後一次命中的時間相隔超過10分鐘,則從第一次命中的時間起,以10分鐘爲間隔逐段向後推進,並檢查每個10分鐘區間內是否超過了命中次數。

我希望這是你所需要的。

Barry