以下是使用 BigQuery 標準 SQL 的寫法:
#standardSQL
-- For every (file extension, search string) pair, report how many response
-- bodies were scanned (total), how many contain the string (matches), and the
-- share of matching bodies (ratio, rounded to 3 decimals).
WITH strings AS (
  -- Search terms, lower-cased so they can be compared against LOWER(body).
  SELECT LOWER(raw_str) AS str
  FROM UNNEST(['abc', 'XYZ']) AS raw_str
),
files AS (
  -- File extensions of interest, lower-cased to compare against LOWER(url).
  SELECT LOWER(raw_ext) AS ext
  FROM UNNEST(['JS', 'go', 'php']) AS raw_ext
)
SELECT
  f.ext,
  s.str,
  COUNT(1) AS total,
  -- STRPOS does a literal substring search. A regex match would treat
  -- characters such as '.' in a term like 'document.write' as wildcards
  -- and over-count.
  COUNTIF(STRPOS(LOWER(b.body), s.str) > 0) AS matches,
  ROUND(COUNTIF(STRPOS(LOWER(b.body), s.str) > 0) / COUNT(1), 3) AS ratio
FROM `httparchive.har.2017_09_01_chrome_requests_bodies` AS b
-- Literal suffix test: only URLs that end in '.<ext>' are kept, so a URL
-- with a trailing query string (e.g. 'app.js?v=1') is not matched.
INNER JOIN files AS f
  ON ENDS_WITH(LOWER(b.url), CONCAT('.', f.ext))
-- Every retained body is tested against every search string.
CROSS JOIN strings AS s
GROUP BY f.ext, s.str
-- The result has at most one row per (ext, str) pair, so sorting is cheap
-- and makes the output order deterministic.
ORDER BY ext, str
您可以使用如下的虛擬數據來測試/試用以上查詢:
#standardSQL
-- Self-contained version of the match-ratio query: the first CTE shadows the
-- real httparchive table name with a handful of dummy rows, so the query can
-- be run without scanning the real table.
--
-- Expected result for the dummy rows below:
--   ext | str | total | matches | ratio
--   go  | abc |     5 |       2 | 0.4
--   go  | xyz |     5 |       2 | 0.4
--   js  | abc |     3 |       1 | 0.333
--   js  | xyz |     3 |       1 | 0.333
--   php | abc |     2 |       0 | 0.0
--   php | xyz |     2 |       0 | 0.0
WITH `httparchive.har.2017_09_01_chrome_requests_bodies` AS (
  SELECT '1234.js' AS url, 'abc=1;x=2' AS body UNION ALL
  SELECT 'qaz.js', 'y=1;xyz=0' UNION ALL
  SELECT 'edc.go', 's=1;xyz=2;abc=3' UNION ALL
  SELECT 'edc.go', 's=1;xyz=4;abc=5' UNION ALL
  SELECT 'rfv.php', 'd=1' UNION ALL
  -- '.txt' is not in the extension list, so this row is dropped by the join.
  SELECT 'tgb.txt', '?abc=xyz' UNION ALL
  SELECT 'yhn.php', 'like v' UNION ALL
  SELECT 'ujm.go', 'lkjsad' UNION ALL
  SELECT 'ujm.go', 'yhj' UNION ALL
  SELECT 'ujm.go', 'dfgh' UNION ALL
  SELECT 'ikl.js', 'werwer'
),
strings AS (
  -- Search terms, lower-cased so they can be compared against LOWER(body).
  SELECT LOWER(raw_str) AS str
  FROM UNNEST(['abc', 'XYZ']) AS raw_str
),
files AS (
  -- File extensions of interest, lower-cased to compare against LOWER(url).
  SELECT LOWER(raw_ext) AS ext
  FROM UNNEST(['JS', 'go', 'php']) AS raw_ext
)
SELECT
  f.ext,
  s.str,
  COUNT(1) AS total,
  -- STRPOS does a literal substring search. A regex match would treat
  -- characters such as '.' in a term like 'document.write' as wildcards
  -- and over-count.
  COUNTIF(STRPOS(LOWER(b.body), s.str) > 0) AS matches,
  ROUND(COUNTIF(STRPOS(LOWER(b.body), s.str) > 0) / COUNT(1), 3) AS ratio
FROM `httparchive.har.2017_09_01_chrome_requests_bodies` AS b
-- Literal suffix test: only URLs that end in '.<ext>' are kept.
INNER JOIN files AS f
  ON ENDS_WITH(LOWER(b.url), CONCAT('.', f.ext))
-- Every retained body is tested against every search string.
CROSS JOIN strings AS s
GROUP BY f.ext, s.str
ORDER BY ext, str
謝謝,但我得到了錯誤:查詢失敗。錯誤:未找到字段 'str'。 – DevDavid
@NhanNguyen……感謝您解決這個問題。 –
我只能讓它對一個字符串起作用;如果添加更多字符串,就會得到不正確的計數/比率(另外,我要如何知道哪些數據屬於哪個字符串?): 'SELECT s.str, s.str2, matched, matched2, COUNT(*) AS total, RATIO_TO_REPORT(total) OVER() AS ratio FROM (SELECT crb.url, s.str, s.str2, (LOWER(crb.body) CONTAINS s.str) AS matched, (LOWER(crb.body) CONTAINS s.str2) AS matched2 FROM httparchive.har.2017_09_01_chrome_requests_bodies crb CROSS JOIN (SELECT 'document.write' AS str, 'document.ready' AS str2) s WHERE url LIKE "%.js") GROUP BY s.str, s.str2, matched, matched2;' – DevDavid