2010-09-14 82 views
5

Haskell的Network.Browser模塊是否像Perl的LWP或Python的mechanize一樣?

Network.Browser的文檔說該模塊支持以下功能:

  • HTTP認證處理
  • 重定向的透明處理
  • Cookie存儲+傳輸
  • 事務日誌記錄
  • 代理中介連接

對我來說,這聽起來像是一個瀏覽器的雛形,可以讓我抓取網頁、處理網站的身份驗證(authentication)、cookie等。

然而,模塊自帶零示例代碼,指令或教程。我無法弄清楚如何使用它。

有人可以舉例說明它可能如何用於1)轉到網站,2)登錄到它,3)下載需要您登錄的文件?

回答

3

我建議看一下Network.Curl。

要回答你的問題,這裏是一個例子(取自http://haskell.pastebin.com/9kPiGxiH):

import Data.IORef 
import Network.HTTP 
import Network.Browser 
import Network.URI 
import Data.Maybe 
import Control.Monad 
import Data.List 
import Text.Regex.TDFA 
import Control.Concurrent 

-- | Build the advanced-search URL for the result page starting at the given
-- offset. The offset is rendered with 'show' and appended to the end of the
-- fixed query string.
pageUrl :: Show a => a -> URI
pageUrl off = URI scheme (Just authority) path (baseQuery ++ show off) fragment
  where
    scheme    = "http:"
    authority = URIAuth "" "www.interpals.net" ""
    path      = "/dosearch.php"
    fragment  = ""
    -- Fixed search criteria; only the trailing offset value varies.
    baseQuery = "?todo=search&sec=adv&age1=15&age2=18&sex[]=FEMALE&lfor[]=lfor_email&lfor[]=lfor_snail&lfor[]=lfor_langex&lfor[]=lfor_friend&lfor[]=lfor_flirt&lfor[]=lfor_relation&countries[]=AT&countries[]=DE&countries[]=CH&state=&languages[]=any&keywords=&sort=p.last_login+DESC&offset="

-- | Fetch a page and return the body of the response.
--
-- Before the request is issued, both the error handler and the output
-- handler of the browser are replaced by a handler that ignores its
-- argument. The request is a GET with an empty body and a single
-- hard-coded @Cookie@ header.
getPage :: URI -> BrowserAction (HandleStream [Char]) String
getPage uri = do
    setErrHandler discard
    setOutHandler discard
    (_, response) <- request cookieRequest
    return (rspBody response)
  where
    -- Handler that drops whatever message it is given.
    discard _ = return ()

    cookieRequest = Request
        { rqURI     = uri
        , rqMethod  = GET
        , rqHeaders = [Header HdrCookie sessionCookie]
        , rqBody    = ""
        }

    -- Cookie string sent verbatim with every request.
    sessionCookie = "__ubic1=MTE3ODM0NDM0MTRjN2RkYTA1OTAzMmU4LjkxODE1Njk2; __utma=46363135.421215970.1283316265.1283538085.1283541700.10; __utmz=46363135.1283316265.1.1.utmccn=(direct)|utmcsr=(direct)|utmcmd=(none); __utmc=46363135; PHPSESSID=59a130c66d4853f85289852f15cefa3a; resolution=1920x1080; ip_auto_login[login]=cap11235; ip_auto_login[password_md5]=NDM0NWM0NDlkZTg4MjRkMWVhZmJmZWNiZTQwOWQ4YTE%3D; __utmb=46363135"

-- | Fetch the search-result page at the given offset and extract the names
-- captured by the profile-link pattern.
--
-- Returns the distinct captured names (in order of first appearance) paired
-- with the offset to request next: @off + 10@ when the page produced at
-- least one match, and @0@ when it produced none.
getPeople :: Int -> BrowserAction (HandleStream [Char]) ([String], Int)
getPeople off = do
    body <- getPage (pageUrl off)
    let matches = (body =~ "<a href='/([^?.]+)\\?") :: [[String]]
        -- Every match is the whole matched text followed by its capture
        -- groups; take the first group by pattern matching so a malformed
        -- entry is skipped rather than crashing on an out-of-range index.
        names = [name | (_ : name : _) <- matches]
        -- 'null' avoids walking the whole match list just to test emptiness.
        next = if null matches then 0 else off + 10
    return (nub names, next)

-- | Build the profile URL for a user name.
--
-- Characters that are not allowed in a URI are percent-escaped before
-- parsing, so names containing e.g. spaces no longer make the parse fail.
-- Characters that are already legal (including @%@) are left untouched, so
-- names that parsed before yield the same URI as before.
--
-- If the escaped string still is not a valid URI, this calls 'error' with a
-- message that names the offending input instead of failing with the
-- anonymous 'fromJust' error.
personUrl :: String -> URI
personUrl name = fromMaybe invalid (parseURI url)
  where
    url     = "http://www.interpals.net/" ++ escapeURIString isAllowedInURI name
    invalid = error ("personUrl: cannot build a valid URI from " ++ show url)

-- | Request the profile page of the named person and discard the returned
-- body.
viewPerson :: String -> BrowserAction (HandleStream [Char]) ()
viewPerson name = getPage (personUrl name) >> return ()

-- | Run the crawl loop forever.
--
-- The reference holds a pair of (running count of names found, offset of
-- the next result page). Each iteration fetches one result page, forks one
-- thread per name found to view that profile, stores the updated pair,
-- prints the new count, and recurses.
doCycle :: IORef (Int, Int) -> IO ()
doCycle r = do
    (seenSoFar, offset) <- readIORef r
    (names, nextOffset) <- browse (getPeople offset)
    -- Each profile view runs in its own thread with its own browser session.
    forM_ names $ \name -> forkIO (browse (viewPerson name))
    let total = seenSoFar + length names
        -- Start over from the first page once the offset reaches 2000.
        wrapped
            | nextOffset < 2000 = nextOffset
            | otherwise         = 0
    writeIORef r (total, wrapped)
    print total
    doCycle r

-- | Entry point: start the crawl loop with a count of zero and an offset of
-- zero.
main :: IO ()
main = newIORef (0, 0) >>= doCycle