2013-02-24 59 views
1

Haskell包hxt的使用對我來說仍然有點奇怪,尤其是箭頭記法和由此產生的類型,簡直像魔法一樣。問題:如何多次處理同一個XML文檔?

到目前爲止,我還沒能實現以下需求:我想處理一個主要由兩部分組成的XML文件。第一部分保存對象的定義,第二部分描述這些對象的用法/目的。我想先寫一些hxt處理代碼,從part1得到Haskell數據結構,再處理part2,最後把兩個數據結構組合起來,供程序的實際邏輯使用。

多虧了 the arrows tutorial,處理文件現在已經正常。但我現在想要一種寫法來完成三個步驟:讀取文檔(惰性地),用第一個處理器處理得到的結構一次,然後再用第二個處理器處理同一個結構。我不想像下面的例子那樣調用兩次「readDocument」。

import Text.XML.HXT.Core 
import Data.Char(toUpper) 
import Data.Tree.NTree.TypeDefs 

-- | Run both extraction arrows for @filename@ and print what they return.
--
-- Each 'runX' call executes its own arrow, and both arrows are built from the
-- file name, so the document is read once per call (twice in total).
play :: FilePath -> IO ()
play filename =
    runX (getAllAddresses filename) >>= \addresses ->
    runX (getAllAddressesUsages filename) >>= \usages ->
    print addresses >> print usages



-- | Read @filename@ with validation switched off and, for every
-- @main/part1/address@ element, collect the pairs produced by 'getAddress'
-- into one list.
getAllAddresses :: FilePath -> IOSArrow XmlTree [(String,NTree XNode)]
getAllAddresses filename =
    readDocument [withValidate no] filename
        >>> childElem "main"
        >>> childElem "part1"
        >>> childElem "address"
        >>> listA getAddress   -- one result list per address element
  where
    -- Step down to the child elements carrying the given tag name.
    childElem tag = getChildren >>> isElem >>> hasName tag



-- | Yield @(element name, child node)@ pairs for the fields of one address.
--
-- Every child element of the input node is inspected. Elements not named
-- @location@ are paired with their own children; a @location@ element is
-- opened up instead and each of its child elements is paired the same way.
getAddress :: IOSArrow XmlTree (String,NTree XNode)
getAddress =
    getChildren >>> isElem >>> (plainField <+> locationField)
  where
    -- Pair an element's name with each of its child nodes.
    nameAndValue = getName &&& getChildren

    -- Fields stored directly below the address element.
    plainField = neg (hasName "location") >>> nameAndValue

    -- Fields nested one level deeper, inside the "location" container.
    locationField =
        hasName "location" >>> getChildren >>> isElem >>> nameAndValue




-- | Read @filename@ with validation switched off and run
-- 'getAddressUsagePurpose2' on every child node of @main/part2@, collecting
-- the results for each child into a list.
--
-- NOTE(review): no element or name filter is applied before 'listA', so the
-- selection happens only inside 'getAddressUsagePurpose2'; children it rejects
-- presumably still contribute an (empty) list to the output -- confirm this is
-- intended.
getAllAddressesUsages :: FilePath -> IOSArrow XmlTree [(String,NTree XNode)]
getAllAddressesUsages filename =
    readDocument [withValidate no] filename
        >>> childElem "main"
        >>> childElem "part2"
        >>> getChildren
        >>> listA getAddressUsagePurpose2
  where
    -- Step down to the child elements carrying the given tag name.
    childElem tag = getChildren >>> isElem >>> hasName tag

-- | Accept only nodes named @use_obj-names_for_purpose_2@ and pair that name
-- with each of the node's children.
getAddressUsagePurpose2 :: IOSArrow XmlTree (String,NTree XNode)
getAddressUsagePurpose2 =
    hasName usageTag >>> (getName &&& getChildren)
  where
    -- Tag name of the usage container this arrow selects.
    usageTag = "use_obj-names_for_purpose_2"

示例數據:

<main> 
<part1> 
    <address> 
    <obj-name>one</obj-name> 
    <name>peter 1</name> 
    <street>streetname 1</street> 
    <location> 
     <country>Germany</country> 
     <state>Baden Wuerttemberg</state> 
    </location> 
    </address> 
    <address> 
    <obj-name>two</obj-name> 
    <name>peter 2</name> 
    <street>streetname 2</street> 
    <location> 
     <country>Germany</country> 
     <state>Nordrhein Westfalen</state> 
     </location> 
    </address> 
</part1> 
<part2> 
    <use_obj-names_for_purpose_1> 
    <obj-name>two</obj-name> 
    </use_obj-names_for_purpose_1> 
    <use_obj-names_for_purpose_2> 
    <obj-name>two</obj-name> 
    </use_obj-names_for_purpose_2> 
</part2> 
</main> 

所以正規的問題是:

play 函數中的 monad 部分應該怎麼寫,才能做到類似下面這樣:

-- | Arrow that reads the XML document at the given path with validation
-- switched off.
readXmlDocument :: String -> IOSArrow XmlTree (NTree XNode)
readXmlDocument = readDocument [withValidate no]

-- Sketch of the desired control flow, not working code: the block ends in
-- 'print', so it runs in IO, yet 'readXmlDocument' returns an 'IOSArrow'
-- rather than an IO action, so the first bind does not typecheck as written.
-- 'processLogics' is not defined anywhere in the visible source.
play filename = do 
      document <- readXmlDocument filename 
      allAddresses <- getAllAddresses document 
      allPurposes <- getAllAddressesUsages document 
      result <- processLogics allAddresses allPurposes 
      print result 

我如何從Monads到Arrows,回到Monads,再回到普通數據,然後又回到Monads?

爲什麼我這麼做?

回答

1

一種解決問題的方法如下:

使用 Arrows 語言擴展,並用 proc 表達式在同一個函數中把讀取到的文檔交給兩條處理路徑。結果被組合成一個元組。不過,這個元組包含的仍然是兩個需要運行的箭頭,這通過兩次調用 runX 函數來完成。

當兩個結果在後續的計算中被合併時,我仍然不完全確定該文件在這種構造下是被加載了一次還是兩次。

{-# LANGUAGE Arrows #-} 

import Text.XML.HXT.Core 
import Data.Char(toUpper) 
import Data.Tree.NTree.TypeDefs 


-- | Build both extraction arrows for @filename@ and return the pair of IO
-- actions that run them: the address lists first, the usage texts second.
--
-- Nothing is executed here; the caller selects a component with 'fst' or
-- 'snd' and runs it. Each component is a separate 'runX' over its own arrow.
play filename = (runX (fst arrows), runX (snd arrows))
  where
    -- Both arrows are derived from the same document-reading arrow.
    arrows = analyseXml (readXmlDocument filename)

-- | Derive the address arrow and the usage arrow from one document arrow.
--
-- The signature is an ordinary function from an arrow to a pair of arrows,
-- and 'getAllAddresses' / 'getAllAddressesUsages' are ordinary functions, so
-- this @proc@ block is typed at the plain function arrow: it applies both
-- functions to the same argument and pairs the results. Each returned arrow
-- is built by prefixing the document arrow, so running the two results
-- separately runs the document arrow once for each.
analyseXml
    :: IOSArrow XmlTree (NTree XNode)
    -> ( IOSArrow XmlTree [(String,NTree XNode)]
       , IOSArrow XmlTree String
       )
analyseXml = proc docArrow -> do
    addressArrow <- getAllAddresses       -< docArrow
    usageArrow   <- getAllAddressesUsages -< docArrow
    returnA -< (addressArrow, usageArrow)

-- | Arrow that reads the XML document at the given path with validation
-- switched off.
readXmlDocument :: String -> IOSArrow XmlTree (NTree XNode)
readXmlDocument = readDocument [withValidate no]



-- | Extend the given document arrow so that, for every @main/part1/address@
-- element it delivers, the pairs produced by 'getAddress' are collected into
-- one list.
getAllAddresses :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree [(String,NTree XNode)]
getAllAddresses document =
    document
        >>> childElem "main"
        >>> childElem "part1"
        >>> childElem "address"
        >>> listA getAddress   -- one result list per address element
  where
    -- Step down to the child elements carrying the given tag name.
    childElem tag = getChildren >>> isElem >>> hasName tag



-- | Yield @(element name, child node)@ pairs for the fields of one address.
--
-- Every child element of the input node is inspected. Elements not named
-- @location@ are paired with their own children; a @location@ element is
-- opened up instead and each of its child elements is paired the same way.
getAddress :: IOSArrow XmlTree (String,NTree XNode)
getAddress =
    getChildren >>> isElem >>> (plainField <+> locationField)
  where
    -- Pair an element's name with each of its child nodes.
    nameAndValue = getName &&& getChildren

    -- Fields stored directly below the address element.
    plainField = neg (hasName "location") >>> nameAndValue

    -- Fields nested one level deeper, inside the "location" container.
    locationField =
        hasName "location" >>> getChildren >>> isElem >>> nameAndValue




-- | Extend the given document arrow to deliver the text found inside each
-- @main/part2/use_obj-names_for_purpose_2/obj-name@ element.
--
-- Results are individual strings, one per text node reached.
getAllAddressesUsages :: IOSArrow XmlTree (NTree XNode) -> IOSArrow XmlTree String
getAllAddressesUsages document =
    document
        >>> childElem "main"
        >>> childElem "part2"
        >>> childElem "use_obj-names_for_purpose_2"
        >>> childElem "obj-name"
        >>> getChildren
        >>> getText   -- keep only the text content of the obj-name children
  where
    -- Step down to the child elements carrying the given tag name.
    childElem tag = getChildren >>> isElem >>> hasName tag

執行可以做到如下:

*Main> snd (play "../tmp/haskell/test.xml") 
["two"] 

*Main> fst (play "../tmp/haskell/test.xml") 
[[("obj-name",NTree (XText "one") []),("name",NTree (XText "peter 1") []),("street",NTree (XText "streetname 1") []),("country",NTree (XText "Germany") []),("state",NTree (XText "Baden Wuerttemberg") [])],[("obj-name",NTree (XText "two") []),("name",NTree (XText "peter 2") []),("street",NTree (XText "streetname 2") []),("country",NTree (XText "Germany") []),("state",NTree (XText "Nordrhein Westfalen") [])]] 
*Main>