2016-03-07 93 views
5

命名捕獲組與NSRegularExpression - 獲取某個範圍所屬捕獲組的名稱

蘋果表示,NSRegularExpression是基於ICU正則表達式庫的(https://developer.apple.com/library/ios/documentation/Foundation/Reference/NSRegularExpression_Class/):

「目前支持的模式語法是由ICU規定的。ICU正則表達式在http://userguide.icu-project.org/strings/regexp中描述。」

該網頁(位於icu-project.org)聲稱現在已支持命名捕獲組,語法與.NET正則表達式相同:

(?<name>...)命名捕獲組。 <angle brackets>是字面的 - 它們出現在模式中。

我寫了一個程序,它得到單個匹配,其中包含看起來正確的多個範圍 - 儘管每個範圍都返回了兩次(原因不明) - 但我能得到的唯一信息是範圍的索引及其文本範圍。

例如,正則表達式:^(?<foo>foo)\.(?<bar>bar)\.(?<bar2>baz)$與測試字符串foo.bar.baz

給我的結果:

Idx Start Length  Text 
0  0  11   foo.bar.baz 
1  0   3   foo 
2  4   3   bar 
3  8   3   baz 

有沒有辦法知道,「baz」從捕獲組bar2來的?

+0

您是否看過[用NSRegularExpression命名捕獲組](http://stackoverflow.com/questions/24814974/named-capture-groups-with-nsregularexpression)? –

+0

@Thomas 我確實看到了,但那些討論始於2014年,都說不支持命名捕獲組 - 然而(至少在iOS 9和OS X 10.11上)它們確實受支持 - 而且至少在我的機器上可以工作,只是我無法將結果範圍映射回它們所屬的組。 – Dai

+0

Apple文檔聲明*在iOS 4.0及更高版本中可用*... –

回答

1

我遇到了同樣的問題,最終編寫了自己的解決方案。歡迎評論或改進;-)

extension NSRegularExpression {
    /// Bookkeeping for one named capture group found in `pattern`:
    /// (match of the group opener inside `pattern`,
    ///  match of the group name inside that opener,
    ///  zero-based position of the group among all *capturing* groups).
    typealias GroupNamesSearchResult = (NSTextCheckingResult, NSTextCheckingResult, Int)

    /// Scans `pattern` for capturing-group openers and records every named one.
    ///
    /// Only capturing groups are counted, so the stored index plus one is the
    /// capture-group number used by `NSTextCheckingResult.rangeAtIndex(_:)`.
    /// Non-capturing groups `(?:…)`, look-arounds, inline flags and escaped
    /// parentheses `\(` are skipped.
    ///
    /// Known limitation: parentheses inside character classes (`[(]`) or
    /// `\Q…\E` literal sections are not recognised as literals.
    ///
    /// - Returns: A dictionary keyed by group name.
    /// - Throws: Any error raised while compiling the helper expressions.
    private func textCheckingResultsOfNamedCaptureGroups() throws -> [String:GroupNamesSearchResult] {
        var groupnames = [String:GroupNamesSearchResult]()

        // Extracts the name out of an opener such as "(?<name>".
        let nameRegex = try NSRegularExpression(pattern: "^\\(\\?<(\\w+)>$", options: [])
        // An unescaped "(" that is either a named opener "(?<name>" or is not
        // followed by "?" (a plain numbered group). Group 1 is the opener itself;
        // the leading part skips pairs of backslashes (escaped backslashes).
        let openerRegex = try NSRegularExpression(pattern: "(?<!\\\\)(?:\\\\\\\\)*(\\((?:\\?<\\w+>|(?!\\?)))", options: [])

        // NSRange values are UTF-16 based, so slice through NSString rather
        // than converting to Swift ranges and force-unwrapping.
        let source = self.pattern as NSString
        let openers = openerRegex.matchesInString(self.pattern, options: [], range: NSRange(location: 0, length: source.length))
        for (n, opener) in openers.enumerate() {
            let openerText = source.substringWithRange(opener.rangeAtIndex(1))
            let nameMatches = nameRegex.matchesInString(openerText, options: NSMatchingOptions.Anchored, range: NSRange(location: 0, length: openerText.utf16.count))
            if let nameMatch = nameMatches.first {
                let name = (openerText as NSString).substringWithRange(nameMatch.rangeAtIndex(1))
                groupnames[name] = (opener, nameMatch, n)
            }
        }
        return groupnames
    }

    /// Maps every named capture group to its capture-group number (1-based,
    /// as index 0 is the whole match).
    ///
    /// - Throws: Any error raised while analysing the pattern.
    func indexOfNamedCaptureGroups() throws -> [String:Int] {
        var groupnames = [String:Int]()
        for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
            groupnames[name] = n + 1
        }
        return groupnames
    }

    /// Maps every named capture group to the range it captured in `match`.
    ///
    /// Groups that did not participate in the match are left out of the result.
    ///
    /// - Parameter match: A result produced by this regular expression.
    /// - Returns: Ranges in UTF-16 offsets, keyed by group name.
    /// - Throws: Any error raised while analysing the pattern.
    func rangesOfNamedCaptureGroups(match:NSTextCheckingResult) throws -> [String:Range<Int>] {
        var ranges = [String:Range<Int>]()
        for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
            // Guard against an index past the match's ranges (see the known
            // limitation of the pattern scan) instead of raising an exception.
            guard n + 1 < match.numberOfRanges else { continue }
            // `toRange()` yields nil for NSNotFound, which leaves the key unset.
            ranges[name] = match.rangeAtIndex(n + 1).toRange()
        }
        return ranges
    }
}

下面是一個使用示例:

// Example: pull the text between two matching quote delimiters out of `node`.
let node = "'test_literal'"
let regex = try NSRegularExpression(pattern: "^(?<delimiter>'|\")(?<value>.*)(?:\\k<delimiter>)$", options: NSRegularExpressionOptions.DotMatchesLineSeparators)
let match = regex.matchesInString(node, options: NSMatchingOptions.Anchored, range: NSRange(location: 0,length: node.utf16.count))
if let firstMatch = match.first {
    let ranges = try regex.rangesOfNamedCaptureGroups(firstMatch)
    // A `guard` with an empty `else` does not compile (the else branch must
    // leave the scope), so bind the optional with `if let` instead.
    if let range = ranges["value"] {
        // `range` is expressed in UTF-16 offsets, hence the NSString slicing.
        print((node as NSString).substringWithRange(NSRange(range)))
    }
}
+0

有趣!你能用一些使用例子來修改你的答案嗎? – Dai

+0

完成。再看一遍之後,我也意識到這並不顯而易見......這段代碼是我幾個月前寫的,當時有理由把匹配和獲取範圍分成兩個不同的調用。 –

1

我在Daniele Bernardini創建的代碼基礎上做了一些工作。

有一些變化:

  • 首先,代碼現在與Swift 3兼容。
  • Daniele的代碼有一個缺陷:它無法捕獲嵌套的捕獲組。我讓正則表達式略微不那麼貪婪,以允許嵌套的捕獲組。
  • 我更喜歡在一個Set中直接接收實際的捕獲內容。我添加了一個名爲captureGroups()的方法,它以字符串而不是範圍的形式返回捕獲。

    import Foundation 
    
    extension String {
        /// Runs `regex` over the receiver and returns, for every match, the text
        /// of each of its ranges (index 0 is the whole match).
        ///
        /// A range that was not found is reported as an empty string. If `regex`
        /// cannot be compiled the result is an empty array.
        func matchingStrings(regex: String) -> [[String]] {
            guard let expression = try? NSRegularExpression(pattern: regex, options: []) else { return [] }
            let source = self as NSString
            let wholeRange = NSMakeRange(0, source.length)

            var rows = [[String]]()
            for match in expression.matches(in: self, options: [], range: wholeRange) {
                var row = [String]()
                for groupIndex in 0..<match.numberOfRanges {
                    let groupRange = match.rangeAt(groupIndex)
                    if groupRange.location == NSNotFound {
                        row.append("")
                    } else {
                        row.append(source.substring(with: groupRange))
                    }
                }
                rows.append(row)
            }
            return rows
        }

        /// Converts a UTF-16 based `NSRange` into a range of `String.Index`.
        ///
        /// - Returns: `nil` when either bound lies past the end of the string or
        ///   has no equivalent position in the string itself.
        func range(from nsRange: NSRange) -> Range<String.Index>? {
            let units = utf16
            // Resolve the lower bound first so an out-of-range location bails
            // out before the upper bound is computed.
            guard let lower16 = units.index(units.startIndex, offsetBy: nsRange.location, limitedBy: units.endIndex) else {
                return nil
            }
            guard let upper16 = units.index(units.startIndex, offsetBy: nsRange.location + nsRange.length, limitedBy: units.endIndex) else {
                return nil
            }
            guard let lower = lower16.samePosition(in: self), let upper = upper16.samePosition(in: self) else {
                return nil
            }
            return lower ..< upper
        }

    }
    
    extension NSRegularExpression {
        /// Bookkeeping for one named capture group found in `pattern`:
        /// (match of the group opener inside `pattern`,
        ///  match of the group name inside that opener,
        ///  zero-based position of the group among all *capturing* groups).
        typealias GroupNamesSearchResult = (NSTextCheckingResult, NSTextCheckingResult, Int)

        /// Scans `pattern` for capturing-group openers and records every named one.
        ///
        /// Only capturing groups are counted, so the stored index plus one is the
        /// capture-group number used by `NSTextCheckingResult.rangeAt(_:)`.
        /// Non-capturing groups `(?:…)`, look-arounds, inline flags and escaped
        /// parentheses `\(` are skipped; nested groups are handled because only
        /// the openers are inspected.
        ///
        /// Known limitation: parentheses inside character classes (`[(]`) or
        /// `\Q…\E` literal sections are not recognised as literals.
        private func textCheckingResultsOfNamedCaptureGroups() -> [String:GroupNamesSearchResult] {
            var groupnames = [String:GroupNamesSearchResult]()

            guard
                // Extracts the name out of an opener such as "(?<name>".
                let nameRegex = try? NSRegularExpression(pattern: "^\\(\\?<(\\w+)>$", options: []),
                // An unescaped "(" that is either a named opener "(?<name>" or is
                // not followed by "?" (a plain numbered group). Group 1 is the
                // opener itself; the leading part skips escaped backslashes.
                let openerRegex = try? NSRegularExpression(pattern: "(?<!\\\\)(?:\\\\\\\\)*(\\((?:\\?<\\w+>|(?!\\?)))", options: [])
            else {
                // Both patterns are constants, so this indicates a programming
                // error; the alternative would be to make this method throwing.
                assertionFailure("helper patterns failed to compile")
                return groupnames
            }

            // NSRange values are UTF-16 based, so slice through NSString rather
            // than converting to String.Index ranges and force-unwrapping.
            let source = pattern as NSString
            let openers = openerRegex.matches(in: pattern, options: [], range: NSRange(location: 0, length: source.length))
            for (n, opener) in openers.enumerated() {
                let openerText = source.substring(with: opener.rangeAt(1))
                let nameMatches = nameRegex.matches(in: openerText, options: NSRegularExpression.MatchingOptions.anchored, range: NSRange(location: 0, length: openerText.utf16.count))
                if let nameMatch = nameMatches.first {
                    let name = (openerText as NSString).substring(with: nameMatch.rangeAt(1))
                    groupnames[name] = (opener, nameMatch, n)
                }
            }
            return groupnames
        }

        /// Maps every named capture group to its capture-group number (1-based,
        /// as index 0 is the whole match).
        ///
        /// Declared `throws` for source compatibility with existing callers; the
        /// current implementation does not throw.
        func indexOfNamedCaptureGroups() throws -> [String:Int] {
            var groupnames = [String:Int]()
            for (name, (_, _, n)) in textCheckingResultsOfNamedCaptureGroups() {
                groupnames[name] = n + 1
            }
            return groupnames
        }

        /// Maps every named capture group to the range it captured in `match`.
        ///
        /// Groups that did not participate in the match are left out of the result.
        /// Declared `throws` for source compatibility with existing callers; the
        /// current implementation does not throw.
        ///
        /// - Parameter match: A result produced by this regular expression.
        /// - Returns: Ranges in UTF-16 offsets, keyed by group name.
        func rangesOfNamedCaptureGroups(match:NSTextCheckingResult) throws -> [String:Range<Int>] {
            var ranges = [String:Range<Int>]()
            for (name, (_, _, n)) in textCheckingResultsOfNamedCaptureGroups() {
                // Guard against an index past the match's ranges (see the known
                // limitation of the pattern scan) instead of raising an exception.
                guard n + 1 < match.numberOfRanges else { continue }
                // `toRange()` yields nil for NSNotFound, which leaves the key unset.
                ranges[name] = match.rangeAt(n + 1).toRange()
            }
            return ranges
        }

        /// Matches the whole of `string` and returns the captured text of every
        /// named group. See `captureGroups(string:options:range:)`.
        func captureGroups(string: String, options: NSRegularExpression.MatchingOptions = []) -> [String:String] {
            return captureGroups(string: string, options: options, range: NSRange(location: 0, length: string.utf16.count))
        }

        /// Matches `string` within `range` and returns the captured text of every
        /// named group, keyed by group name.
        ///
        /// When there are several matches, a later match overwrites the value an
        /// earlier one stored under the same name. A group that did not take part
        /// in a match contributes nothing for that match.
        ///
        /// - Parameters:
        ///   - string: The text to search.
        ///   - options: Matching options forwarded to `matches(in:options:range:)`.
        ///   - range: The UTF-16 range of `string` to search.
        func captureGroups(string: String, options: NSRegularExpression.MatchingOptions = [], range: NSRange) -> [String:String] {
            var dict = [String:String]()
            let names = textCheckingResultsOfNamedCaptureGroups()
            let source = string as NSString
            for m in matches(in: string, options: options, range: range) {
                for (name, (_, _, n)) in names {
                    let index = n + 1
                    guard index < m.numberOfRanges else { continue }
                    let captured = m.rangeAt(index)
                    // Optional groups that did not match report NSNotFound;
                    // slicing with that range would crash.
                    guard captured.location != NSNotFound else { continue }
                    dict[name] = source.substring(with: captured)
                }
            }
            return dict
        }
    }
    

使用新的方法captureGroups()的一個例子是:

// Example: collect every named capture of a quoted literal as strings.
let literal = "'test_literal'"
let quotedLiteral = try NSRegularExpression(
    pattern: "^(?<all>(?<delimiter>'|\")(?<value>.*)(?:\\k<delimiter>))$",
    options: .dotMatchesLineSeparators
)
let captures = quotedLiteral.captureGroups(string: literal, options: .anchored)
print(captures)

它將打印:

["delimiter": "\'", "all": "\'test_literal\'", "value": "test_literal"]

+0

謝謝,你救了我。 –

2

自iOS 11起支持命名捕獲組。NSTextCheckingResult提供了函數open func range(withName name: String) -> NSRange

使用正則表達式:^(?<foo>foo)\.(?<bar>bar)\.(?<bar2>baz)$與測試字符串foo.bar.baz,會得到一個包含4個範圍的匹配結果。函數match.range(withName: "bar2")返回字符串baz的範圍。

+0

我編寫了一個基於該擴展的擴展來創建所有命名捕獲組及其值的字典:https://stackoverflow.com/a/48309290/235297 –