2016-05-15 15 views
4

當我測試的算法Soundex根據Wikipedia article on Soundex,我發現Tymczak返回T520,不T522菲斯特返回P123,而不是P236輸出是錯誤的案件 - 「Tymczak」和「菲斯特」

我不知道爲什麼輸出不正確。

我的代碼:

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using System.Text.RegularExpressions; 
using System.Threading.Tasks; 

namespace ConsoleApplication4 
{ 
    class Program 
    { 
    static void Main(string[] args) 
    { 
     string s = "Tymczak"; 
     string result = SoundexByWord(s); 
     Console.WriteLine(result); 
    } 

    private static string Soundex(string data) 
    { 
     string first = "pv"; 
     string second = "xz"; 
     string third = "dt"; 
     string forth = "mn"; 
     string fifth = "bf"; 
     string sixth = "cgj"; 
     string seventh = "kqs"; 
     //ana ast5dtmt string builder 34an 3aml zy al list fy apend 
     // 34an a apend mn al a5r al string 

     StringBuilder result = new StringBuilder(); 
     if (data != null && data.Length > 0) 
     { 
      string previousCode = "", currentCode = "", currentLetter = ""; 

      // append on the string from the last 
      // get the first characheter of the string data 
      // append it on the result 
      //according to algorithm first charachter stays the same 
      result.Append(data.Substring(0, 1)); 
      RemoveUnwantedChar(data); 
      for (int i = 1; i < data.Length; i++) 
      { 
       // nb2d al algorithm first take the second characheter in data 
       //n7wlo la lower 
       currentLetter = data.Substring(i, 1).ToLower(); 
       currentCode = ""; 
       // No string for zero because we will remove it 
       if (first.IndexOf(currentLetter) > -1) 
        //search for bfpv in the current letter return number 
        // -1 is out of string index 
        currentCode = "1"; 

       else if (fifth.IndexOf(currentLetter) > -1) 
        //search for bfpv in the current letter return number 
        // -1 is out of string index 
        currentCode = "1"; 

       else if (sixth.IndexOf(currentLetter) > -1) 
        //search for bfpv in the current letter return number 
        // -1 is out of string index 
        currentCode = "2"; 
       else if (seventh.IndexOf(currentLetter) > -1) 
        //search for bfpv in the current letter return number 
        // -1 is out of string index 
        currentCode = "2"; 
       else if (second.IndexOf(currentLetter) > -1) 
        currentCode = "2"; 
       else if (third.IndexOf(currentLetter) > -1) 
        currentCode = "3"; 
       else if (currentLetter == "l") 
        currentCode = "4"; 
       else if (forth.IndexOf(currentLetter) > -1) 
        currentCode = "5"; 
       else if (currentLetter == "r") 
        currentCode = "6"; 
       if (currentCode != previousCode) 
        result.Append(currentCode); 

       if (result.Length == 4) break; 

       if (currentCode != "") 
        previousCode = currentCode; 
      } 
     } 

     if (result.Length < 4) 
      result.Append(new String('0', 4 - result.Length)); 

     return result.ToString().ToUpper(); 
    } 

    public static string RemoveUnwantedChar(string input) 
    { 
     return Regex.Replace(input, "aeiouyhw", ""); 
    } 
    private static string SoundexByWord(string data) 
    { 
     var soundexes = new List<string>(); 
     foreach (var str in data.Split(' ')) 
     { 
      soundexes.Add(Soundex(str)); 
     } 

     #if Net35OrLower 
     // string.Join in .Net 3.5 and 
     //before require the second parameter to be an array. 
     return string.Join(" ", soundexes.ToArray()); 
     #endif 
     // string.Join in .Net 4 has an overload 
     //that takes IEnumerable<string> 
     return string.Join(" ", soundexes); 
    } 
    } 
} 
+0

您是否試圖實現第二個算法? – TaW

+0

@TaW我不知道如何更新代碼到第二個算法 我認爲他們是相同的步驟,我在代碼 – Bav

+0

也許我只是想念它,但是_Save的第一個字母在哪裏。刪除除第一個字母以外的所有'h'和'w'部分? – TaW

回答

2

這不告訴你,你的代碼出錯,可能甚至不是最快的解決方案,但它似乎得到正確的例子是隻有幾行代碼..

它實現了second version of the algorithm的六個步驟。

string Soundex(string input) 
{ 
    // character groups: the 1st one are vowels to remove 
    // the other groups are characters to replace by the group index 
    List<string> groups = new List<string>() 
         { "aeiouy", "bfpv", "cgjkqsxz", "dt", "l", "mn", "r" }; 

    // save the 1st character (1) 
    string first = input.Substring(0, 1); 
    string s = input.ToLower(); 

    // remove unconditionally (1) 
    s = s.Replace("h", "").Replace("w", ""); 

    // replace characters in all replacement groups (2) 
    for (int g = 1; g < groups.Count; g++) 
     for (int i = 0; i < groups[g].Length; i++) 
      s = s.Replace(groups[g][i], ((char)(g + (byte)'0'))); 

    // replace repeating digits (3) 
    // NOTE: this step actually should be repeated until the length no longer changes!!! 
    for (int i = 1; i < 10; i++) s = s.Replace(i + "" + i, i + ""); 

    // now remove characters from group 0: (4) 
    for (int i = 0; i < groups[0].Length; i++) s = s.Replace(groups[0][i].ToString(), ""); 

    // remove the first if it is a digit (5) 
    if ((s[0] >= '0') && (s[0] <= '9')) s = s.Substring(1); 

    // add saved first to max 3 digits and pad if needed (6) 
    return (first + s.Substring(0, Math.Min(3, s.Length))).PadRight(4, '0'); 
} 
+0

感謝@ Taw的幫助 – Bav

+1

好的,很好。我猜系統會在一到兩天內照顧他們..讓我們刪除這些評論然後.. – TaW