2013-03-08 69 views
0

我想知道,能否在不把大量情境硬編碼進一大段PHP代碼的前提下,處理用戶在Magento搜索引擎中輸入的搜索字符串:檢測詞形變化(如 dog/dogs)、刪除不重要的單詞(例如「made in the USA」中的「in」和「the」並不重要)等等。我目前可以在一定程度上處理這個搜索字符串,但寫出來的代碼既不乾淨又難看。(標題:Magento:改進搜索引擎——詞形變化、無關詞語移除等)

有沒有什麼建議或指引,可以讓它成爲一個「智能」(intelligent)的搜索引擎?

回答

0

使用這個類:

/**
 * Rule-based English inflector: converts nouns between singular and plural.
 *
 * Lookup order for both directions:
 *   1. uncountable words (returned unchanged),
 *   2. caller-supplied irregular words from the optional global
 *      $irregularWords (singular => plural), if it is defined as an array,
 *   3. the built-in irregular table,
 *   4. the ordered regular-expression rule tables (first match wins).
 *
 * The rule tables are order-sensitive: specific rules must stay ahead of the
 * generic catch-all rules at the bottom of each table.
 */
class Inflection
{
    /**
     * Pluralisation rules: PCRE pattern => replacement, tried top to bottom.
     * The final '/$/' rule always matches and simply appends "s".
     */
    public static $plural = array(
        '/(quiz)$/i'                     => '$1zes',
        '/^(ox)$/i'                      => '$1en',
        '/([ml])ouse$/i'                 => '$1ice',
        // The alternation is grouped so that only matrix/vertex/index style
        // words become "...ices"; an ungrouped "ix|ex$" would also catch
        // every word ending in "ex" (complex, reflex, ...).
        '/(matr|vert|ind)(?:ix|ex)$/i'   => '$1ices',
        '/(x|ch|ss|sh)$/i'               => '$1es',
        '/([^aeiouy]|qu)y$/i'            => '$1ies',
        '/(hive)$/i'                     => '$1s',
        '/(?:([^f])fe|([lr])f)$/i'       => '$1$2ves',
        '/(shea|lea|loa|thie)f$/i'       => '$1ves',
        '/sis$/i'                        => 'ses',
        '/([ti])um$/i'                   => '$1a',
        '/(tomat|potat|ech|her|vet)o$/i' => '$1oes',
        '/(bu)s$/i'                      => '$1ses',
        '/(alias)$/i'                    => '$1es',
        '/(octop)us$/i'                  => '$1i',
        '/(ax|test)is$/i'                => '$1es',
        '/(us)$/i'                       => '$1es',
        '/s$/i'                          => 's',
        '/$/'                            => 's'
    );

    /**
     * Singularisation rules: PCRE pattern => replacement, tried top to bottom.
     */
    public static $singular = array(
        '/(quiz)zes$/i'              => '$1',
        '/(matr)ices$/i'             => '$1ix',
        '/(vert|ind)ices$/i'         => '$1ex',
        '/^(ox)en$/i'                => '$1',
        '/(alias)es$/i'              => '$1',
        '/(octop|vir)i$/i'           => '$1us',
        '/(cris|ax|test)es$/i'       => '$1is',
        '/(shoe)s$/i'                => '$1',
        '/(o)es$/i'                  => '$1',
        '/(bus)es$/i'                => '$1',
        '/([ml])ice$/i'              => '$1ouse',
        '/(x|ch|ss|sh)es$/i'         => '$1',
        '/(m)ovies$/i'               => '$1ovie',
        '/(s)eries$/i'               => '$1eries',
        '/([^aeiouy]|qu)ies$/i'      => '$1y',
        '/([lr])ves$/i'              => '$1f',
        '/(tive)s$/i'                => '$1',
        '/(hive)s$/i'                => '$1',
        '/(li|wi|kni)ves$/i'         => '$1fe',
        '/(shea|loa|lea|thie)ves$/i' => '$1f',
        '/(^analy)ses$/i'            => '$1sis',
        // Only the outer group is re-inserted; also emitting the nested
        // first-letter group would duplicate it ("psychoanalyasis").
        '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '$1sis',
        '/([ti])a$/i'                => '$1um',
        '/(n)ews$/i'                 => '$1ews',
        '/(h|bl)ouses$/i'            => '$1ouse',
        '/(corpse)s$/i'              => '$1',
        '/(us)es$/i'                 => '$1',
        '/s$/i'                      => ''
    );

    /**
     * Built-in irregular nouns, singular => plural. Each entry is matched as
     * a case-insensitive suffix, so "woman" is handled by the "man" entry.
     */
    public static $irregular = array(
        'move'   => 'moves',
        'foot'   => 'feet',
        'goose'  => 'geese',
        'sex'    => 'sexes',
        'child'  => 'children',
        'man'    => 'men',
        'tooth'  => 'teeth',
        'person' => 'people',
        'admin'  => 'admin'
    );

    /**
     * Words whose singular and plural forms are identical (lower-case).
     */
    public static $uncountable = array(
        'sheep',
        'fish',
        'deer',
        'series',
        'species',
        'money',
        'rice',
        'information',
        'equipment'
    );

    /**
     * Return the plural form of a singular English noun.
     *
     * @param string $string Singular word (or phrase ending in the noun).
     * @return string Plural form; uncountable words and the empty string are
     *                returned unchanged.
     */
    public static function pluralize($string)
    {
        // Nothing to inflect; without this guard the catch-all rule would
        // turn an empty string into "s".
        if ($string === '') {
            return $string;
        }

        if (self::isUncountable($string)) {
            return $string;
        }

        $irregular = self::replaceIrregular(self::customIrregularWords(), $string, true);
        if ($irregular === null) {
            $irregular = self::replaceIrregular(self::$irregular, $string, true);
        }
        if ($irregular !== null) {
            return $irregular;
        }

        return self::applyRules(self::$plural, $string);
    }

    /**
     * Return the singular form of a plural English noun.
     *
     * @param string $string Plural word (or phrase ending in the noun).
     * @return string Singular form; returned unchanged when the word is
     *                uncountable or no rule applies.
     */
    public static function singularize($string)
    {
        if (self::isUncountable($string)) {
            return $string;
        }

        $irregular = self::replaceIrregular(self::customIrregularWords(), $string, false);
        if ($irregular === null) {
            $irregular = self::replaceIrregular(self::$irregular, $string, false);
        }
        if ($irregular !== null) {
            return $irregular;
        }

        return self::applyRules(self::$singular, $string);
    }

    /**
     * Format a count together with its noun, pluralising the noun unless the
     * count is one, e.g. "1 cat", "2 cats".
     *
     * @param int|string $count  Quantity; compared loosely so that the
     *                           string '1' also counts as singular.
     * @param string     $string Singular noun.
     * @return string
     */
    public static function pluralize_if($count, $string)
    {
        if ($count == 1) {
            return "1 $string";
        }

        return $count . ' ' . self::pluralize($string);
    }

    /**
     * Tell whether the word is in the uncountable list (case-insensitive).
     */
    private static function isUncountable($string)
    {
        return in_array(strtolower($string), self::$uncountable, true);
    }

    /**
     * Fetch caller-supplied irregular words from the optional global
     * $irregularWords. Returns an empty array when the global is undefined
     * or not an array, so iterating it never raises a warning.
     */
    private static function customIrregularWords()
    {
        if (isset($GLOBALS['irregularWords']) && is_array($GLOBALS['irregularWords'])) {
            return $GLOBALS['irregularWords'];
        }

        return array();
    }

    /**
     * Try to inflect $string using a singular => plural word map.
     *
     * @param array  $words    Map of singular => plural forms.
     * @param string $string   Word to inflect.
     * @param bool   $toPlural True to go singular -> plural, false for the
     *                         reverse direction.
     * @return string|null Inflected word, or null when no entry matches.
     */
    private static function replaceIrregular($words, $string, $toPlural)
    {
        foreach ($words as $singular => $plural) {
            $from = $toPlural ? $singular : $plural;
            $to = $toPlural ? $plural : $singular;
            $pattern = '/' . $from . '$/i';

            if (preg_match($pattern, $string)) {
                return preg_replace($pattern, $to, $string);
            }
        }

        return null;
    }

    /**
     * Apply the first matching pattern => replacement rule to $string.
     * Returns $string unchanged when no rule matches.
     */
    private static function applyRules($rules, $string)
    {
        foreach ($rules as $pattern => $replacement) {
            if (preg_match($pattern, $string)) {
                return preg_replace($pattern, $replacement, $string);
            }
        }

        return $string;
    }
}

如果你有時間,可以按照標準的詞形變化(inflection)規則來實現:http://en.wikipedia.org/wiki/Inflection

你可以用陣列或XML來存放所有的詞形變化數據;可以參考CodeIgniter的inflector helper,它用起來非常友好:http://ellislab.com/codeigniter/user-guide/helpers/inflector_helper.html

許多框架都內置了詞形變化(inflection)支持,但它們主要只針對英語。對於其他語言,您應該自己編寫……或者如果需要,可以參考unicode.org以及其他語言的某些轉換標準。