您可以使用Levenshtein distance
編輯
因爲我最終需要類似的東西我自己,這個問題仍然開放。這是我玩過的一些代碼。既提高了字符串距離,也將Levenshtein算法應用於路徑標記。
C++代碼
/*
----- String based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
2 : this/is/a/strong/path
2 : that/is/a/strange/path
4 : is/this/a/strange/path
5 : this/is/a/strange
13 : this/is/a
15 : this/is
16 : that/was
18 : this
24 : completely/different/folder
----- Path based Levenshtein ----
Matching : this/is/a/strange/path
0 : this/is/a/strange/path
1 : this/is/a/strange
2 : this/is/a/strong/path
2 : that/is/a/strange/path
2 : this/is/a
2 : is/this/a/strange/path
3 : this/is
4 : this
7 : that/was
8 : completely/different/folder
*/
#include <string>
#include <vector>
#include <algorithm>
#include <stdio.h>
/// returns the smaller of two parameters
template< typename T >
T levmin(T v1, T v2)
{ return (v1 < v2) ? v1 : v2; }
/// Returns the Levenshtein distance between the specified strings
template < typename T, typename T_STR >
typename T_STR::size_type levstr(const T_STR &s1, const T_STR &s2)
{
typename T_STR::size_type l1 = s1.length(), l2 = s2.length();
std::vector< typename T_STR::size_type > d((l1 + 1) * (l2 + 1));
typename T_STR::size_type i, j;
for (i = 0; i <= l1; i++)
d[ i * l2 ] = i;
for (i = 0; i <= l2; i++)
d[ i ] = i;
for (i = 1; i <= l1; i++)
for (j = 1; j <= l2; j++)
d[ i * l2 + j ] = levmin(levmin(d[ (i - 1) * l2 + j ] + 1, d[ i * l2 + (j - 1) ] + 1),
d[ (i - 1) * l2 + (j - 1) ] + (s1[ i - 1 ] == s2[ j - 1 ] ? 0 : 1)
);
return d[ (l1 * l2) + l2 ];
}
/// Returns the Levenshtein distance between the specified split paths
template < typename T, typename T_STR, typename T_LST >
typename T_STR::size_type levpath(const T_LST &lst1, const T_LST &lst2)
{
typename T_STR::size_type l1 = lst1.size(), l2 = lst2.size();
std::vector< typename T_STR::size_type > d((l1 + 1) * (l2 + 1));
typename T_STR::size_type i, j;
for (i = 0; i <= l1; i++)
d[ i * l2 ] = i;
for (i = 0; i <= l2; i++)
d[ i ] = i;
for (i = 1; i <= l1; i++)
for (j = 1; j <= l2; j++)
d[ i * l2 + j ] = levmin(levmin(d[ (i - 1) * l2 + j ] + 1, d[ i * l2 + (j - 1) ] + 1),
d[ (i - 1) * l2 + (j - 1) ] + levstr< T, T_STR>(lst1[ i - 1 ], lst2[ j - 1 ])
);
return d[ (l1 * l2) + l2 ];
}
/// Generic split path function
/*
Returns a vector of path tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_LST splitpath(const T_STR &s, const T sep)
{ T_LST lst;
typename T_STR::size_type i = 0, l = 0;
while(T_STR::npos != (i = s.find_first_of(sep, i)))
{ if (l < i)
lst.push_back(T_STR(s, l, i - l));
l = ++i;
} // end while
if (l < s.length())
lst.push_back(T_STR(s, l, s.length() - l));
return lst;
}
/// Generic join path function
/*
Returns a string of joined vector tokens
*/
template < typename T, typename T_STR, typename T_LST >
T_STR joinpath(const T_LST &p, const T sep)
{ T_STR s;
for (typename T_LST::const_iterator it = p.begin(); it != p.end(); it++)
{ if (s.length()) s += sep; s += *it; }
return s;
}
// String types
typedef char t_levchar;
typedef std::basic_string<t_levchar> t_levstr;
typedef std::vector<t_levstr> t_levpath;
typedef std::vector<t_levpath> t_levpathlist;
// Sort compare for split paths
template< typename T, typename T_STR, typename T_LST > struct levcmp
{ levcmp(const T_LST &p) { m_p = p; }
bool operator() (const T_LST &i, const T_LST &j)
{ return levpath< T, T_STR, T_LST >(i, m_p) < levpath< T, T_STR, T_LST >(j, m_p); }
T_LST m_p;
};
// Sort compare for strings
template< typename T, typename T_STR > struct levcmp_str
{ levcmp_str(const T_STR &s) { m_s = s; }
bool operator() (const T_STR &i, const T_STR &j)
{ return levstr< T, T_STR >(i, m_s) < levstr< T, T_STR >(j, m_s); }
T_STR m_s;
};
int main(int argc, char* argv[])
{
// Path to compare with
const t_levchar* compare_path = "this/is/a/strange/path";
// Paths to sort
const t_levchar* path_list[] =
{
"this/is/a/strong/path",
"that/is/a/strange/path",
"this/is/a/strange",
"this/is",
"this/is/a",
"this",
"this/is/a/strange/path",
"is/this/a/strange/path",
"that/was",
"completely/different/folder",
0
};
printf("\n----- String based Levenshtein ----\n"
"Matching : %s\n\n", compare_path);
// Create vector from paths
std::vector<t_levstr> paths;
for(int i = 0; path_list[ i ]; i++)
paths.push_back(path_list[ i ]);
// Sort the paths
std::sort(paths.begin(), paths.end(), levcmp_str< t_levchar, t_levstr >(compare_path));
// Show the result
for (std::vector<t_levstr>::iterator it = paths.begin(); it != paths.end(); it++)
printf("%d : %s\n",
(int)levstr< t_levchar, t_levstr >(*it, compare_path),
(const char*)it->c_str());
printf("\n----- Path based Levenshtein ----\n"
"Matching : %s\n\n", compare_path);
// Create vector from split paths
t_levpath splitcompare = splitpath< t_levchar, t_levstr, t_levpath >(compare_path, '/');
t_levpathlist splitpaths;
for (int i = 0; path_list[ i ]; i++)
splitpaths.push_back(splitpath< t_levchar, t_levstr, t_levpath >(path_list[ i ], '/'));
// Sort the paths
std::sort(splitpaths.begin(), splitpaths.end(),
levcmp< t_levchar, t_levstr, t_levpath >(splitcompare));
// Show the results
for (t_levpathlist::iterator it = splitpaths.begin(); it != splitpaths.end(); it++)
printf("%d : %s\n",
(int)levpath< t_levchar, t_levstr, t_levpath >(*it, splitcompare),
(const char*)joinpath< t_levchar, t_levstr, t_levpath >(*it, '/').c_str());
return 0;
}
當你說這是在地圖中,你是指「std :: map」還是一個向量?他們排序? – 2012-02-14 22:24:07