我有一個需求:確保電子郵件地址只包含標準的美式英文字符。先不討論這個需求的確切含義,或它是否合理,我想知道的是:單靠一個正則表達式是否足以滿足這個需求,還是還需要另外檢查字符集?換句話說,正則表達式是否足夠,還是我還得檢查編碼?
是否存在這樣的情況:某個 UTF-8 字符能夠通過正則表達式的匹配,但它並不屬於正則表達式中所使用的拉丁字符集?
以下是我使用的一些代碼。在我看來,正則表達式似乎已經足夠,但我想聽聽第二意見。
package misc;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
/**
 * Small demo that runs two independent checks over a handful of sample strings:
 * an ISO-8859-1 encodability check and a whitelist regular expression for the
 * characters allowed in an e-mail address.
 *
 * <p>The two checks answer different questions. ISO-8859-1 covers every code point
 * from U+0000 to U+00FF, so accented Latin letters such as {@code é} pass the
 * encoding check even though they are rejected by the ASCII-only whitelist.
 */
public class ValidateCharacterSet {

    /**
     * Whitelist used by {@link #playWithMatches(String)}: ASCII letters, digits,
     * {@code -}, {@code _}, {@code .}, {@code @} and {@code +}, one or more times.
     *
     * <p>NOTE(review): the previous literal contained an e-mail-obfuscation placeholder
     * whose unescaped {@code ]} closed the character class early, so no real address
     * could match. This is a reconstruction of the evident intent; confirm the exact
     * set of permitted punctuation.
     *
     * <p>Compiled once because {@code String.matches} recompiles on every call.
     */
    private static final Pattern ALLOWED_CHARACTERS = Pattern.compile("[A-Za-z0-9\\-_.@+]+");

    /**
     * Prints the result of both checks for each sample string, ISO check first,
     * then a blank line, then the regex check.
     *
     * @param args ignored.
     */
    public static void main(String[] args) {
        String czech = "Český";
        String japanese = "日本語";
        String spanish = "¡Qué magnifico es java!";
        // A syntactically plain ASCII address; the earlier sample was an obfuscation
        // placeholder rather than an address.
        String english = "user@example.com";
        String[] samples = {czech, japanese, spanish, english};

        for (String sample : samples) {
            System.out.println("iso check: " + sample + ":" + isISO8859(sample));
        }
        System.out.println("");
        for (String sample : samples) {
            System.out.println("regex match: " + sample + ":" + playWithMatches(sample));
        }
    }

    /**
     * Tells whether every character of {@code str} can be represented in ISO-8859-1.
     *
     * <p>Equivalent to encoding the string to ISO-8859-1, decoding it again and
     * comparing with the original, but without the intermediate copies and without
     * a checked exception for a charset that every JVM is required to provide.
     *
     * @param str the string to inspect; may be {@code null}.
     * @return {@code true} if {@code str} is {@code null} or consists solely of
     *         characters encodable in ISO-8859-1; {@code false} otherwise.
     */
    private static boolean isISO8859(String str) {
        // A null string is treated as compliant by definition.
        if (str == null) {
            return true;
        }
        // A fresh encoder per call: CharsetEncoder instances are stateful and not
        // safe to share between threads.
        return StandardCharsets.ISO_8859_1.newEncoder().canEncode(str);
    }

    /**
     * Tells whether {@code str} is non-empty and made up exclusively of the
     * characters permitted by {@link #ALLOWED_CHARACTERS}.
     *
     * @param str the candidate string; may be {@code null}.
     * @return {@code true} if the whole string matches the whitelist;
     *         {@code false} for {@code null}, the empty string, or any string
     *         containing a character outside the whitelist.
     */
    private static boolean playWithMatches(String str) {
        return str != null && ALLOWED_CHARACTERS.matcher(str).matches();
    }
}