Harry: RegExps für eMail-Adressen

Beitrag lesen

Holladiewaldfee,

so, und jetzt zum zweiten Teil, dem Ausdruck für eMail-Adressen, auch gnadenlos aus meinem Projekt rauskopiert. Problem: Mit den Erweiterungen nach RFC 2368 kann er nichts anfangen. Wenn einer Lust hat, das zu lösen: Viel Spaß.

Wie man diesen Ausdruck jetzt auf mailto:-Angaben anwendet, darf jeder selber rausfinden, das ist ja nicht sooo schwer ;)


# RFC 822 address grammar, expressed as regular-expression fragments.
# Original by J. Friedl and Andre Malo (in Perl), modified by Harald Breitkreutz.
# Added: IPv6 support.
#
# Input: $data["email"]. Addresses containing bytes 0x80-0xFF are rejected
# up front; the fragments are only built for pure 7-bit input.
if(!preg_match("/[\200-\377]/", $data["email"]))
{
# --- RFC 822 lexical tokens -------------------------------------------------
# Every fragment is stored in *regex* syntax, so metacharacters carry their
# own backslash. In a single-quoted PHP string '\\\\' yields two backslashes,
# i.e. one escaped (literal) backslash once it is pasted into a pattern.
$mcexp_esc         = '\\\\';
$mcexp_Period      = '\.';
$mcexp_space       = '\040';
$mcexp_tab         = '\t';
$mcexp_OpenBR      = '\[';
$mcexp_CloseBR     = '\]';
$mcexp_OpenParen   = '\(';
$mcexp_CloseParen  = '\)';
# $mcexp_NonASCII  = '\x80-\xff';   # strictly speaking plus \0177 (= \x7f)
$mcexp_NonASCII    = '\x7f-\xff';
$mcexp_ctrl        = '\000-\037';
$mcexp_CRlist      = '\n\015';

# Single characters allowed inside quoted strings, domain literals and comments.
$mcexp_qtext       = '[^'.$mcexp_esc.$mcexp_NonASCII.$mcexp_CRlist.'"]';
$mcexp_dtext       = '[^'.$mcexp_esc.$mcexp_NonASCII.$mcexp_CRlist.$mcexp_OpenBR.$mcexp_CloseBR.']';
$mcexp_quoted_pair = $mcexp_esc.'[^'.$mcexp_NonASCII.']';
$mcexp_ctext       = '[^'.$mcexp_esc.$mcexp_NonASCII.$mcexp_CRlist.'()]';

# Comments: "(" ctext* ( quoted-pair ctext* )* ")" with one level of nesting.
# The "*" quantifiers are essential; without them a comment would have to
# consist of exactly one ctext character followed by exactly one quoted pair.
$mcexp_Cnested     = $mcexp_OpenParen.$mcexp_ctext.'*(?:'.$mcexp_quoted_pair.$mcexp_ctext.'*)*'.$mcexp_CloseParen;
$mcexp_comment     = $mcexp_OpenParen.$mcexp_ctext."*(?:(?:".$mcexp_quoted_pair."|".$mcexp_Cnested.")".$mcexp_ctext."*)*".$mcexp_CloseParen;

# Optional filler between tokens: any amount of blanks/tabs and comments.
$mcexp_X           = "[".$mcexp_space.$mcexp_tab."]*(?:".$mcexp_comment."[".$mcexp_space.$mcexp_tab."]*)*";

# Atoms and quoted strings. The negative look-ahead stops the engine from
# giving back part of an atom while backtracking.
$mcexp_atom_char   = "[^(".$mcexp_space.')<>@,;:".'.$mcexp_esc.$mcexp_OpenBR.$mcexp_CloseBR.$mcexp_ctrl.$mcexp_NonASCII."]";
$mcexp_atom        = $mcexp_atom_char."+(?!".$mcexp_atom_char.")";
$mcexp_quoted_str  = '"'.$mcexp_qtext."*(?:".$mcexp_quoted_pair.$mcexp_qtext.'*)*"';
$mcexp_word        = "(?:".$mcexp_atom."|".$mcexp_quoted_str.")";

# One domain label: either an atom or a bracketed domain literal, followed by
# optional filler.
$mcexp_domain_ref  = $mcexp_atom;
$mcexp_domain_lit  = $mcexp_OpenBR."(?:".$mcexp_dtext."|".$mcexp_quoted_pair.")*".$mcexp_CloseBR;
$mcexp_sub_domain  = "(?:".$mcexp_domain_ref."|".$mcexp_domain_lit.")".$mcexp_X;

# Display-name phrase (the text in front of "<addr>") and the local part
# (everything in front of the "@").
#
# The control-character range is single-quoted on purpose: in double quotes
# PHP would expand \000 to a raw NUL byte inside the pattern, which the preg
# functions of older PHP versions refuse to compile. Single quotes hand the
# octal escapes to PCRE unchanged.
$mcexp_phrase_ctrl  = '\000-\010\012-\037';
$mcexp_phrase_char  = '[^()<>@,;:".'.$mcexp_esc.$mcexp_OpenBR.$mcexp_CloseBR.$mcexp_NonASCII.$mcexp_phrase_ctrl.']';

# phrase = word phrase_char* ( (comment | quoted-string) phrase_char* )*
$mcexp_phrase       = $mcexp_word.$mcexp_phrase_char."*(?:(?:".$mcexp_comment."|".$mcexp_quoted_str.")".$mcexp_phrase_char."*)*";

# local-part = word ( "." word )*  with optional filler around every token.
$mcexp_local_part   = $mcexp_word.$mcexp_X."(?:".$mcexp_Period.$mcexp_X.$mcexp_word.$mcexp_X.")*";

# "Long" form: fully qualified domain that must end in a top-level domain of
# two to four ASCII letters.
# NOTE(review): the 2-4 letter limit rejects longer TLDs; kept as in the
# original, widen the last fragment if such addresses must be accepted.

# domain = sub-domain ( "." sub-domain )* "." TLD
$mcexp_domain_lang       = $mcexp_sub_domain."(?:".$mcexp_Period.$mcexp_X.$mcexp_sub_domain.")*".$mcexp_Period."[A-Za-z][A-Za-z][A-Za-z]?[A-Za-z]?";

# route = "@" domain ( "," "@" domain )* ":"   (source route, optional)
$mcexp_route_lang        = "@".$mcexp_X.$mcexp_domain_lang."(?:,".$mcexp_X."@".$mcexp_X.$mcexp_domain_lang.")*:".$mcexp_X;

# addr-spec = local-part "@" domain
$mcexp_addr_spec_lang    = $mcexp_local_part."@".$mcexp_X.$mcexp_domain_lang;

# route-addr = "<" [route] addr-spec ">"
# The angle brackets are plain literals; wrapping them in "[" ... "]" would
# open a character class and swallow the following fragment.
$mcexp_route_addr_lang   = "<".$mcexp_X."(?:".$mcexp_route_lang.")?".$mcexp_addr_spec_lang.">";

# mailbox = addr-spec | phrase route-addr
$mcexp_email_lang        = "(".$mcexp_X."(?:".$mcexp_addr_spec_lang."|".$mcexp_phrase.$mcexp_route_addr_lang."))";

# "Short" form: same grammar as the long form but without the mandatory
# top-level domain, so a single host label is enough.

# domain = sub-domain ( "." sub-domain )*
$mcexp_domain_kurz       = $mcexp_sub_domain."(?:".$mcexp_Period.$mcexp_X.$mcexp_sub_domain.")*";

# route = "@" domain ( "," "@" domain )* ":"
$mcexp_route_kurz        = "@".$mcexp_X.$mcexp_domain_kurz."(?:,".$mcexp_X."@".$mcexp_X.$mcexp_domain_kurz.")*:".$mcexp_X;

# addr-spec = local-part "@" domain
$mcexp_addr_spec_kurz    = $mcexp_local_part."@".$mcexp_X.$mcexp_domain_kurz;

# route-addr = "<" [route] addr-spec ">"  (literal angle brackets, no
# character class around them)
$mcexp_route_addr_kurz   = "<".$mcexp_X."(?:".$mcexp_route_kurz.")?".$mcexp_addr_spec_kurz.">";

# mailbox = addr-spec | phrase route-addr
$mcexp_email_kurz        = "(".$mcexp_X."(?:".$mcexp_addr_spec_kurz."|".$mcexp_phrase.$mcexp_route_addr_kurz."))";

# Dotted-quad IPv4 address as the domain part (the original header cites
# "RFC 820" for this section).
#
# One octet is 0-255; alternatives are listed longest first. The last octet
# uses the same ranges but must not be a lone "0". Non-capturing groups keep
# the total number of capture groups in the final pattern down.
$mcexp_domain_ip4_part      = "(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])";
$mcexp_domain_ip4_part_last = "(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[1-9])";
$mcexp_domain_ip4           = "((".$mcexp_domain_ip4_part.$mcexp_Period."){3}".$mcexp_domain_ip4_part_last.")";

# route = "@" ip ( "," "@" ip )* ":"
$mcexp_route_ip4            = "@".$mcexp_X.$mcexp_domain_ip4."(?:,".$mcexp_X."@".$mcexp_X.$mcexp_domain_ip4.")*:".$mcexp_X;

# addr-spec = local-part "@" ip
$mcexp_addr_spec_ip4        = $mcexp_local_part."@".$mcexp_X.$mcexp_domain_ip4;

# route-addr = "<" [route] addr-spec ">"  (literal angle brackets, no
# character class around them)
$mcexp_route_addr_ip4       = "<".$mcexp_X."(?:".$mcexp_route_ip4.")?".$mcexp_addr_spec_ip4.">";

# mailbox = addr-spec | phrase route-addr
$mcexp_email_ip4            = "(".$mcexp_X."(?:".$mcexp_addr_spec_ip4."|".$mcexp_phrase.$mcexp_route_addr_ip4."))";

# RFC 1883: IPv6 address as the domain part.
# Please note that the IPv6 check does not look for reserved addresses.
# According to the original author this is hardly possible because of the
# limit on the number of capturing parentheses in a regular expression.
# Also, this pattern still validates addresses like A:B:C:1:2::3:4:5:6, i.e.
# shortened forms that in fact contain too many ":" separated groups. There
# is no real (say: beautiful) way out of this.

# One 16-bit group: "0" or one to four hex digits without a leading zero.
$mcexp_domain_ip6_part   = "(0|([1-9a-fA-F][0-9a-fA-F]{0,3}))";

# Full form: exactly eight groups.
$mcexp_domain_ip6_pure   = "((".$mcexp_domain_ip6_part.":){7}".$mcexp_domain_ip6_part.")";

# Shortened form containing "::".
$mcexp_domain_ip6_short  = "((((".$mcexp_domain_ip6_part.":){1,6})|(:)):((".$mcexp_domain_ip6_part.":){0,5})".$mcexp_domain_ip6_part.")";

$mcexp_domain_ip6        = "(".$mcexp_domain_ip6_pure."|".$mcexp_domain_ip6_short.")";

# route = "@" ip ( "," "@" ip )* ":"
$mcexp_route_ip6         = "@".$mcexp_X.$mcexp_domain_ip6."(?:,".$mcexp_X."@".$mcexp_X.$mcexp_domain_ip6.")*:".$mcexp_X;

# addr-spec = local-part "@" ip
$mcexp_addr_spec_ip6     = $mcexp_local_part."@".$mcexp_X.$mcexp_domain_ip6;

# route-addr = "<" [route] addr-spec ">"  (literal angle brackets, no
# character class around them)
$mcexp_route_addr_ip6    = "<".$mcexp_X."(?:".$mcexp_route_ip6.")?".$mcexp_addr_spec_ip6.">";

# mailbox = addr-spec | phrase route-addr
$mcexp_email_ip6         = "(".$mcexp_X."(?:".$mcexp_addr_spec_ip6."|".$mcexp_phrase.$mcexp_route_addr_ip6."))";

# Mixed notation: hexadecimal IPv6 groups followed by a dotted-quad IPv4 tail.
# NOTE(review): RFC 4291 mixed notation (x:x:x:x:x:x:d.d.d.d) has six hex
# groups in front of the IPv4 part; this pattern requires exactly four.
# Kept as in the original - confirm the intended count.
$mcexp_domain_ip6_4      = "((".$mcexp_domain_ip6_part.":){4}".$mcexp_domain_ip4.")";

# route = "@" ip ( "," "@" ip )* ":"
$mcexp_route_ip6_4       = "@".$mcexp_X.$mcexp_domain_ip6_4."(?:,".$mcexp_X."@".$mcexp_X.$mcexp_domain_ip6_4.")*:".$mcexp_X;

# addr-spec = local-part "@" ip
$mcexp_addr_spec_ip6_4   = $mcexp_local_part."@".$mcexp_X.$mcexp_domain_ip6_4;

# route-addr = "<" [route] addr-spec ">"  (literal angle brackets, no
# character class around them)
$mcexp_route_addr_ip6_4  = "<".$mcexp_X."(?:".$mcexp_route_ip6_4.")?".$mcexp_addr_spec_ip6_4.">";

# mailbox = addr-spec | phrase route-addr
$mcexp_email_ip6_4       = "(".$mcexp_X."(?:".$mcexp_addr_spec_ip6_4."|".$mcexp_phrase.$mcexp_route_addr_ip6_4."))";

# Shortened mixed notation: hex groups with a "::" gap, followed by a
# dotted-quad IPv4 tail. Up to three groups (or none) may precede the "::",
# up to two may follow it.
$mcexp_domain_ip6_4_short      = "(((".$mcexp_domain_ip6_part.":){1,3}|:):(".$mcexp_domain_ip6_part.":){0,2}".$mcexp_domain_ip4.")";

# route = "@" ip ( "," "@" ip )* ":"
$mcexp_route_ip6_4_short       = "@".$mcexp_X.$mcexp_domain_ip6_4_short."(?:,".$mcexp_X."@".$mcexp_X.$mcexp_domain_ip6_4_short.")*:".$mcexp_X;

# addr-spec = local-part "@" ip
$mcexp_addr_spec_ip6_4_short   = $mcexp_local_part."@".$mcexp_X.$mcexp_domain_ip6_4_short;

# route-addr = "<" [route] addr-spec ">"  (literal angle brackets, no
# character class around them)
$mcexp_route_addr_ip6_4_short  = "<".$mcexp_X."(?:".$mcexp_route_ip6_4_short.")?".$mcexp_addr_spec_ip6_4_short.">";

# mailbox = addr-spec | phrase route-addr
$mcexp_email_ip6_4_short       = "(".$mcexp_X."(?:".$mcexp_addr_spec_ip6_4_short."|".$mcexp_phrase.$mcexp_route_addr_ip6_4_short."))";

# Checks: assemble the list of complete patterns the address is tried against.
# Modifiers: x = the fragments are written for extended syntax,
#            D = "$" matches only at the very end of the subject, so an
#                address with a trailing newline is not accepted.
$check_array = array();
$check_array["email_lang"] = "/^".$mcexp_email_lang.'$/xD';

# Host names without a top-level domain are only accepted when the
# configuration constant allows it.
if(fcms_bEMAILRFCALLOWINTRANET==true)
{
    $check_array["email_kurz"] = "/^".$mcexp_email_kurz.'$/xD';
}

# IP addresses as domain part are only accepted when the configuration
# constant allows it. There is deliberately no generic "email_ip" entry:
# no such fragment is built, and an empty fragment would produce /^$/ and
# declare the empty string a valid address.
if(fcms_bEMAILRFCALLOWIP==true)
{
    $check_array["email_ip4"]         = "/^".$mcexp_email_ip4.'$/xD';
    $check_array["email_ip6"]         = "/^".$mcexp_email_ip6.'$/xD';
    $check_array["email_ip6_4"]       = "/^".$mcexp_email_ip6_4.'$/xD';
    $check_array["email_ip6_4_short"] = "/^".$mcexp_email_ip6_4_short.'$/xD';
}

# Remove nested comments from the string.
# The untouched address is kept in $mailbackup so it can be restored after
# validation. Each pass deletes the innermost "( ... )" groups (those that
# contain no further parentheses); the loop repeats until a pass changes
# nothing, which peels nested comments from the inside out.
$mailbackup = $data["email"];
do
{
    $oldsMail = $data["email"];
    $sMail = preg_replace('/\([^()]*\)/', "", $oldsMail);
    if($sMail === null)
    {
        # PCRE reported an error: keep the last good value and stop.
        break;
    }
    $data["email"] = $sMail;
}
while($oldsMail !== $data["email"]);

# Validate: the address is valid as soon as one of the patterns matches.
# foreach replaces the while/list/each idiom, which no longer exists in
# PHP 8; the array keys are not needed here.
$validated = false;
foreach($check_array as $protregexp)
{
    if(preg_match($protregexp, $data["email"]))
    {
        $validated = true;
        break;    # one match is enough, skip the remaining patterns
    }
}

# Restore the original address (comments included) and store the verdict.
$data["email"] = $mailbackup;
$data["valid"] = $validated;
}
else
{
# Invalid non-ASCII characters in the string!
$data["valid"] = false;
}

Wer diese Ausdrücke einsetzt und damit Kilowatt-weise Strom für unnötige Überprüfungen verbrät, ist natürlich selber schuld.

Ciao,

Harry

--
  Man weiß erst was man hatte, wenn man es verloren hat.   42? Eher sh:| fo:) ch:] rl:° br:& n4:° ie:% mo:) va:) de:[ zu:) fl:( ss:) ls:[ js:|