import re
from typing import Pattern
# Compiled once at import time so every call reuses the same pattern object.
# What the pattern enforces:
#   * Total length of 1-254 characters (first lookahead).
#   * Local part of 1-64 characters (second lookahead).
#   * Local part made of dot-separated runs of letters, digits and
#     !#$%&'*+/=?^_`{|}~-  (so no leading, trailing or doubled dots).
#   * Domain made of dot-separated labels of 1-63 characters, each starting
#     and ending with a letter or digit and optionally containing hyphens.
#   * Top-level domain of 2-63 letters; the upper bound keeps it within the
#     same 63-character label limit that is applied to the other labels.
# ``\Z`` is used instead of ``$`` because ``$`` also matches just before a
# trailing newline, which would let "addr\n" through if this pattern were
# ever used with ``match()`` or ``search()`` rather than ``fullmatch()``.
_EMAIL_REGEX: Pattern[str] = re.compile(
    r"^(?=.{1,254}\Z)(?=.{1,64}@)[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+"
    r"(?:\.[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+)*"
    r"@"
    r"(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+"
    r"[A-Za-z]{2,63}\Z"
)


def is_valid_email(email: str) -> bool:
    """Return whether *email* looks like a valid email address.

    The check is a pragmatic, ASCII-only approximation rather than a full
    RFC 5322 parser. An address is accepted when all of the following hold:

    * Overall length is at most 254 characters
    * Local part is 1-64 characters of letters, digits and
      ``!#$%&'*+/=?^_`{|}~-``, with single dots allowed between runs
    * Domain has at least one dot; every label is at most 63 characters,
      starts and ends with a letter or digit, and may contain hyphens
    * Top-level domain is 2-63 letters

    Parameters
    ----------
    email: str
        The email address to validate. Non-string values are rejected
        rather than raising.

    Returns
    -------
    bool
        ``True`` if the whole string matches the pattern, ``False``
        otherwise (including when *email* is not a ``str``).
    """
    # Guard first: passing a non-string to the regex would raise TypeError.
    if not isinstance(email, str):
        return False
    # fullmatch() requires the entire string to match, so surrounding
    # whitespace or a trailing newline makes the address invalid.
    return _EMAIL_REGEX.fullmatch(email) is not None
# ---------------------------------------------------------------------------
# Example usage
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Each sample exercises a different rule of the validator. Several of the
    # earlier entries were the same scraped placeholder text instead of real
    # addresses, so they are replaced with distinct, representative cases.
    test_addresses = [
        "simple@example.com",            # expected: valid
        "first.last@example.co.uk",      # expected: valid (dotted local part)
        "user+tag@sub.example.org",      # expected: valid (plus tag, subdomain)
        "o'brien_99@example-domain.io",  # expected: valid (special chars, hyphen)
        "invalid-email@",                # expected: invalid (no domain)
        "@missing-local.org",            # expected: invalid (no local part)
        "double..dot@example.com",       # expected: invalid (consecutive dots)
        # expected: invalid (domain label longer than 63 characters)
        "toolongdomainpart@" + "a" * 64 + ".com",
    ]
    for addr in test_addresses:
        print(f"{addr!r:50} -> {is_valid_email(addr)}")