Source code for dbtk.etl.transforms.email

# dbtk/etl/transforms/email.py
"""
Email address validation and cleaning functions.

Uses a practical regex that catches most valid emails and rejects
obviously invalid ones. Not RFC 5322 compliant but good enough for
most data validation needs.
"""

import re

# Email validation regex - practical but not RFC-compliant
emailPattern = re.compile(
    r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
)


[docs] def email_clean(val: str) -> str: """ Clean and normalize email address. Args: val: Email address string Returns: Cleaned email address (lowercase, stripped) or empty string if invalid Example: email_clean(" User@Example.COM ") # "user@example.com" email_clean("invalid.email") # "" email_clean("user@domain.co") # "user@domain.co" """ if not val: return '' val_clean = str(val).strip().lower() if email_validate(val_clean): return val_clean else: return ''
[docs] def email_validate(val: str) -> bool: """ Validate email address format. Uses a practical regex that catches most valid emails and rejects obviously invalid ones. Not RFC 5322 compliant but good enough for most data validation needs. Args: val: Email address string to validate Returns: True if email appears valid, False otherwise Example: email_validate("user@example.com") # True email_validate("user.name@domain.co") # True email_validate("invalid.email") # False email_validate("@domain.com") # False email_validate("") # False """ if not val: return False val_clean = str(val).strip() if not val_clean: return False return bool(emailPattern.match(val_clean))