Add new optional beta email parser that's based on ImapEngine instead of Webklex

This commit is contained in:
johnnyq
2026-02-26 16:11:49 -05:00
parent 1ba19cc249
commit 9cb1ff7330
682 changed files with 101834 additions and 8 deletions

View File

@@ -0,0 +1,226 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use ZBateson\MailMimeParser\ErrorBag;
use ZBateson\MailMimeParser\Header\Consumer\IConsumerService;
use ZBateson\MailMimeParser\Header\Part\CommentPart;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * Common base for every parsed mime header.
 *
 * On construction the header stores its name and raw value, then hands the
 * raw value to the supplied consumer service. The parts returned by the
 * consumer are kept twice: once in full, and once with comment parts removed.
 *
 * @author Zaahid Bateson
 */
abstract class AbstractHeader extends ErrorBag implements IHeader
{
    /**
     * @var string header name exactly as passed to the constructor
     */
    protected string $name;

    /**
     * @var IHeaderPart[] parsed parts with CommentPart instances removed
     */
    protected array $parts = [];

    /**
     * @var IHeaderPart[] every part returned by the consumer, CommentParts
     *      included
     */
    protected array $allParts = [];

    /**
     * @var string unparsed header value exactly as passed to the constructor
     */
    protected string $rawValue;

    /**
     * @var string[]|null comment strings, built lazily by getComments()
     */
    private ?array $comments = null;

    /**
     * Stores the name and raw value, then parses the value via
     * parseHeaderValue().
     *
     * @param LoggerInterface $logger Forwarded to the ErrorBag constructor.
     * @param IConsumerService $consumerService Parses the header's value.
     * @param string $name Name of the header.
     * @param string $value Raw value of the header.
     */
    public function __construct(
        LoggerInterface $logger,
        IConsumerService $consumerService,
        string $name,
        string $value
    ) {
        parent::__construct($logger);
        $this->name = $name;
        $this->rawValue = $value;
        $this->parseHeaderValue($consumerService, $value);
    }

    /**
     * Rebuilds $this->parts from $this->allParts.
     *
     * This base implementation keeps everything except CommentPart objects
     * and stores the result as a re-indexed list.
     */
    protected function filterAndAssignToParts() : void
    {
        $withoutComments = [];
        foreach ($this->allParts as $part) {
            if (!($part instanceof CommentPart)) {
                $withoutComments[] = $part;
            }
        }
        $this->parts = $withoutComments;
    }

    /**
     * Invokes the consumer on the raw value and stores the result.
     *
     * The consumer's return value becomes $this->allParts, after which
     * filterAndAssignToParts() derives $this->parts from it.
     */
    protected function parseHeaderValue(IConsumerService $consumer, string $value) : void
    {
        $this->allParts = $consumer($value);
        $this->filterAndAssignToParts();
    }

    /**
     * @return IHeaderPart[] the parts without comments
     */
    public function getParts() : array
    {
        return $this->parts;
    }

    /**
     * @return IHeaderPart[] all parts, comments included
     */
    public function getAllParts() : array
    {
        return $this->allParts;
    }

    /**
     * Returns the comment strings of the header, computing them on first use.
     *
     * A CommentPart contributes itself; any other part contributes whatever
     * its own getComments() returns.
     *
     * @return string[]
     */
    public function getComments() : array
    {
        if ($this->comments !== null) {
            return $this->comments;
        }
        $commentParts = [];
        foreach ($this->allParts as $part) {
            $fromPart = ($part instanceof CommentPart) ? [$part] : $part->getComments();
            $commentParts = \array_merge($commentParts, $fromPart);
        }
        $this->comments = \array_map(
            fn (IHeaderPart $c) => $c->getComment(),
            $commentParts
        );
        return $this->comments;
    }

    /**
     * Returns the value of the first non-comment part, or null when the
     * header has no such part.
     */
    public function getValue() : ?string
    {
        return ($this->parts === []) ? null : $this->parts[0]->getValue();
    }

    /**
     * Returns the raw, unparsed value.
     */
    public function getRawValue() : string
    {
        return $this->rawValue;
    }

    /**
     * Returns the header's name.
     */
    public function getName() : string
    {
        return $this->name;
    }

    /**
     * Returns the header as "name: raw value".
     */
    public function __toString() : string
    {
        return $this->name . ': ' . $this->rawValue;
    }

    /**
     * Returns 'Header::' followed by the header's name.
     */
    public function getErrorLoggingContextName() : string
    {
        return "Header::{$this->getName()}";
    }

    /**
     * Returns all parts (comments included) as this object's error bag
     * children.
     */
    protected function getErrorBagChildren() : array
    {
        return $this->getAllParts();
    }

    /**
     * Adds an error when the name is blank, and a notice when the raw value
     * is blank (after trimming whitespace in both cases).
     */
    protected function validate() : void
    {
        if (\trim($this->name) === '') {
            $this->addError('Header doesn\'t have a name', LogLevel::ERROR);
        }
        if (\trim($this->rawValue) === '') {
            $this->addError('Header doesn\'t have a value', LogLevel::NOTICE);
        }
    }

    /**
     * Normalises a (name, value) pair or a full header line into a two
     * element array of [name, value].
     *
     * When $value is given, both arguments are returned untouched. When it is
     * null, $nameOrLine is split on its first ':' and the value side is
     * trimmed; if no ':' exists the name is an empty string and the whole
     * trimmed line becomes the value.
     *
     * @return string[]
     */
    protected static function getHeaderPartsFrom(string $nameOrLine, ?string $value = null) : array
    {
        if ($value !== null) {
            return [$nameOrLine, $value];
        }
        // full header line: only the first ':' separates name from value
        $pieces = \explode(':', $nameOrLine, 2);
        if (\count($pieces) > 1) {
            return [$pieces[0], \trim($pieces[1])];
        }
        return ['', \trim($pieces[0])];
    }

    /**
     * Creates an IHeader from a name/value pair or from a full header line.
     *
     * $nameOrLine is either the header name (with $value set) or a complete
     * line such as "From: email@example.com" (with $value left as null).
     *
     * When called on AbstractHeader itself the HeaderFactory chooses the
     * header class via newInstance(). When called on a subclass, e.g.
     * AddressHeader::from(), the factory is asked for an instance of that
     * subclass via newInstanceOf().
     *
     * @param string $nameOrLine The header's name or full header line.
     * @param string|null $value The header's value, or null if passing a full
     *        header line to parse.
     */
    public static function from(string $nameOrLine, ?string $value = null) : IHeader
    {
        [$headerName, $headerValue] = static::getHeaderPartsFrom($nameOrLine, $value);
        $factory = MailMimeParser::getGlobalContainer()->get(HeaderFactory::class);
        if (self::class === static::class) {
            return $factory->newInstance($headerName, $headerValue);
        }
        return $factory->newInstanceOf($headerName, $headerValue, static::class);
    }
}

View File

@@ -0,0 +1,138 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\AddressBaseConsumerService;
use ZBateson\MailMimeParser\Header\Part\AddressGroupPart;
use ZBateson\MailMimeParser\Header\Part\AddressPart;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * A header containing one or more email addresses and/or groups of addresses.
 *
 * An address is separated by a comma, and each group separated by a semi-colon.
 * The AddressHeader provides a complete list of all addresses referenced in a
 * header including any addresses in groups, in addition to being able to access
 * the groups separately if needed.
 *
 * For full specifications, see {@link https://www.ietf.org/rfc/rfc2822.txt}
 *
 * @author Zaahid Bateson
 */
class AddressHeader extends AbstractHeader
{
    /**
     * @var AddressPart[] array of addresses, including all addresses
     *      contained in groups.
     */
    protected array $addresses = [];

    /**
     * @var AddressGroupPart[] array of address groups (lists).
     */
    protected array $groups = [];

    /**
     * Creates the header, resolving the logger and consumer service from the
     * global container when they aren't passed explicitly.
     *
     * @param string $name Name of the header.
     * @param string $value Raw value of the header.
     * @param LoggerInterface|null $logger Optional logger override.
     * @param AddressBaseConsumerService|null $consumerService Optional
     *        consumer override.
     */
    public function __construct(
        string $name,
        string $value,
        ?LoggerInterface $logger = null,
        ?AddressBaseConsumerService $consumerService = null
    ) {
        $di = MailMimeParser::getGlobalContainer();
        parent::__construct(
            $logger ?? $di->get(LoggerInterface::class),
            $consumerService ?? $di->get(AddressBaseConsumerService::class),
            $name,
            $value
        );
    }

    /**
     * Filters $this->allParts into the parts required by $this->parts
     * and assigns it, then rebuilds the address and group lists from it.
     *
     * The AbstractHeader::filterAndAssignToParts method filters out
     * CommentParts. Each remaining AddressPart is recorded as an address, and
     * each AddressGroupPart is recorded as a group with its member addresses
     * added to the address list.
     */
    protected function filterAndAssignToParts() : void
    {
        parent::filterAndAssignToParts();
        // Build into fresh arrays so calling this method again replaces the
        // lists instead of appending duplicates to them.
        $addresses = [];
        $groups = [];
        foreach ($this->parts as $part) {
            if ($part instanceof AddressPart) {
                $addresses[] = $part;
            } elseif ($part instanceof AddressGroupPart) {
                $addresses = \array_merge($addresses, $part->getAddresses());
                $groups[] = $part;
            }
        }
        $this->addresses = $addresses;
        $this->groups = $groups;
    }

    /**
     * Returns all address parts in the header including any addresses that are
     * in groups (lists).
     *
     * @return AddressPart[] The addresses.
     */
    public function getAddresses() : array
    {
        return $this->addresses;
    }

    /**
     * Returns all group parts (lists) in the header.
     *
     * @return AddressGroupPart[]
     */
    public function getGroups() : array
    {
        return $this->groups;
    }

    /**
     * Returns true if an address exists with the passed email address.
     *
     * Comparison is done case insensitively.
     *
     * @param string $email The email address to look for.
     */
    public function hasAddress(string $email) : bool
    {
        foreach ($this->addresses as $addr) {
            if (\strcasecmp($addr->getEmail(), $email) === 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the first email address in the header.
     *
     * @return ?string The email address, or null if the header has no
     *         addresses.
     */
    public function getEmail() : ?string
    {
        if (!empty($this->addresses)) {
            return $this->addresses[0]->getEmail();
        }
        return null;
    }

    /**
     * Returns the name associated with the first email address to complement
     * getEmail() if one is set, or null if not.
     *
     * @return string|null The person name.
     */
    public function getPersonName() : ?string
    {
        if (!empty($this->addresses)) {
            return $this->addresses[0]->getName();
        }
        return null;
    }
}

View File

@@ -0,0 +1,338 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use ArrayIterator;
use Iterator;
use NoRewindIterator;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
use ZBateson\MailMimeParser\Header\Part\MimeToken;
/**
 * Abstract base class for all header token consumers.
 *
 * Defines the base parser that loops over tokens, consuming them and creating
 * header parts.
 *
 * @author Zaahid Bateson
 */
abstract class AbstractConsumerService implements IConsumerService
{
    /**
     * @var LoggerInterface receives debug output while parsing
     */
    protected LoggerInterface $logger;

    /**
     * @var HeaderPartFactory used to construct IHeaderPart objects
     */
    protected HeaderPartFactory $partFactory;

    /**
     * @var AbstractConsumerService[] array of sub-consumers used by this
     *      consumer if any, or an empty array if none exist.
     */
    protected array $subConsumers = [];

    /**
     * @var ?string the generated token split pattern on first run, so it doesn't
     *      need to be regenerated every time.
     */
    private ?string $tokenSplitPattern = null;

    /**
     * @param AbstractConsumerService[] $subConsumers
     */
    public function __construct(LoggerInterface $logger, HeaderPartFactory $partFactory, array $subConsumers = [])
    {
        $this->logger = $logger;
        $this->partFactory = $partFactory;
        $this->subConsumers = $subConsumers;
    }

    /**
     * Parses the passed header value into header parts.
     *
     * An empty string is never parsed and yields an empty array.
     *
     * @return IHeaderPart[]
     */
    public function __invoke(string $value) : array
    {
        $this->logger->debug('Starting {class} for "{value}"', ['class' => static::class, 'value' => $value]);
        if ($value !== '') {
            $parts = $this->parseRawValue($value);
            $this->logger->debug(
                'Ending {class} for "{value}": parsed into {cnt} header part objects',
                ['class' => static::class, 'value' => $value, 'cnt' => \count($parts)]
            );
            return $parts;
        }
        return [];
    }

    /**
     * Returns this consumer and all unique sub consumers.
     *
     * Loops into the sub-consumers (and their sub-consumers, etc...) finding
     * all unique consumers, and returns them in an array.
     *
     * @return AbstractConsumerService[] Array of unique consumers.
     */
    protected function getAllConsumers() : array
    {
        $found = [$this];
        // $found grows while it is walked, so the bound is re-evaluated on
        // every pass; each consumer is visited exactly once.
        for ($i = 0; $i < \count($found); ++$i) {
            foreach ($found[$i]->subConsumers as $consumer) {
                // Strict (identity) comparison: a loose comparison of two
                // distinct objects of the same class compares their
                // properties recursively, and consumers reference each other
                // (and themselves) through $subConsumers.
                if (!\in_array($consumer, $found, true)) {
                    $found[] = $consumer;
                }
            }
        }
        return $found;
    }

    /**
     * Parses the raw header value into header parts.
     *
     * Calls splitRawValue to split the value into token strings, then calls
     * parseTokensIntoParts to parse the returned array.
     *
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[] the array of parsed
     *         parts
     */
    private function parseRawValue(string $value) : array
    {
        $tokens = $this->splitRawValue($value);
        return $this->parseTokensIntoParts(new NoRewindIterator(new ArrayIterator($tokens)));
    }

    /**
     * Returns an array of regular expression separators specific to this
     * consumer.
     *
     * The returned patterns are used to split the header value into tokens for
     * the consumer to parse into parts.
     *
     * Each array element makes part of a generated regular expression that is
     * used in a call to preg_split(). RegEx patterns can be used, and care
     * should be taken to escape special characters.
     *
     * @return string[] Array of regex patterns.
     */
    abstract protected function getTokenSeparators() : array;

    /**
     * Returns a list of regular expression markers for this consumer and all
     * sub-consumers by calling getTokenSeparators().
     *
     * @return string[] Array of regular expression markers.
     */
    protected function getAllTokenSeparators() : array
    {
        $markers = $this->getTokenSeparators();
        $subConsumers = $this->getAllConsumers();
        foreach ($subConsumers as $consumer) {
            // prepended, so separators found later come first in the pattern
            $markers = \array_merge($consumer->getTokenSeparators(), $markers);
        }
        return \array_unique($markers);
    }

    /**
     * Returns a regex pattern used to split the input header string.
     *
     * The default implementation calls
     * {@see AbstractConsumerService::getAllTokenSeparators()} and implodes the
     * returned array with the regex OR '|' character as its glue. The mime
     * part pattern, an escaped CRLF and any escaped single character are
     * matched ahead of the separators.
     *
     * @return string the regex pattern
     */
    protected function getTokenSplitPattern() : string
    {
        $sChars = \implode('|', $this->getAllTokenSeparators());
        $mimePartPattern = MimeToken::MIME_PART_PATTERN;
        return '~(' . $mimePartPattern . '|\\\\\r\n|\\\\.|' . $sChars . ')~ms';
    }

    /**
     * Returns an array of split tokens from the input string.
     *
     * The method calls preg_split using
     * {@see AbstractConsumerService::getTokenSplitPattern()}. The split array
     * will not contain any empty parts and will contain the markers.
     *
     * If preg_split fails, a warning is logged and the whole value is
     * returned as a single token instead of letting the 'false' result
     * violate the array return type.
     *
     * @param string $rawValue the raw string
     * @return string[] the array of tokens
     */
    protected function splitRawValue($rawValue) : array
    {
        if ($this->tokenSplitPattern === null) {
            $this->tokenSplitPattern = $this->getTokenSplitPattern();
            $this->logger->debug(
                'Configuring {class} with token split pattern: {pattern}',
                ['class' => static::class, 'pattern' => $this->tokenSplitPattern]
            );
        }
        $tokens = \preg_split(
            $this->tokenSplitPattern,
            $rawValue,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );
        if ($tokens === false) {
            $this->logger->warning(
                'Unable to split header value in {class}, PCRE error code: {code}',
                ['class' => static::class, 'code' => \preg_last_error()]
            );
            // mirror PREG_SPLIT_NO_EMPTY: never hand back an empty token
            return ($rawValue === '') ? [] : [$rawValue];
        }
        return $tokens;
    }

    /**
     * Returns true if the passed string token marks the beginning marker for
     * the current consumer.
     *
     * @param string $token The current token
     */
    abstract protected function isStartToken(string $token) : bool;

    /**
     * Returns true if the passed string token marks the end marker for the
     * current consumer.
     *
     * @param string $token The current token
     */
    abstract protected function isEndToken(string $token) : bool;

    /**
     * Constructs and returns an IHeaderPart for the passed string token.
     *
     * If the token should be ignored, the function must return null.
     *
     * The default created part uses the instance's partFactory->newInstance
     * method, or partFactory->newToken for literals.
     *
     * @param string $token the token
     * @param bool $isLiteral set to true if the token represents a literal -
     *        e.g. an escaped token
     * @return ?IHeaderPart The constructed header part or null if the token
     *         should be ignored.
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        if ($isLiteral) {
            return $this->partFactory->newToken($token, true);
        }
        // can be overridden with custom PartFactory
        return $this->partFactory->newInstance($token);
    }

    /**
     * Iterates through this consumer's sub-consumers checking if the current
     * token triggers a sub-consumer's start token and passes control onto that
     * sub-consumer's parseTokensIntoParts().
     *
     * If no sub-consumer is responsible for the current token, calls
     * {@see AbstractConsumerService::getPartForToken()} and returns it in an
     * array.
     *
     * @param Iterator<string> $tokens
     * @return IHeaderPart[]
     */
    protected function getConsumerTokenParts(Iterator $tokens) : array
    {
        $token = $tokens->current();
        $subConsumers = $this->subConsumers;
        foreach ($subConsumers as $consumer) {
            if ($consumer->isStartToken($token)) {
                $this->logger->debug(
                    'Token: "{value}" in {class} starting sub-consumer {consumer}',
                    ['value' => $token, 'class' => static::class, 'consumer' => \get_class($consumer)]
                );
                $this->advanceToNextToken($tokens, true);
                return $consumer->parseTokensIntoParts($tokens);
            }
        }
        $part = $this->getPartForToken($token, false);
        return ($part !== null) ? [$part] : [];
    }

    /**
     * Returns an array of IHeaderPart for the current token on the iterator.
     *
     * An escaped token (a backslash followed by CRLF or by a single
     * character) is turned into a literal part with the backslash removed.
     *
     * If the current token is a start token from a sub-consumer, the sub-
     * consumer's {@see AbstractConsumerService::parseTokensIntoParts()} method
     * is called.
     *
     * @param Iterator<string> $tokens The token iterator.
     * @return IHeaderPart[]
     */
    protected function getTokenParts(Iterator $tokens) : array
    {
        $token = $tokens->current();
        if ($token === "\\\r\n" || (\strlen($token) === 2 && $token[0] === '\\')) {
            $part = $this->getPartForToken(\substr($token, 1), true);
            return ($part !== null) ? [$part] : [];
        }
        return $this->getConsumerTokenParts($tokens);
    }

    /**
     * Determines if the iterator should be advanced to the next token after
     * reading tokens or finding a start token.
     *
     * The default implementation will advance for a start token, but not
     * advance on the end token of the current consumer, allowing the end token
     * to be passed up to a higher-level consumer.
     *
     * @param Iterator $tokens The token iterator.
     * @param bool $isStartToken true for the start token.
     */
    protected function advanceToNextToken(Iterator $tokens, bool $isStartToken) : static
    {
        $checkEndToken = (!$isStartToken && $tokens->valid());
        $isEndToken = ($checkEndToken && $this->isEndToken($tokens->current()));
        if (($isStartToken) || ($checkEndToken && !$isEndToken)) {
            $tokens->next();
        }
        return $this;
    }

    /**
     * Iterates over the passed token Iterator and returns an array of parsed
     * IHeaderPart objects.
     *
     * The method checks each token to see if the token matches a sub-consumer's
     * start token, or if it matches the current consumer's end token to stop
     * processing.
     *
     * If a sub-consumer's start token is matched, the sub-consumer is invoked
     * and its returned parts are merged to the current consumer's header parts.
     *
     * After all tokens are read and an array of Header\Parts are constructed,
     * the array is passed to {@see AbstractConsumerService::processParts} for
     * any final processing if there are any parts.
     *
     * @param Iterator<string> $tokens An iterator over a string of tokens
     * @return IHeaderPart[] An array of parsed parts
     */
    protected function parseTokensIntoParts(Iterator $tokens) : array
    {
        $parts = [];
        while ($tokens->valid() && !$this->isEndToken($tokens->current())) {
            $this->logger->debug('Parsing token: {token} in class: {consumer}', ['token' => $tokens->current(), 'consumer' => static::class]);
            $parts = \array_merge($parts, $this->getTokenParts($tokens));
            $this->advanceToNextToken($tokens, false);
        }
        return (empty($parts)) ? [] : $this->processParts($parts);
    }

    /**
     * Performs any final processing on the array of parsed parts before
     * returning it to the consumer client. When called from
     * parseTokensIntoParts the passed $parts array is never empty.
     *
     * The default implementation logs the parts and returns the passed array
     * unchanged.
     *
     * @param IHeaderPart[] $parts The parsed parts.
     * @return IHeaderPart[] Array of resulting final parts.
     */
    protected function processParts(array $parts) : array
    {
        $this->logger->debug('Processing parts array {parts} in {consumer}', ['parts' => $parts, 'consumer' => static::class]);
        return $parts;
    }
}

View File

@@ -0,0 +1,69 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
/**
 * Bare-bones AbstractConsumerService that tokenises on whitespace.
 *
 * The class has no abstract methods of its own; it is declared abstract
 * because it doesn't name its sub-consumers as constructor dependencies, and
 * so shouldn't be used directly. Use the concrete GenericConsumerService or
 * GenericConsumerMimeLiteralPartService classes instead.
 *
 * @author Zaahid Bateson
 */
abstract class AbstractGenericConsumerService extends AbstractConsumerService
{
    /**
     * Splits the header value along runs of whitespace by returning the
     * single regex pattern '\s+'.
     *
     * @return string[] an array of regex pattern matchers
     */
    protected function getTokenSeparators() : array
    {
        $whitespace = '\s+';
        return [$whitespace];
    }

    /**
     * No token ends this consumer: always false.
     */
    protected function isEndToken(string $token) : bool
    {
        return false;
    }

    /**
     * No token starts this consumer: always false.
     *
     * @codeCoverageIgnore
     */
    protected function isStartToken(string $token) : bool
    {
        return false;
    }

    /**
     * Wraps all parsed parts in one ContainerPart created by the part
     * factory, and returns that container as the only element of the
     * resulting array.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $container = $this->partFactory->newContainerPart($parts);
        return [$container];
    }
}

View File

@@ -0,0 +1,104 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Iterator;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
/**
 * Top-level consumer for recipient/sender email address headers such as From
 * and To.
 *
 * All token processing is delegated to the single AddressConsumerService
 * sub-consumer; this class only collects the parts that sub-consumer returns.
 *
 * @author Zaahid Bateson
 */
class AddressBaseConsumerService extends AbstractConsumerService
{
    /**
     * Registers the AddressConsumerService as the only sub-consumer.
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        AddressConsumerService $addressConsumerService
    ) {
        $subConsumers = [$addressConsumerService];
        parent::__construct($logger, $partFactory, $subConsumers);
    }

    /**
     * This consumer contributes no separators of its own.
     *
     * @return string[] an empty array
     */
    protected function getTokenSeparators() : array
    {
        return [];
    }

    /**
     * Never advances past a start token, and otherwise defers to the parent
     * implementation.
     *
     * The start token seen here belongs to an {@see AddressPart} (or to a
     * sub-consumer) and therefore has to be handed on rather than skipped.
     */
    protected function advanceToNextToken(Iterator $tokens, bool $isStartToken) : static
    {
        if (!$isStartToken) {
            parent::advanceToNextToken($tokens, $isStartToken);
        }
        return $this;
    }

    /**
     * No token ends this consumer: always false.
     *
     * @return false
     */
    protected function isEndToken(string $token) : bool
    {
        return false;
    }

    /**
     * No token starts this consumer: always false.
     *
     * @codeCoverageIgnore
     * @return false
     */
    protected function isStartToken(string $token) : bool
    {
        return false;
    }

    /**
     * Skips the escaped-token handling of the parent class and goes straight
     * to the sub-consumer lookup, so that tokens are handled by
     * AddressConsumerService rather than at this level.
     *
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[]|array
     */
    protected function getTokenParts(Iterator $tokens) : array
    {
        return $this->getConsumerTokenParts($tokens);
    }

    /**
     * Always returns null. Present only to satisfy AbstractConsumerService;
     * it is not expected to be reached for this consumer.
     *
     * @codeCoverageIgnore
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        return null;
    }
}

View File

@@ -0,0 +1,139 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Part\AddressGroupPart;
use ZBateson\MailMimeParser\Header\Part\AddressPart;
use ZBateson\MailMimeParser\Header\Part\MimeToken;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
 * Consumer for one entry of an address list.
 *
 * An entry is either a single email address or a named 'group' holding
 * several addresses. Consumption stops at a comma, which separates entries,
 * or at a semi-colon, which closes a group.
 *
 * A single address may be a bare email or a name followed by an email. Both
 * of these are valid From headers:
 * - From: jonsnow@winterfell.com
 * - From: Jon Snow <jonsnow@winterfell.com>
 *
 * Groups must be named, for example:
 * - To: Winterfell: jonsnow@winterfell.com, Arya Stark <arya@winterfell.com>;
 *
 * Addresses may contain quoted parts and comments, and names may be
 * mime-header encoded.
 *
 * @author Zaahid Bateson
 */
class AddressConsumerService extends AbstractConsumerService
{
    /**
     * Registers the group, email, comment and quoted-string consumers as
     * sub-consumers, and gives the group consumer a reference back to this
     * instance.
     */
    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $partFactory,
        AddressGroupConsumerService $addressGroupConsumerService,
        AddressEmailConsumerService $addressEmailConsumerService,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        $addressGroupConsumerService->setAddressConsumerService($this);
        $subConsumers = [
            $addressGroupConsumerService,
            $addressEmailConsumerService,
            $commentConsumerService,
            $quotedStringConsumerService
        ];
        parent::__construct($logger, $partFactory, $subConsumers);
    }

    /**
     * Returns patterns for the two end tokens ("," and ";") and for runs of
     * whitespace.
     *
     * @return string[] the patterns
     */
    public function getTokenSeparators() : array
    {
        return [',', ';', '\s+'];
    }

    /**
     * Returns true for a comma or a semi-colon.
     *
     * The semi-colon isn't strictly an end token of an address, but it may
     * end a parent {@see AddressGroupConsumerService}.
     */
    protected function isEndToken(string $token) : bool
    {
        return \in_array($token, [',', ';'], true);
    }

    /**
     * Always returns true: the address consumer is "greedy" and accepts any
     * token as its start.
     */
    protected function isStartToken(string $token) : bool
    {
        return true;
    }

    /**
     * Builds the final address or group part out of the parsed parts.
     *
     * The parts are searched from the end for an {@see AddressPart} or
     * {@see AddressGroupPart} produced by a sub-consumer. When one is found,
     * the parts in front of it become the name and a new part of the same
     * kind is created around it. When none is found, all parts together are
     * treated as the email address of a nameless {@see AddressPart}.
     *
     * The returned array always holds exactly one element.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[]|array
     */
    protected function processParts(array $parts) : array
    {
        foreach (\array_reverse($parts, true) as $offset => $candidate) {
            $isGroup = ($candidate instanceof AddressGroupPart);
            if (!$isGroup && !($candidate instanceof AddressPart)) {
                continue;
            }
            // keep only what precedes the located part; anything after it
            // is purposefully ignored
            \array_splice($parts, $offset);
            if ($isGroup) {
                return [$this->partFactory->newAddressGroupPart($parts, [$candidate])];
            }
            return [$this->partFactory->newAddress($parts, [$candidate])];
        }

        // no address/group part: mime tokens are re-created as plain tokens
        // from their raw value before being used as the email
        $toPlainToken = fn ($p) => ($p instanceof MimeToken) ? $this->partFactory->newToken($p->getRawValue()) : $p;
        return [$this->partFactory->newAddress([], \array_map($toPlainToken, $parts))];
    }
}

View File

@@ -0,0 +1,78 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
/**
 * Consumer for the bracketed email of an address that has both a name and an
 * email, e.g. "name" <email@tld.com>.
 *
 * The text between '<' and '>' may itself contain comments and quoted
 * portions, which are handled by the registered sub-consumers.
 *
 * @author Zaahid Bateson
 */
class AddressEmailConsumerService extends AbstractConsumerService
{
    /**
     * Registers the comment and quoted-string consumers as sub-consumers.
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        $subConsumers = [$commentConsumerService, $quotedStringConsumerService];
        parent::__construct($logger, $partFactory, $subConsumers);
    }

    /**
     * Returns patterns for the characters that open and close the email
     * portion of a name/address pair: "<" and ">".
     *
     * @return string[] the patterns
     */
    public function getTokenSeparators() : array
    {
        return ['<', '>'];
    }

    /**
     * The closing '>' ends this consumer.
     */
    protected function isEndToken(string $token) : bool
    {
        return '>' === $token;
    }

    /**
     * The opening '<' starts this consumer.
     */
    protected function isStartToken(string $token) : bool
    {
        return '<' === $token;
    }

    /**
     * Returns one {@see \ZBateson\MailMimeParser\Header\Part\AddressPart}
     * built with no name parts and with the passed parts as its email, so an
     * {@see AddressConsumerService} can recognise it and combine it with a
     * name.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[]|array
     */
    protected function processParts(array $parts) : array
    {
        $emailOnly = $this->partFactory->newAddress([], $parts);
        return [$emailOnly];
    }
}

View File

@@ -0,0 +1,106 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use Iterator;
use ZBateson\MailMimeParser\Header\Part\AddressGroupPart;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
/**
 * Consumer for one named group of addresses inside an address header.
 *
 * Addresses are located through the AddressConsumerService sub-consumer, and
 * processing stops at the semi-colon that closes the group. The result handed
 * back to the caller is a single Part\AddressGroupPart holding everything
 * that was found.
 *
 * The class builds on AddressBaseConsumerService, adding start/end tokens,
 * token separators and the construction of the Part\AddressGroupPart.
 *
 * @author Zaahid Bateson
 */
class AddressGroupConsumerService extends AddressBaseConsumerService
{
    /**
     * Initialises the consumer without sub-consumers by calling the
     * AbstractConsumerService constructor directly; the sub-consumer is
     * supplied later through setAddressConsumerService().
     */
    public function __construct(LoggerInterface $logger, HeaderPartFactory $partFactory)
    {
        AbstractConsumerService::__construct($logger, $partFactory, []);
    }

    /**
     * Installs the passed consumer as the only sub-consumer.
     *
     * Needs to be called in AddressConsumerService's constructor to avoid a
     * circular dependency.
     */
    public function setAddressConsumerService(AddressConsumerService $subConsumer) : void
    {
        $this->subConsumers = [$subConsumer];
    }

    /**
     * Returns patterns for the characters that open and close a group: the
     * colon and the semi-colon.
     *
     * @return string[] the patterns
     */
    public function getTokenSeparators() : array
    {
        return [':', ';'];
    }

    /**
     * A semi-colon ends the group.
     */
    protected function isEndToken(string $token) : bool
    {
        return ';' === $token;
    }

    /**
     * A colon starts the group.
     */
    protected function isStartToken(string $token) : bool
    {
        return ':' === $token;
    }

    /**
     * Parses the tokens as the parent does, but still produces a group part
     * when nothing was parsed, since a group may be empty.
     *
     * @param Iterator $tokens
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[]
     */
    protected function parseTokensIntoParts(Iterator $tokens) : array
    {
        $parsed = parent::parseTokensIntoParts($tokens);
        return ($parsed !== []) ? $parsed : $this->processParts([]);
    }

    /**
     * Returns an array whose only element is an {@see AddressGroupPart}
     * created with no name parts and with the passed parts as its members.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts
     * @return AddressGroupPart[]|array
     */
    protected function processParts(array $parts) : array
    {
        $group = $this->partFactory->newAddressGroupPart([], $parts);
        return [$group];
    }
}

View File

@@ -0,0 +1,113 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Iterator;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
* Consumes all tokens within parentheses as comments.
*
* Parenthetical comments in mime-headers can be nested within one another. The
* outer-level continues after an inner-comment ends. Additionally,
* quoted-literals may exist within comments as well, meaning a parenthesis
* inside a quoted string would not begin or end a comment section.
*
* In order to satisfy these specifications, CommentConsumerService inherits
* from GenericConsumerService which defines CommentConsumerService and
* QuotedStringConsumerService as sub-consumers.
*
* Examples:
* X-Mime-Header: Some value (comment)
* X-Mime-Header: Some value (comment (nested comment) still in comment)
* X-Mime-Header: Some value (comment "and part of original ) comment" -
* still a comment)
*
* @author Zaahid Bateson
*/
class CommentConsumerService extends GenericConsumerService
{
    /**
     * Registers this very instance as the comment sub-consumer (so nested
     * comments are handled by the same service), alongside the given
     * quoted-string consumer.
     */
    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $partFactory,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        $selfAsCommentConsumer = $this;
        parent::__construct(
            $logger,
            $partFactory,
            $selfAsCommentConsumer,
            $quotedStringConsumerService
        );
    }

    /**
     * Adds escaped open and close parenthesis patterns to the parent's
     * separators.
     *
     * @return string[] the patterns
     */
    protected function getTokenSeparators() : array
    {
        $inherited = parent::getTokenSeparators();
        $parentheses = ['\(', '\)'];
        return \array_merge($inherited, $parentheses);
    }

    /**
     * A comment starts on an open parenthesis, '('.
     */
    protected function isStartToken(string $token) : bool
    {
        return $token === '(';
    }

    /**
     * A comment ends on a close parenthesis, ')'.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === ')';
    }

    /**
     * Creates a part for the token through the part factory's newInstance
     * method; $isLiteral is not consulted.
     *
     * Tokens from this and sub-consumers are combined into a Part\CommentPart
     * in processParts.
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        $factory = $this->partFactory;
        return $factory->newInstance($token);
    }

    /**
     * Unconditionally advances the iterator by one token.
     *
     * The default implementation checks if the current token is an end token,
     * and will not advance past it. Because a comment part of a header can be
     * nested, this implementation must advance past its own 'end' token.
     */
    protected function advanceToNextToken(Iterator $tokens, bool $isStartToken) : static
    {
        $tokens->next();

        return $this;
    }

    /**
     * Collapses every part generated for the comment into one
     * Part\CommentPart, returned as the sole element of an array.
     *
     * @param IHeaderPart[] $parts
     * @return IHeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $comment = $this->partFactory->newCommentPart($parts);
        return [$comment];
    }
}

View File

@@ -0,0 +1,45 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use ZBateson\MailMimeParser\Header\IHeaderPart;
/**
* Parses a date header into a Part\DatePart taking care of comment and quoted
* parts as necessary.
*
* @author Zaahid Bateson
*/
class DateConsumerService extends GenericConsumerService
{
    /**
     * Creates a token part for the current token via the factory's newToken
     * method.
     *
     * The passed $isLiteral flag is ignored: false is always handed to
     * newToken as its second argument.
     *
     * @param string $token the token
     * @param bool $isLiteral set to true if the token represents a literal -
     *        e.g. an escaped token (unused here)
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        $factory = $this->partFactory;
        return $factory->newToken($token, false);
    }

    /**
     * Builds one Part\DatePart out of all parsed parts and returns it as the
     * only element of an array.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts The parsed
     *        parts.
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[] Array of resulting
     *         final parts.
     */
    protected function processParts(array $parts) : array
    {
        $datePart = $this->partFactory->newDatePart($parts);
        return [$datePart];
    }
}

View File

@@ -0,0 +1,34 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
* GenericConsumerMimeLiteralPartService uses a MimeTokenPartFactory instead
* of a HeaderPartFactory.
*
* @author Zaahid Bateson
*/
class GenericConsumerMimeLiteralPartService extends GenericConsumerService
{
    /**
     * Narrows the part factory to a MimeTokenPartFactory and forwards every
     * dependency unchanged to GenericConsumerService.
     */
    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $partFactory,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        parent::__construct($logger, $partFactory, $commentConsumerService, $quotedStringConsumerService);
    }
}

View File

@@ -0,0 +1,34 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
/**
* The base GenericConsumerService is a consumer with CommentConsumerService and
* QuotedStringConsumerService as sub-consumers, and splitting tokens by
* whitespace.
*
* @author Zaahid Bateson
*/
class GenericConsumerService extends AbstractGenericConsumerService
{
    /**
     * Registers the comment and quoted-string consumers, in that order, as
     * the sub-consumers handed to the parent constructor.
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        $subConsumers = [$commentConsumerService, $quotedStringConsumerService];
        parent::__construct($logger, $partFactory, $subConsumers);
    }
}

View File

@@ -0,0 +1,26 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
/**
* Interface defining a consumer service class.
*
* @author Zaahid Bateson
*/
interface IConsumerService
{
/**
* Invokes parsing of a header's value into header parts.
*
* Declaring __invoke makes every implementation usable as a callable, e.g.
* $parts = $consumerService($rawValue).
*
* @param string $value the raw header value
* @return \ZBateson\MailMimeParser\Header\IHeaderPart[] the array of parsed
* parts
*/
public function __invoke(string $value) : array;
}

View File

@@ -0,0 +1,90 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
/**
* Serves as a base-consumer for ID headers (like Message-ID and Content-ID).
*
* IdBaseConsumerService handles invalidly-formatted IDs not within '<' and '>'
* characters. Processing for validly-formatted IDs is passed on to its
* sub-consumer, IdConsumerService.
*
* @author Zaahid Bateson
*/
class IdBaseConsumerService extends AbstractConsumerService
{
    /**
     * Registers the comment, quoted-string and ID consumers, in that order,
     * as sub-consumers.
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService,
        IdConsumerService $idConsumerService
    ) {
        $subConsumers = [
            $commentConsumerService,
            $quotedStringConsumerService,
            $idConsumerService
        ];
        parent::__construct($logger, $partFactory, $subConsumers);
    }

    /**
     * Uses runs of whitespace ('\s+') as the only separator.
     *
     * @return string[] an array of regex pattern matchers.
     */
    protected function getTokenSeparators() : array
    {
        $whitespace = '\s+';
        return [$whitespace];
    }

    /**
     * Always false: IdBaseConsumerService has no start/end tokens.
     */
    protected function isEndToken(string $token) : bool
    {
        return false;
    }

    /**
     * Always false: IdBaseConsumerService has no start/end tokens.
     *
     * @codeCoverageIgnore
     */
    protected function isStartToken(string $token) : bool
    {
        return false;
    }

    /**
     * Discards whitespace-only tokens (returns null) and creates a token part
     * through the factory's newToken for everything else.
     *
     * The passed $isLiteral flag is not consulted; true is always handed to
     * newToken as its second argument.
     *
     * @param string $token the token
     * @param bool $isLiteral set to true if the token represents a literal -
     *        e.g. an escaped token
     * @return ?IHeaderPart The constructed header part or null if the token
     *         should be ignored
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        $isWhitespaceOnly = (\preg_match('/^\s+$/', $token) === 1);
        return $isWhitespaceOnly ? null : $this->partFactory->newToken($token, true);
    }
}

View File

@@ -0,0 +1,57 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use ZBateson\MailMimeParser\Header\IHeaderPart;
/**
* Parses a single ID from an ID header. Begins consuming on a '<' char, and
* ends on a '>' char.
*
* @author Zaahid Bateson
*/
class IdConsumerService extends GenericConsumerService
{
    /**
     * Adds the opening and closing markers of an ID ('<' and '>') to the
     * parent's separators.
     *
     * @return string[] the patterns
     */
    public function getTokenSeparators() : array
    {
        $inherited = parent::getTokenSeparators();
        $angleBrackets = ['<', '>'];
        return \array_merge($inherited, $angleBrackets);
    }

    /**
     * An ID ends on '>'.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === '>';
    }

    /**
     * An ID starts on '<'.
     */
    protected function isStartToken(string $token) : bool
    {
        return $token === '<';
    }

    /**
     * Discards whitespace-only tokens (returns null) and creates a token part
     * through the factory's newToken for anything else.
     *
     * The passed $isLiteral flag is not consulted; true is always handed to
     * newToken as its second argument.
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        $isWhitespaceOnly = (\preg_match('/^\s+$/', $token) === 1);
        return $isWhitespaceOnly ? null : $this->partFactory->newToken($token, true);
    }
}

View File

@@ -0,0 +1,96 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Iterator;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
use ZBateson\MailMimeParser\Header\Part\ParameterPart;
/**
* Reads headers separated into parameters consisting of an optional main value,
* and subsequent name/value pairs - for example text/html; charset=utf-8.
*
* A ParameterConsumerService's parts are separated by a semi-colon. Its
* name/value pairs are separated with an '=' character.
*
* Parts may be mime-encoded entities, or RFC-2231 split/encoded parts.
* Additionally, a value can be quoted and comments may exist.
*
* Actual processing of parameters is done in ParameterNameValueConsumerService,
* with ParameterConsumerService processing all collected parts into split
* parameter parts as necessary.
*
* @author Zaahid Bateson
*/
class ParameterConsumerService extends AbstractGenericConsumerService
{
    use QuotedStringMimeLiteralPartTokenSplitPatternTrait;

    /**
     * Registers the name/value, comment and quoted-string consumers, in that
     * order, as sub-consumers.
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        ParameterNameValueConsumerService $parameterNameValueConsumerService,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        parent::__construct(
            $logger,
            $partFactory,
            [$parameterNameValueConsumerService, $commentConsumerService, $quotedStringConsumerService]
        );
    }

    /**
     * Disables advancing for start tokens; for any other token the parent's
     * advancing logic is applied.
     */
    protected function advanceToNextToken(Iterator $tokens, bool $isStartToken) : static
    {
        if (!$isStartToken) {
            parent::advanceToNextToken($tokens, $isStartToken);
        }
        return $this;
    }

    /**
     * Post processing involves looking for split parameter parts with matching
     * names and combining them into a SplitParameterPart, and otherwise
     * returning the parts as-is.
     *
     * Parts are grouped in order of first appearance:
     *
     *  o A ParameterPart with a non-null index is a split-parameter part and
     *    is grouped with every other such part sharing its (lower-cased) name.
     *  o Any other part forms a group of its own, keyed by its object id.
     *
     * Group keys always carry a non-numeric prefix. Without it, an all-digit
     * parameter name would be cast to an integer array key by PHP, and
     * integer keys are renumbered rather than merged when combining arrays,
     * which would prevent such split parameters from ever being joined.
     *
     * @param IHeaderPart[] $parts The parsed parts.
     * @return IHeaderPart[] Array of resulting final parts.
     */
    protected function processParts(array $parts) : array
    {
        $groups = [];
        foreach ($parts as $part) {
            $isSplitPart = ($part instanceof ParameterPart && $part->getIndex() !== null);
            $key = $isSplitPart
                ? 'name:' . \strtolower($part->getName())
                : 'part:' . \spl_object_id($part);
            $groups[$key][] = $part;
        }

        $factory = $this->partFactory;
        $final = [];
        foreach ($groups as $group) {
            // more than one member means several split parts share a name
            $final[] = (\count($group) > 1)
                ? $factory->newSplitParameterPart($group)
                : $group[0];
        }
        return $final;
    }
}

View File

@@ -0,0 +1,97 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\ContainerPart;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
* Parses an individual part of a parameter header.
*
* 'isStartToken' always returns true, so control is taken from
* ParameterConsumerService always, and returned when a ';' is encountered (and
* so processes a single part and returns it, then gets control back).
*
* If an '=' is encountered, the ParameterValueConsumerService sub-consumer
* takes control and parses the value of a parameter.
*
* If no '=' is encountered, it's assumed to be a single value element, which
* should be the first part of a parameter header, e.g. 'text/html' in
* Content-Type: text/html; charset=utf-8
*
* @author Zaahid Bateson
*/
class ParameterNameValueConsumerService extends AbstractGenericConsumerService
{
    /**
     * Registers the parameter-value, comment and quoted-string consumers, in
     * that order, as sub-consumers.
     */
    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $partFactory,
        ParameterValueConsumerService $parameterValueConsumerService,
        CommentConsumerService $commentConsumerService,
        QuotedStringConsumerService $quotedStringConsumerService
    ) {
        $subConsumers = [
            $parameterValueConsumerService,
            $commentConsumerService,
            $quotedStringConsumerService
        ];
        parent::__construct($logger, $partFactory, $subConsumers);
    }

    /**
     * Adds a semi-colon to the parent's token separators.
     *
     * @return string[]
     */
    protected function getTokenSeparators() : array
    {
        $inherited = parent::getTokenSeparators();
        return \array_merge($inherited, [';']);
    }

    /**
     * Always returns true to grab control from its parent
     * ParameterConsumerService.
     */
    protected function isStartToken(string $token) : bool
    {
        return true;
    }

    /**
     * Control is handed back on a ';' token.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === ';';
    }

    /**
     * Builds the final part for one segment of a parameter header.
     *
     * When the last parsed part is a ContainerPart (indicating it was parsed
     * in ParameterValueConsumerService), it is treated as the value and all
     * preceding parts as the name, producing a ParameterPart. Otherwise no
     * '=' was encountered, and all parts are wrapped in a single
     * ContainerPart: the main 'value' part of a header, or a malformed part
     * of a parameter header.
     *
     * @param IHeaderPart[] $parts The parsed parts.
     * @return IHeaderPart[] Array of resulting final parts.
     */
    protected function processParts(array $parts) : array
    {
        $nameParts = $parts;
        $lastPart = \array_pop($nameParts);

        if ($lastPart instanceof ContainerPart) {
            $parameter = $this->partFactory->newParameterPart($nameParts, $lastPart);
            return [$parameter];
        }

        return [$this->partFactory->newContainerPart($parts)];
    }
}

View File

@@ -0,0 +1,64 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
* Starts processing tokens after a '=' character is found, indicating the
* 'value' portion of a name/value pair in a parameter header.
*
* The value portion will consist of all tokens, quoted parts, and comment parts
* parsed up to a semi-colon token indicating control should be returned to the
* parent ParameterNameValueConsumerService.
*
* @author Zaahid Bateson
*/
class ParameterValueConsumerService extends GenericConsumerMimeLiteralPartService
{
    /**
     * Requires the mime-literal aware quoted-string consumer and forwards all
     * dependencies unchanged to the parent constructor.
     */
    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $partFactory,
        CommentConsumerService $commentConsumerService,
        QuotedStringMimeLiteralPartConsumerService $quotedStringConsumerService
    ) {
        parent::__construct($logger, $partFactory, $commentConsumerService, $quotedStringConsumerService);
    }

    /**
     * Adds the equals and semi-colon characters to the parent's token
     * separators.
     *
     * @return string[]
     */
    protected function getTokenSeparators() : array
    {
        $inherited = parent::getTokenSeparators();
        $valueDelimiters = ['=', ';'];
        return \array_merge($inherited, $valueDelimiters);
    }

    /**
     * A value starts on an '=' character.
     */
    protected function isStartToken(string $token) : bool
    {
        return $token === '=';
    }

    /**
     * A value ends on a ';' character.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === ';';
    }
}

View File

@@ -0,0 +1,79 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use ZBateson\MailMimeParser\Header\IHeaderPart;
/**
* Represents a quoted part of a header value starting at a double quote, and
* ending at the next double quote.
*
* A quoted-pair part in a header is a literal. There are no sub-consumers for
* it and a Part\LiteralPart is returned.
*
* Newline characters (CR and LF) are stripped entirely from the quoted part.
* This is based on the example at:
*
* https://tools.ietf.org/html/rfc822#section-3.1.1
*
* And https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html in section 7.2.1
* splitting the boundary.
*
* @author Zaahid Bateson
*/
class QuotedStringConsumerService extends AbstractConsumerService
{
    /**
     * A quoted string starts on a double quote.
     */
    protected function isStartToken(string $token) : bool
    {
        return $token === '"';
    }

    /**
     * A quoted string ends on a double quote.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === '"';
    }

    /**
     * The only separator is a regex pattern for a double quote.
     *
     * @return string[]
     */
    protected function getTokenSeparators() : array
    {
        $doubleQuote = '\"';
        return [$doubleQuote];
    }

    /**
     * Creates a token part through the factory's newToken, forwarding
     * $isLiteral as-is and passing true as the third argument.
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        $factory = $this->partFactory;
        return $factory->newToken($token, $isLiteral, true);
    }

    /**
     * Combines all parts of the quoted string into one part created by the
     * factory's newQuotedLiteralPart, returned as the only element of an
     * array.
     *
     * @param IHeaderPart[] $parts
     * @return IHeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $quoted = $this->partFactory->newQuotedLiteralPart($parts);
        return [$quoted];
    }
}

View File

@@ -0,0 +1,33 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\MimeToken;
/**
* Allows for mime-encoded parts inside a quoted part.
*
* @author Zaahid Bateson
*/
class QuotedStringMimeLiteralPartConsumerService extends QuotedStringConsumerService
{
    /**
     * Creates a MimeToken for non-literal tokens matching
     * MimeToken::MIME_PART_PATTERN, and a regular token (via newToken, with
     * $isLiteral forwarded) for everything else.
     *
     * @param bool $isLiteral when true, the mime-part pattern is not checked
     *        and a regular token is always created
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        if ($isLiteral || !\preg_match('/' . MimeToken::MIME_PART_PATTERN . '/', $token)) {
            return $this->partFactory->newToken($token, $isLiteral);
        }
        return $this->partFactory->newMimeToken($token);
    }
}

View File

@@ -0,0 +1,37 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use ZBateson\MailMimeParser\Header\Part\MimeToken;
/**
* Provides a getTokenSplitPattern for consumers that could have quoted parts
* that are mime-header-encoded.
*
* @author Zaahid Bateson
*/
trait QuotedStringMimeLiteralPartTokenSplitPatternTrait
{
    /**
     * Overridden to use a specialized regex for finding mime-encoded parts
     * (RFC 2047).
     *
     * Some implementations seem to place mime-encoded parts within quoted
     * parameters, and split the mime-encoded parts across multiple split
     * parameters. The specialized regex doesn't allow double quotes inside a
     * mime encoded part, so it can be "continued" in another parameter.
     *
     * The resulting pattern is one '~'-delimited capture group of
     * alternatives, in this order: the no-quotes mime part pattern, an
     * escaped CRLF, any escaped character, and every token separator returned
     * by getAllTokenSeparators(). It uses the 'm' and 's' modifiers.
     *
     * @return string the regex pattern
     */
    protected function getTokenSplitPattern() : string
    {
        $alternatives = [
            MimeToken::MIME_PART_PATTERN_NO_QUOTES,
            '\\\\\r\n',
            '\\\\.',
            \implode('|', $this->getAllTokenSeparators()),
        ];
        return '~(' . \implode('|', $alternatives) . ')~ms';
    }
}

View File

@@ -0,0 +1,68 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer\Received;
/**
* Parses a so-called "extended-domain" (from and by) part of a Received header.
*
* Looks for and extracts the following fields from an extended-domain part:
* Name, Hostname and Address.
*
* The Name part is always the portion of the extended-domain part existing on
* its own, outside of the parenthesized hostname and address part. This is
* true regardless of whether an address is used as the name, as it's assumed to
* be the string used to identify the server, whatever it may be.
*
* The parenthesized part normally (but not necessarily) following a name must
* "look like" a tcp-info section of an extended domain as defined by RFC5321.
* The validation is purposefully very loose to be accommodating to many
* erroneous implementations. The only restriction is the host part must
* contain two characters, the first being alphanumeric, followed by any number
* of more alphanumeric, '.', and '-' characters. The address part must be
* within square brackets, '[]'... although an address outside of square
* brackets could be matched by the domain matcher if it exists alone within the
* parentheses. The address is any number of '.', numbers, ':' and letters a-f.
* This allows it to match ipv6 addresses as well. In addition, the address may
* start with the string "ipv6", and may be followed by a port number as some
* implementations seem to do.
*
* Strings in parentheses not matching the aforementioned 'domain/address'
* pattern will be considered comments, and will be returned as a separate
* CommentPart.
*
* @see https://tools.ietf.org/html/rfc5321#section-4.4
* @see https://github.com/Te-k/pyreceived/blob/master/test.py
* @author Zaahid Bateson
* @author Mariusz Krzaczkowski
*/
class DomainConsumerService extends GenericReceivedConsumerService
{
    /**
     * A closing parenthesis ends the part, in addition to every end token
     * recognized by the parent.
     */
    protected function isEndToken(string $token) : bool
    {
        return $token === ')' || parent::isEndToken($token);
    }

    /**
     * Creates a single part through the factory's newReceivedDomainPart,
     * passing this consumer's part name and the matched parts, and returns
     * it as the only element of an array.
     *
     * @param \ZBateson\MailMimeParser\Header\Part\HeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\Part\ReceivedDomainPart[]|\ZBateson\MailMimeParser\Header\Part\CommentPart[]|\ZBateson\MailMimeParser\Header\Part\HeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $domainPart = $this->partFactory->newReceivedDomainPart($this->partName, $parts);
        return [$domainPart];
    }
}

View File

@@ -0,0 +1,113 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer\Received;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\AbstractGenericConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\CommentConsumerService;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
/**
* Consumes simple literal strings for parts of a Received header.
*
* Starts consuming when the initialized $partName string is located, for
* instance when initialized with "FROM", will start consuming on " FROM" or
* "FROM ".
*
* The consumer ends when any possible "Received" header part is found, namely
* on one of the following tokens: from, by, via, with, id, for, or when the
* start token for the date stamp is found, ';'.
*
* The consumer allows comments in and around the consumer... although the
* Received header specification only allows them before a part, for example,
* technically speaking this is valid:
*
* "FROM machine (host) (comment) BY machine"
*
* However, this is not:
*
* "FROM machine (host) BY machine WITH (comment) ESMTP"
*
* The consumer will allow both.
*
* @author Zaahid Bateson
*/
class GenericReceivedConsumerService extends AbstractGenericConsumerService
{
    /**
     * @var string the current part name being parsed.
     *
     * This is the name provided to the constructor, stored as given, not the
     * actual string that started the consumer, which could be in any case.
     */
    protected $partName;

    /**
     * Constructor overridden to include $partName parameter.
     *
     * The comment consumer is registered as the only sub-consumer.
     *
     * @param string $partName name of the Received header part this consumer
     *        handles, e.g. "from" or "with"
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        CommentConsumerService $commentConsumerService,
        string $partName
    ) {
        parent::__construct($logger, $partFactory, [$commentConsumerService]);
        $this->partName = $partName;
    }

    /**
     * Returns true if the passed token matches $this->partName
     * case-insensitively.
     *
     * The whole token must equal the part name: the pattern is anchored at
     * both ends and allows no surrounding whitespace.
     */
    protected function isStartToken(string $token) : bool
    {
        $pattern = '/^' . \preg_quote($this->partName, '/') . '$/i';
        return (\preg_match($pattern, $token) === 1);
    }

    /**
     * Returns true if the whole token matches (case-insensitively) any of the
     * following:
     *
     * o by
     * o via
     * o with
     * o id
     * o for
     * o ;
     *
     * Note that 'from' is not among the end tokens.
     */
    protected function isEndToken(string $token) : bool
    {
        return (\preg_match('/^(by|via|with|id|for|;)$/i', $token) === 1);
    }

    /**
     * Returns a whitespace separator (for filtering ignorable whitespace
     * between parts), and a separator matching the current part name as
     * set on $this->partName.
     *
     * The part-name separator matches the name case-insensitively when it is
     * preceded by the start of the value (plus optional whitespace) or by
     * whitespace, and followed by whitespace.
     *
     * The name is escaped for both '/' and '~' delimiters. The returned
     * fragments end up inside split patterns delimited by '~', a character
     * preg_quote doesn't escape on its own, so an unescaped '~' in a part
     * name would otherwise terminate the pattern early.
     *
     * @return string[] an array of regex pattern matchers
     */
    protected function getTokenSeparators() : array
    {
        $quotedName = \str_replace('~', '\~', \preg_quote($this->partName, '/'));
        return [
            '\s+',
            '(\A\s*|\s+)(?i)' . $quotedName . '(?-i)(?=\s+)'
        ];
    }

    /**
     * Wraps the parsed parts in a single part created by the factory's
     * newReceivedPart, passing this consumer's part name.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[] an array with
     *         exactly one element
     */
    protected function processParts(array $parts) : array
    {
        return [$this->partFactory->newReceivedPart($this->partName, $parts)];
    }
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer\Received;
use ZBateson\MailMimeParser\Header\Consumer\DateConsumerService;
/**
* Parses the date portion of a Received header into a DatePart.
*
* The only difference between DateConsumerService and
* ReceivedDateConsumerService is the addition of a start token, ';', and a
* token separator (also ';').
*
* @author Zaahid Bateson
*/
class ReceivedDateConsumerService extends DateConsumerService
{
    /**
     * The date portion starts on a ';' token.
     */
    protected function isStartToken(string $token) : bool
    {
        return $token === ';';
    }

    /**
     * The only separator is ';'. The parent's separators are not included.
     *
     * @return string[] an array of regex pattern matchers
     */
    protected function getTokenSeparators() : array
    {
        $dateStart = ';';
        return [$dateStart];
    }
}

View File

@@ -0,0 +1,129 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Iterator;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\Received\DomainConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\Received\GenericReceivedConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\Received\ReceivedDateConsumerService;
use ZBateson\MailMimeParser\Header\Part\HeaderPartFactory;
use ZBateson\MailMimeParser\Header\Part\Token;
/**
* Parses a Received header into ReceivedParts, ReceivedDomainParts, a DatePart,
* and CommentParts.
*
* Parts that don't correspond to any of the above are discarded.
*
* @author Zaahid Bateson
*/
class ReceivedConsumerService extends AbstractConsumerService
{
    /**
     * Registers all Received header sub-consumers in this order: from, by,
     * via, with, id, for, date, and finally the comment consumer.
     */
    public function __construct(
        LoggerInterface $logger,
        HeaderPartFactory $partFactory,
        DomainConsumerService $fromDomainConsumerService,
        DomainConsumerService $byDomainConsumerService,
        GenericReceivedConsumerService $viaGenericReceivedConsumerService,
        GenericReceivedConsumerService $withGenericReceivedConsumerService,
        GenericReceivedConsumerService $idGenericReceivedConsumerService,
        GenericReceivedConsumerService $forGenericReceivedConsumerService,
        ReceivedDateConsumerService $receivedDateConsumerService,
        CommentConsumerService $commentConsumerService
    ) {
        $subConsumers = [
            $fromDomainConsumerService,
            $byDomainConsumerService,
            $viaGenericReceivedConsumerService,
            $withGenericReceivedConsumerService,
            $idGenericReceivedConsumerService,
            $forGenericReceivedConsumerService,
            $receivedDateConsumerService,
            $commentConsumerService
        ];
        parent::__construct($logger, $partFactory, $subConsumers);
    }

    /**
     * ReceivedConsumerService doesn't have any token separators of its own.
     * Sub-Consumers will return separators matching 'part' word separators, for
     * example 'from' and 'by', and ';' for date, etc...
     *
     * @return string[] an array of regex pattern matchers (always empty)
     */
    protected function getTokenSeparators() : array
    {
        return [];
    }

    /**
     * Always false: ReceivedConsumerService doesn't have an end token.
     */
    protected function isEndToken(string $token) : bool
    {
        return false;
    }

    /**
     * Always false: ReceivedConsumerService is the base handler for the
     * Received header and doesn't start consuming at a specific token.
     *
     * @codeCoverageIgnore
     */
    protected function isStartToken(string $token) : bool
    {
        return false;
    }

    /**
     * Overridden to exclude the MimeLiteralPart pattern that comes by default
     * in AbstractConsumer.
     *
     * The pattern is a single '~'-delimited capture group alternating over
     * every separator returned by getAllTokenSeparators().
     *
     * @return string the regex pattern
     */
    protected function getTokenSplitPattern() : string
    {
        $alternation = \implode('|', $this->getAllTokenSeparators());
        return '~(' . $alternation . ')~';
    }

    /**
     * Overridden to /not/ advance when the current token is a start token for
     * one of the sub-consumers.
     *
     * When called for a start token the iterator is always advanced.
     * Otherwise it is advanced only if it is still valid, the current token is
     * not an end token, and no sub-consumer claims the current token as its
     * start token.
     */
    protected function advanceToNextToken(Iterator $tokens, bool $isStartToken) : static
    {
        if ($isStartToken) {
            $tokens->next();
            return $this;
        }

        if (!$tokens->valid() || $this->isEndToken($tokens->current())) {
            return $this;
        }

        $current = $tokens->current();
        foreach ($this->subConsumers as $subConsumer) {
            if ($subConsumer->isStartToken($current)) {
                // leave the token in place so the sub-consumer can pick it up
                return $this;
            }
        }

        $tokens->next();
        return $this;
    }

    /**
     * Drops every Token instance from the parsed parts (this filters out the
     * part names, e.g. 'by' or 'with') and returns the remaining parts as a
     * re-indexed list.
     *
     * @param \ZBateson\MailMimeParser\Header\IHeaderPart[] $parts
     * @return \ZBateson\MailMimeParser\Header\IHeaderPart[]
     */
    protected function processParts(array $parts) : array
    {
        $kept = [];
        foreach ($parts as $part) {
            if ($part instanceof Token) {
                continue;
            }
            $kept[] = $part;
        }
        return $kept;
    }
}

View File

@@ -0,0 +1,62 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Consumer;
use Iterator;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MailMimeParser\Header\Part\MimeToken;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
 * Consumer for Subject headers.
 *
 * Extends AbstractGenericConsumerService to use a MimeTokenPartFactory, and
 * to preserve all whitespace and escape sequences as-is (unlike other headers
 * subject headers don't have escape chars such as '\\' for a backslash).
 *
 * SubjectConsumerService doesn't define any sub-consumers.
 *
 * @author Zaahid Bateson
 */
class SubjectConsumerService extends AbstractGenericConsumerService
{
    public function __construct(LoggerInterface $logger, MimeTokenPartFactory $partFactory)
    {
        parent::__construct($logger, $partFactory);
    }

    /**
     * Creates the header part for a single token.
     *
     * A token matching MimeToken::MIME_PART_PATTERN becomes a mime token;
     * anything else becomes a subject token.
     *
     * NOTE(review): the whitespace rules (whitespace kept unless it begins
     * with a newline, in which case it collapses to a single space) are
     * presumably implemented by the part created in newSubjectToken() --
     * confirm against the part factory.
     *
     * @param string $token the token text
     * @param bool $isLiteral not used by this override
     */
    protected function getPartForToken(string $token, bool $isLiteral) : ?IHeaderPart
    {
        $isMimeEncoded = (bool) \preg_match('/' . MimeToken::MIME_PART_PATTERN . '/', $token);

        return $isMimeEncoded
            ? $this->partFactory->newMimeToken($token)
            : $this->partFactory->newSubjectToken($token);
    }

    /**
     * Returns the parts for the current token on the iterator.
     *
     * Overridden from AbstractConsumerService to delegate straight to
     * getConsumerTokenParts(), skipping the special filtering for backslash
     * escaping, which also seems to not apply to Subject headers at least in
     * ThunderBird's implementation.
     *
     * @return IHeaderPart[]
     */
    protected function getTokenParts(Iterator $tokens) : array
    {
        $tokenParts = $this->getConsumerTokenParts($tokens);

        return $tokenParts;
    }
}

View File

@@ -0,0 +1,67 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use DateTime;
use DateTimeImmutable;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\DateConsumerService;
use ZBateson\MailMimeParser\Header\Part\DatePart;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * Reads a DatePart value header in either RFC 2822 or RFC 822 format.
 *
 * @author Zaahid Bateson
 */
class DateHeader extends AbstractHeader
{
    /**
     * @param string $name Name of the header.
     * @param string $value Raw value of the header.
     * @param LoggerInterface|null $logger Taken from the global container
     *        when null.
     * @param DateConsumerService|null $consumerService Taken from the global
     *        container when null.
     */
    public function __construct(
        string $name,
        string $value,
        ?LoggerInterface $logger = null,
        ?DateConsumerService $consumerService = null
    ) {
        $container = MailMimeParser::getGlobalContainer();
        $resolvedLogger = $logger ?? $container->get(LoggerInterface::class);
        $resolvedConsumer = $consumerService ?? $container->get(DateConsumerService::class);

        parent::__construct($resolvedLogger, $resolvedConsumer, $name, $value);
    }

    /**
     * Convenience method returning the first part's DateTime object, or null
     * if there are no parts or the first part isn't a DatePart.
     *
     * @return ?DateTime The parsed DateTime object.
     */
    public function getDateTime() : ?DateTime
    {
        if (empty($this->parts) || !($this->parts[0] instanceof DatePart)) {
            return null;
        }
        return $this->parts[0]->getDateTime();
    }

    /**
     * Returns an immutable copy of the value returned by getDateTime(), or
     * null if that is null.
     *
     * @return ?DateTimeImmutable The parsed DateTimeImmutable object.
     */
    public function getDateTimeImmutable() : ?DateTimeImmutable
    {
        $mutable = $this->getDateTime();

        return ($mutable === null) ? null : DateTimeImmutable::createFromMutable($mutable);
    }
}

View File

@@ -0,0 +1,47 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\GenericConsumerMimeLiteralPartService;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * Reads a generic header.
 *
 * Header's may contain mime-encoded parts, quoted parts, and comments. The
 * string value is the combined value of all its parts.
 *
 * @author Zaahid Bateson
 */
class GenericHeader extends AbstractHeader
{
    /**
     * @param string $name Name of the header.
     * @param string $value Raw value of the header.
     * @param LoggerInterface|null $logger Taken from the global container
     *        when null.
     * @param GenericConsumerMimeLiteralPartService|null $consumerService
     *        Taken from the global container when null.
     */
    public function __construct(
        string $name,
        string $value,
        ?LoggerInterface $logger = null,
        ?GenericConsumerMimeLiteralPartService $consumerService = null
    ) {
        $di = MailMimeParser::getGlobalContainer();
        // The parent constructor parses the value, so it is called exactly
        // once and always with resolved (non-null) dependencies. The fallback
        // consumer is the generic one matching the parameter type.
        parent::__construct(
            $logger ?? $di->get(LoggerInterface::class),
            $consumerService ?? $di->get(GenericConsumerMimeLiteralPartService::class),
            $name,
            $value
        );
    }

    /**
     * Returns the concatenated values of all parts, or null if the header
     * has no parts.
     */
    public function getValue() : ?string
    {
        if (!empty($this->parts)) {
            return \implode('', \array_map(function($p) { return $p->getValue(); }, $this->parts));
        }
        return null;
    }
}

View File

@@ -0,0 +1,95 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
/**
* List of header name constants.
*
* Each constant holds the header's name as a string, e.g.
* HeaderConsts::CONTENT_TYPE is 'Content-Type'. The class is abstract and
* declares only constants.
*
* @author Thomas Landauer
*/
abstract class HeaderConsts
{
// Headers according to the table at https://tools.ietf.org/html/rfc5322#section-3.6
public const RETURN_PATH = 'Return-Path';
public const RECEIVED = 'Received';
public const RESENT_DATE = 'Resent-Date';
public const RESENT_FROM = 'Resent-From';
public const RESENT_SENDER = 'Resent-Sender';
public const RESENT_TO = 'Resent-To';
public const RESENT_CC = 'Resent-Cc';
public const RESENT_BCC = 'Resent-Bcc';
// NOTE(review): 'MSD' looks like a typo for 'MSG'; presumably kept for
// backward compatibility. RESENT_MESSAGE_ID below is an alias of it.
public const RESENT_MSD_ID = 'Resent-Message-ID';
public const RESENT_MESSAGE_ID = self::RESENT_MSD_ID;
public const ORIG_DATE = 'Date';
// Alias of ORIG_DATE.
public const DATE = self::ORIG_DATE;
public const FROM = 'From';
public const SENDER = 'Sender';
public const REPLY_TO = 'Reply-To';
public const TO = 'To';
public const CC = 'Cc';
public const BCC = 'Bcc';
public const MESSAGE_ID = 'Message-ID';
public const IN_REPLY_TO = 'In-Reply-To';
public const REFERENCES = 'References';
public const SUBJECT = 'Subject';
public const COMMENTS = 'Comments';
public const KEYWORDS = 'Keywords';
// https://datatracker.ietf.org/doc/html/rfc4021#section-2.2
public const MIME_VERSION = 'MIME-Version';
public const CONTENT_TYPE = 'Content-Type';
public const CONTENT_TRANSFER_ENCODING = 'Content-Transfer-Encoding';
public const CONTENT_ID = 'Content-ID';
public const CONTENT_DESCRIPTION = 'Content-Description';
public const CONTENT_DISPOSITION = 'Content-Disposition';
public const CONTENT_LANGUAGE = 'Content-Language';
public const CONTENT_BASE = 'Content-Base';
public const CONTENT_LOCATION = 'Content-Location';
public const CONTENT_FEATURES = 'Content-features';
public const CONTENT_ALTERNATIVE = 'Content-Alternative';
public const CONTENT_MD5 = 'Content-MD5';
public const CONTENT_DURATION = 'Content-Duration';
// https://datatracker.ietf.org/doc/html/rfc3834
public const AUTO_SUBMITTED = 'Auto-Submitted';
}

View File

@@ -0,0 +1,213 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ReflectionClass;
use ZBateson\MailMimeParser\Header\Consumer\AddressBaseConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\DateConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\GenericConsumerMimeLiteralPartService;
use ZBateson\MailMimeParser\Header\Consumer\IConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\IdBaseConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\ParameterConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\ReceivedConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\SubjectConsumerService;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
 * Constructs various IHeader types depending on the type of header passed.
 *
 * The header name is normalized (lower-cased, non-alphanumerics removed) and
 * looked up in $types. If it resolves to a specific header type, it is
 * created as such. Otherwise, a GenericHeader is instantiated and returned.
 * Headers are mapped as follows:
 *
 *  - {@see AddressHeader}: From, To, Cc, Bcc, Sender, Reply-To, Resent-From,
 *    Resent-To, Resent-Cc, Resent-Bcc, Resent-Reply-To, Return-Path,
 *    Delivered-To
 *  - {@see DateHeader}: Date, Resent-Date, Delivery-Date, Expires, Expiry-Date,
 *    Reply-By
 *  - {@see ParameterHeader}: Content-Type, Content-Disposition, Received-SPF,
 *    Authentication-Results, DKIM-Signature, Autocrypt
 *  - {@see SubjectHeader}: Subject
 *  - {@see IdHeader}: Message-ID, Content-ID, In-Reply-To, References
 *  - {@see ReceivedHeader}: Received
 *
 * @author Zaahid Bateson
 */
class HeaderFactory
{
    protected LoggerInterface $logger;

    /**
     * @var IConsumerService[] available consumer services, keyed by their
     *      class name
     */
    protected array $consumerServices;

    /**
     * @var MimeTokenPartFactory for mime decoding.
     */
    protected MimeTokenPartFactory $mimeTokenPartFactory;

    /**
     * @var string[][] maps IHeader class names to the normalized header names
     *      they handle.
     */
    protected $types = [
        AddressHeader::class => [
            'from',
            'to',
            'cc',
            'bcc',
            'sender',
            'replyto',
            'resentfrom',
            'resentto',
            'resentcc',
            'resentbcc',
            'resentreplyto',
            'returnpath',
            'deliveredto',
        ],
        DateHeader::class => [
            'date',
            'resentdate',
            'deliverydate',
            'expires',
            'expirydate',
            'replyby',
        ],
        ParameterHeader::class => [
            'contenttype',
            'contentdisposition',
            'receivedspf',
            'authenticationresults',
            'dkimsignature',
            'autocrypt',
        ],
        SubjectHeader::class => [
            'subject',
        ],
        IdHeader::class => [
            'messageid',
            'contentid',
            'inreplyto',
            'references'
        ],
        ReceivedHeader::class => [
            'received'
        ]
    ];

    /**
     * @var string Defines the generic IHeader type to use for headers that
     *      aren't mapped in $types
     */
    protected $genericType = GenericHeader::class;

    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $mimeTokenPartFactory,
        AddressBaseConsumerService $addressBaseConsumerService,
        DateConsumerService $dateConsumerService,
        GenericConsumerMimeLiteralPartService $genericConsumerMimeLiteralPartService,
        IdBaseConsumerService $idBaseConsumerService,
        ParameterConsumerService $parameterConsumerService,
        ReceivedConsumerService $receivedConsumerService,
        SubjectConsumerService $subjectConsumerService
    ) {
        $this->logger = $logger;
        $this->mimeTokenPartFactory = $mimeTokenPartFactory;

        // Keyed by class name so newInstanceOf() can look a service up from
        // the type declared on a header's constructor parameter.
        $services = [];
        $services[AddressBaseConsumerService::class] = $addressBaseConsumerService;
        $services[DateConsumerService::class] = $dateConsumerService;
        $services[GenericConsumerMimeLiteralPartService::class] = $genericConsumerMimeLiteralPartService;
        $services[IdBaseConsumerService::class] = $idBaseConsumerService;
        $services[ParameterConsumerService::class] = $parameterConsumerService;
        $services[ReceivedConsumerService::class] = $receivedConsumerService;
        $services[SubjectConsumerService::class] = $subjectConsumerService;
        $this->consumerServices = $services;
    }

    /**
     * Returns the string in lower-case, and with non-alphanumeric characters
     * stripped out.
     *
     * @param string $header The header name
     * @return string The normalized header name
     */
    public function getNormalizedHeaderName(string $header) : string
    {
        $lowered = \strtolower($header);

        return \preg_replace('/[^a-z0-9]/', '', $lowered);
    }

    /**
     * Returns the name of an IHeader class for the passed header name.
     *
     * @param string $name The header name.
     * @return string The Fully Qualified class name, or $genericType when
     *         the name isn't mapped in $types.
     */
    private function getClassFor(string $name) : string
    {
        $normalized = $this->getNormalizedHeaderName($name);
        foreach ($this->types as $headerClass => $headerNames) {
            if (\in_array($normalized, $headerNames, true)) {
                return $headerClass;
            }
        }
        return $this->genericType;
    }

    /**
     * Creates an IHeader instance for the passed header name and value, and
     * returns it.
     *
     * The class chosen for the header is written to the debug log.
     *
     * @param string $name The header name.
     * @param string $value The header value.
     * @return IHeader The created header object.
     */
    public function newInstance(string $name, string $value) : IHeader
    {
        $headerClass = $this->getClassFor($name);
        $context = ['class' => $headerClass, 'name' => $name, 'value' => $value];
        $this->logger->debug(
            'Creating {class} for header with name "{name}" and value "{value}"',
            $context
        );
        return $this->newInstanceOf($name, $value, $headerClass);
    }

    /**
     * Creates an IHeader instance for the passed header name and value using
     * the passed IHeader class, and returns it.
     *
     * The consumer service is chosen by reflecting on the class's constructor:
     * the declared type of the fifth parameter for MimeEncodedHeader
     * subclasses (which also receive the MimeTokenPartFactory), or of the
     * fourth parameter for all other classes.
     *
     * @param string $name The header name.
     * @param string $value The header value.
     * @param string $iHeaderClass The class to use for header creation
     * @return IHeader The created header object.
     */
    public function newInstanceOf(string $name, string $value, string $iHeaderClass) : IHeader
    {
        $reflection = new ReflectionClass($iHeaderClass);
        $ctorParams = $reflection->getConstructor()->getParameters();

        if ($reflection->isSubclassOf(MimeEncodedHeader::class)) {
            $consumer = $this->consumerServices[$ctorParams[4]->getType()->getName()];
            return new $iHeaderClass($name, $value, $this->logger, $this->mimeTokenPartFactory, $consumer);
        }

        $consumer = $this->consumerServices[$ctorParams[3]->getType()->getName()];
        return new $iHeaderClass($name, $value, $this->logger, $consumer);
    }
}

View File

@@ -0,0 +1,84 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use ZBateson\MailMimeParser\IErrorBag;
/**
* A mime email header line consisting of a name and value.
*
* The header object provides methods to access the header's name, raw value,
* and also its parsed value. The parsed value will depend on the type of
* header and in some cases may be broken up into other parts (for example email
* addresses in an address header, or parameters in a parameter header).
*
* @author Zaahid Bateson
*/
interface IHeader extends IErrorBag
{
/**
* Returns an array of IHeaderPart objects the header's value has been
* parsed into, excluding any
* {@see \ZBateson\MailMimeParser\Header\Part\CommentPart}s.
*
* To retrieve all parts /including/ CommentParts, {@see getAllParts()}.
*
* @return IHeaderPart[] The array of parts.
*/
public function getParts() : array;
/**
* Returns an array of all IHeaderPart objects the header's value has been
* parsed into, including any CommentParts.
*
* @return IHeaderPart[] The array of parts.
*/
public function getAllParts() : array;
/**
* Returns an array of comments parsed from the header. If there are no
* comments in the header, an empty array is returned.
*
* @return string[]
*/
public function getComments() : array;
/**
* Returns the parsed 'value' of the header.
*
* For headers that contain multiple parts, like address headers (To, From)
* or parameter headers (Content-Type), the 'value' is the value of the
* first parsed part that isn't a comment.
*
* The return type is nullable; implementations may return null when there
* is no value to report.
*
* @return string|null The value
*/
public function getValue() : ?string;
/**
* Returns the raw value of the header.
*
* @return string The raw value.
*/
public function getRawValue() : string;
/**
* Returns the name of the header.
*
* @return string The name.
*/
public function getName() : string;
/**
* Returns the string representation of the header.
*
* i.e.: '<HeaderName>: <RawValue>'
*
* @return string The string representation.
*/
public function __toString() : string;
}

View File

@@ -0,0 +1,36 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Stringable;
use ZBateson\MailMimeParser\IErrorBag;
/**
* Represents a single parsed part of a header line's value.
*
* For header values with multiple parts, for instance a list of addresses, each
* address would be parsed into a single part.
*
* @author Zaahid Bateson
*/
interface IHeaderPart extends IErrorBag, Stringable
{
/**
* Returns the part's value.
*
* The return type is nullable.
*
* @return string|null The value of the part
*/
public function getValue() : ?string;
/**
* Returns any CommentParts under this part container.
*
* @return \ZBateson\MailMimeParser\Header\Part\CommentPart[]
*/
public function getComments() : array;
}

View File

@@ -0,0 +1,71 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\IdBaseConsumerService;
use ZBateson\MailMimeParser\Header\Part\CommentPart;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * Represents a Content-ID, Message-ID, In-Reply-To or References header.
 *
 * For a multi-id header like In-Reply-To or References, all IDs can be
 * retrieved by calling {@see IdHeader::getIds()}. Otherwise, to retrieve the
 * first (or only) ID call {@see IdHeader::getValue()}.
 *
 * @author Zaahid Bateson
 */
class IdHeader extends MimeEncodedHeader
{
    /**
     * @param string $name Name of the header.
     * @param string $value Raw value of the header.
     * @param LoggerInterface|null $logger Taken from the global container
     *        when null.
     * @param MimeTokenPartFactory|null $mimeTokenPartFactory Taken from the
     *        global container when null.
     * @param IdBaseConsumerService|null $consumerService Taken from the
     *        global container when null.
     */
    public function __construct(
        string $name,
        string $value,
        ?LoggerInterface $logger = null,
        ?MimeTokenPartFactory $mimeTokenPartFactory = null,
        ?IdBaseConsumerService $consumerService = null
    ) {
        $container = MailMimeParser::getGlobalContainer();
        $resolvedLogger = $logger ?? $container->get(LoggerInterface::class);
        $resolvedFactory = $mimeTokenPartFactory ?? $container->get(MimeTokenPartFactory::class);
        $resolvedConsumer = $consumerService ?? $container->get(IdBaseConsumerService::class);

        parent::__construct($resolvedLogger, $resolvedFactory, $resolvedConsumer, $name, $value);
    }

    /**
     * Returns the ID. Synonymous to calling getValue().
     *
     * @return string|null The ID
     */
    public function getId() : ?string
    {
        return $this->getValue();
    }

    /**
     * Returns all IDs parsed for a multi-id header like References or
     * In-Reply-To.
     *
     * The result is the value of every part that isn't a CommentPart, as a
     * re-indexed list.
     *
     * @return string[] An array of IDs
     */
    public function getIds() : array
    {
        $ids = [];
        foreach ($this->parts as $part) {
            if ($part instanceof CommentPart) {
                continue;
            }
            $ids[] = $part->getValue();
        }
        return $ids;
    }
}

View File

@@ -0,0 +1,66 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\IConsumerService;
use ZBateson\MailMimeParser\Header\Part\MimeToken;
use ZBateson\MailMimeParser\Header\Part\MimeTokenPartFactory;
/**
 * Allows a header to be mime-encoded and be decoded with a consumer after
 * decoding.
 *
 * @author Zaahid Bateson
 */
abstract class MimeEncodedHeader extends AbstractHeader
{
    /**
     * @var MimeTokenPartFactory for mime decoding.
     */
    protected MimeTokenPartFactory $mimeTokenPartFactory;

    /**
     * @var array the parts created by the MimeTokenPartFactory for each
     *      segment of the raw value, kept for error reporting
     */
    protected $mimeEncodedParsedParts = [];

    public function __construct(
        LoggerInterface $logger,
        MimeTokenPartFactory $mimeTokenPartFactory,
        IConsumerService $consumerService,
        string $name,
        string $value
    ) {
        // Must be assigned before the parent constructor runs: the parent
        // constructor parses the value, which needs the factory.
        $this->mimeTokenPartFactory = $mimeTokenPartFactory;
        parent::__construct($logger, $consumerService, $name, $value);
    }

    /**
     * Mime-decodes any mime-encoded parts prior to invoking
     * parent::parseHeaderValue.
     *
     * The value is split on MimeToken::MIME_PART_PATTERN (keeping the matched
     * delimiters and dropping empty pieces), each piece is turned into a part
     * by the factory, and the concatenated part values are handed to the
     * parent for parsing.
     */
    protected function parseHeaderValue(IConsumerService $consumer, string $value) : void
    {
        // handled differently from MimeLiteralPart's decoding which ignores
        // whitespace between parts, etc...
        $splitPattern = '~(' . MimeToken::MIME_PART_PATTERN . ')~';
        $segments = \preg_split($splitPattern, $value, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

        $factory = $this->mimeTokenPartFactory;
        $this->mimeEncodedParsedParts = \array_map(
            function($segment) use ($factory) {
                return $factory->newInstance($segment);
            },
            $segments
        );

        $decoded = '';
        foreach ($this->mimeEncodedParsedParts as $decodedPart) {
            $decoded .= $decodedPart->getValue();
        }
        parent::parseHeaderValue($consumer, $decoded);
    }

    /**
     * Returns the header's parts followed by the mime-decoded parts, with
     * empty entries removed, as a re-indexed list.
     */
    protected function getErrorBagChildren() : array
    {
        $candidates = \array_merge($this->getAllParts(), $this->mimeEncodedParsedParts);

        return \array_values(\array_filter($candidates));
    }
}

View File

@@ -0,0 +1,99 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\IConsumerService;
use ZBateson\MailMimeParser\Header\Consumer\ParameterConsumerService;
use ZBateson\MailMimeParser\Header\Part\NameValuePart;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * Represents a header containing an optional main value part and subsequent
 * name/value pairs.
 *
 * If header doesn't contain a non-parameterized 'main' value part, 'getValue()'
 * will return the value of the first parameter.
 *
 * For example: 'Content-Type: text/html; charset=utf-8; name=test.ext'
 *
 * The 'text/html' portion is considered the 'main' value, and 'charset' and
 * 'name' are added as parameterized name/value pairs.
 *
 * With the Autocrypt header, there is no main value portion, for example:
 * 'Autocrypt: addr=zb@example.com; keydata=b64-data'
 *
 * In that example, calling ```php $header->getValue() ``` would return
 * 'zb@example.com', as would calling ```php $header->getValueFor('addr'); ```.
 *
 * @author Zaahid Bateson
 */
class ParameterHeader extends AbstractHeader
{
    /**
     * @var NameValuePart[] key map of lower-case parameter names and
     *      associated name/value parts.
     */
    protected array $parameters = [];

    /**
     * @param string $name Name of the header.
     * @param string $value Raw value of the header.
     * @param LoggerInterface|null $logger Taken from the global container
     *        when null.
     * @param ParameterConsumerService|null $consumerService Taken from the
     *        global container when null.
     */
    public function __construct(
        string $name,
        string $value,
        ?LoggerInterface $logger = null,
        ?ParameterConsumerService $consumerService = null
    ) {
        $container = MailMimeParser::getGlobalContainer();
        $resolvedLogger = $logger ?? $container->get(LoggerInterface::class);
        $resolvedConsumer = $consumerService ?? $container->get(ParameterConsumerService::class);

        parent::__construct($resolvedLogger, $resolvedConsumer, $name, $value);
    }

    /**
     * Overridden to index every NameValuePart found in the parsed parts by
     * its lower-cased name. A later part with the same name replaces an
     * earlier one.
     */
    protected function parseHeaderValue(IConsumerService $consumer, string $value) : void
    {
        parent::parseHeaderValue($consumer, $value);

        foreach ($this->parts as $candidate) {
            if (!($candidate instanceof NameValuePart)) {
                continue;
            }
            $key = \strtolower($candidate->getName());
            $this->parameters[$key] = $candidate;
        }
    }

    /**
     * Returns true if a parameter exists with the passed name. The lookup is
     * case-insensitive.
     *
     * @param string $name The parameter to look up.
     */
    public function hasParameter(string $name) : bool
    {
        $key = \strtolower($name);

        return isset($this->parameters[$key]);
    }

    /**
     * Returns the value of the parameter with the given name, or $defaultValue
     * if not set.
     *
     * @param string $name The parameter to retrieve.
     * @param string $defaultValue Optional default value (defaulting to null if
     *        not provided).
     * @return string|null The parameter's value.
     */
    public function getValueFor(string $name, ?string $defaultValue = null) : ?string
    {
        return $this->hasParameter($name)
            ? $this->parameters[\strtolower($name)]->getValue()
            : $defaultValue;
    }
}

View File

@@ -0,0 +1,86 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Holds a group of addresses and a group name.
 *
 * @author Zaahid Bateson
 */
class AddressGroupPart extends NameValuePart
{
    /**
     * @var AddressPart[] an array of AddressParts
     */
    protected array $addresses;

    /**
     * Creates an AddressGroupPart out of the passed array of AddressParts/
     * AddressGroupParts and name.
     *
     * Nested groups are flattened: a nested AddressGroupPart contributes its
     * own addresses rather than itself.
     *
     * @param HeaderPart[] $nameParts
     * @param AddressPart[]|AddressGroupPart[] $addressesAndGroupParts
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        array $nameParts,
        array $addressesAndGroupParts
    ) {
        parent::__construct($logger, $charsetConverter, $nameParts, $addressesAndGroupParts);

        // One list of addresses per entry, keyed like the input, merged below.
        $addressLists = [];
        foreach ($addressesAndGroupParts as $key => $entry) {
            $addressLists[$key] = ($entry instanceof AddressGroupPart) ? $entry->getAddresses() : [$entry];
        }
        $this->addresses = \array_merge(...$addressLists);

        // for backwards compatibility
        $this->value = $this->name;
    }

    /**
     * Return the AddressGroupPart's array of addresses.
     *
     * @return AddressPart[] An array of address parts.
     */
    public function getAddresses() : array
    {
        return $this->addresses;
    }

    /**
     * Returns the AddressPart at the passed index or null.
     *
     * @param int $index The 0-based index.
     * @return ?AddressPart The address.
     */
    public function getAddress(int $index) : ?AddressPart
    {
        return $this->addresses[$index] ?? null;
    }

    /**
     * Records an error when the group has no name, and a notice when it
     * contains no addresses.
     */
    protected function validate() : void
    {
        $hasName = ($this->name !== null && \mb_strlen($this->name) !== 0);
        if (!$hasName) {
            $this->addError('Address group doesn\'t have a name', LogLevel::ERROR);
        }
        if (empty($this->addresses)) {
            $this->addError('Address group doesn\'t have any email addresses defined in it', LogLevel::NOTICE);
        }
    }
}

View File

@@ -0,0 +1,57 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LogLevel;
/**
 * Holds a single address or name/address pair.
 *
 * The name part of the address may be mime-encoded, but the email address part
 * can't be mime-encoded. Any whitespace in the email address part is stripped
 * out.
 *
 * A convenience method, getEmail, is provided for clarity -- but getValue
 * returns the email address as well.
 *
 * @author Zaahid Bateson
 */
class AddressPart extends NameValuePart
{
    /**
     * Builds the email address string from the passed parts.
     *
     * Nested AddressParts contribute their value unchanged. Non-empty quoted
     * literals are re-quoted, with '"' and '\' backslash-escaped. Every other
     * part has all whitespace removed from its value.
     */
    protected function getValueFromParts(array $parts) : string
    {
        $email = '';
        foreach ($parts as $part) {
            if ($part instanceof AddressPart) {
                $email .= $part->getValue();
            } elseif ($part instanceof QuotedLiteralPart && $part->getValue() !== '') {
                $email .= '"' . \preg_replace('/(["\\\])/', '\\\$1', $part->getValue()) . '"';
            } else {
                $email .= \preg_replace('/\s+/', '', $part->getValue());
            }
        }
        return $email;
    }

    /**
     * Returns the email address.
     *
     * @return string The email address.
     */
    public function getEmail() : string
    {
        return $this->value;
    }

    /**
     * Records an error when the address value is empty.
     */
    protected function validate() : void
    {
        if (!empty($this->value)) {
            return;
        }
        $this->addError('Address doesn\'t contain an email address', LogLevel::ERROR);
    }
}

View File

@@ -0,0 +1,77 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Represents a mime header comment -- text in a structured mime header
 * value existing within parentheses.
 *
 * The comment text is available from getComment(); getValue() always returns
 * an empty string.
 *
 * @author Zaahid Bateson
 */
class CommentPart extends ContainerPart
{
    /**
     * @var HeaderPartFactory used to create intermediate parts.
     */
    protected HeaderPartFactory $partFactory;

    /**
     * @var string the contents of the comment
     */
    protected string $comment;

    /**
     * @param HeaderPartFactory $partFactory used to create intermediate parts
     *        while building the comment text
     * @param array $children the parts making up the comment
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        HeaderPartFactory $partFactory,
        array $children
    ) {
        // Must be assigned first: the parent constructor calls
        // getValueFromParts(), which uses the factory.
        $this->partFactory = $partFactory;
        parent::__construct($logger, $charsetConverter, $children);
        // The computed value is the comment text; the part's own value is
        // blanked and it is flagged as ignorable space.
        $this->comment = $this->value;
        $this->value = '';
        $this->isSpace = true;
        $this->canIgnoreSpacesBefore = true;
        $this->canIgnoreSpacesAfter = true;
    }

    /**
     * Builds the comment text from the passed parts.
     *
     * Nested comments are re-wrapped in parentheses and quoted literals are
     * re-wrapped in double quotes, with '"' and '\' inside the literal
     * backslash-escaped, before being handed to the parent implementation.
     */
    protected function getValueFromParts(array $parts) : string
    {
        $partFactory = $this->partFactory;
        return parent::getValueFromParts(\array_map(
            function($p) use ($partFactory) {
                if ($p instanceof CommentPart) {
                    return $partFactory->newQuotedLiteralPart([$partFactory->newToken('(' . $p->getComment() . ')')]);
                } elseif ($p instanceof QuotedLiteralPart) {
                    // preg_replace, not str_replace: the pattern is a regex
                    // with a backreference, same escaping as AddressPart.
                    $escaped = \preg_replace('/(["\\\])/', '\\\$1', $p->getValue());
                    return $partFactory->newQuotedLiteralPart([$partFactory->newToken('"' . $escaped . '"')]);
                }
                return $p;
            },
            $parts
        ));
    }

    /**
     * Returns the comment's text.
     */
    public function getComment() : string
    {
        return $this->comment;
    }

    /**
     * Returns an empty string.
     */
    public function getValue() : string
    {
        return '';
    }
}

View File

@@ -0,0 +1,128 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\ErrorBag;
use ZBateson\MbWrapper\MbWrapper;
/**
* Base HeaderPart for a part that consists of other parts.
*
* The base container part constructs a string value out of the passed parts by
* concatenating their values, discarding whitespace between parts that can be
* ignored (in general allows for a single space but removes extras.)
*
* A ContainerPart can also contain any number of child comment parts. The
* CommentParts in this and all child parts can be returned by calling
* getComments.
*
* @author Zaahid Bateson
*/
class ContainerPart extends HeaderPart
{
/**
* @var HeaderPart[] parts that were used to create this part, collected for
* proper error reporting and validation.
*/
protected $children = [];
/**
* Stores the child parts and derives this part's string value from them.
*
* ErrorBag's constructor is called and the charset converter and children
* are assigned up front, so they are already set when getValueFromParts()
* runs; that call happens before the parent constructor is invoked. An
* empty $children array yields an empty string value.
*
* @param HeaderPart[] $children
*/
public function __construct(
LoggerInterface $logger,
MbWrapper $charsetConverter,
array $children
) {
ErrorBag::__construct($logger);
$this->charsetConverter = $charsetConverter;
$this->children = $children;
$str = (!empty($children)) ? $this->getValueFromParts($children) : '';
parent::__construct(
$logger,
$this->charsetConverter,
$str
);
}
/**
* Filters out ignorable space tokens.
*
* Spaces are removed if parts on either side of it have their
* canIgnoreSpacesAfter/canIgnoreSpacesBefore properties set to true, if
* the previously kept part is itself a space, or if the space is the last
* part (trailing space).
*
* @param HeaderPart[] $parts
* @return HeaderPart[]
*/
protected function filterIgnoredSpaces(array $parts) : array
{
// Sentinel standing in for "before the first part" and "after the last
// part"; it behaves like an ignorable space.
$ends = (object) ['isSpace' => true, 'canIgnoreSpacesAfter' => true, 'canIgnoreSpacesBefore' => true, 'value' => ''];
$spaced = \array_merge($parts, [$ends]);
// Reduce over every key except the trailing sentinel's. The carry starts
// with the sentinel, which the outer array_slice(..., 1) strips again.
$filtered = \array_slice(\array_reduce(
\array_slice(\array_keys($spaced), 0, -1),
function($carry, $key) use ($spaced, $ends) {
// $p: current part, $l: last part kept so far, $a: the part after $p.
$p = $spaced[$key];
$l = \end($carry);
$a = $spaced[$key + 1];
if ($p->isSpace && $a === $ends) {
// trim
if ($l->isSpace) {
\array_pop($carry);
}
return $carry;
} elseif ($p->isSpace && ($l->isSpace || ($l->canIgnoreSpacesAfter && $a->canIgnoreSpacesBefore))) {
return $carry;
}
return \array_merge($carry, [$p]);
},
[$ends]
), 1);
return $filtered;
}
/**
* Creates the string value representation of this part constructed from the
* child parts passed to it.
*
* The default implementation filters out ignorable whitespace between
* parts, and concatenates parts calling 'getValue'.
*
* @param HeaderPart[] $parts
*/
protected function getValueFromParts(array $parts) : string
{
return \array_reduce($this->filterIgnoredSpaces($parts), fn ($c, $p) => $c . $p->getValue(), '');
}
/**
* Returns the child parts this container part consists of.
*
* @return IHeaderPart[]
*/
public function getChildParts() : array
{
return $this->children;
}
/**
* Returns the CommentParts among this part's children together with the
* comments reported by the remaining children.
*
* Each child contributes either itself (when it is a CommentPart) or the
* result of its own getComments(); empty results are filtered out before
* the lists are merged.
*/
public function getComments() : array
{
return \array_merge(...\array_filter(\array_map(
fn ($p) => ($p instanceof CommentPart) ? [$p] : $p->getComments(),
$this->children
)));
}
/**
* Returns this part's children, same as getChildParts().
*
* @return ErrorBag[]
*/
protected function getErrorBagChildren() : array
{
return $this->children;
}
}

View File

@@ -0,0 +1,72 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use DateTime;
use Exception;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Represents the value of a date header, parsing the date into a \DateTime
 * object.
 *
 * @author Zaahid Bateson
 */
class DatePart extends ContainerPart
{
    /**
     * @var DateTime the parsed date, or null if the date could not be parsed
     */
    protected ?DateTime $date = null;

    /**
     * Trims the value built from the passed parts and tries to parse it by
     * calling new DateTime($value), after fixing up two non-compliant forms:
     * a trailing four-digit timezone missing its '+' sign, and a trailing
     * 'UT' timezone.
     *
     * A parse failure is recorded as an error on this part and leaves the
     * date null.
     *
     * @param HeaderPart[] $children
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        array $children
    ) {
        // parent::__construct converts character encoding -- may cause problems sometimes.
        parent::__construct($logger, $charsetConverter, $children);

        $trimmed = \trim($this->value);
        $this->value = $trimmed;
        $parseable = $trimmed;

        // Missing "+" in timezone definition. eg: Thu, 13 Mar 2014 15:02:47 0000 (not RFC compliant)
        // Won't result in an Exception, but in a valid DateTime in year `0000` - therefore we need to check this first:
        if (\preg_match('# [0-9]{4}$#', $parseable)) {
            $parseable = \preg_replace('# ([0-9]{4})$#', ' +$1', $parseable);
        } elseif (\preg_match('#UT$#', $parseable)) {
            // @see https://bugs.php.net/bug.php?id=42486
            $parseable .= 'C';
        }

        try {
            $this->date = new DateTime($parseable);
        } catch (Exception $parseFailure) {
            $message = 'Unable to parse date from header: "' . $parseable . '"';
            $this->addError($message, LogLevel::ERROR, $parseFailure);
        }
    }

    /**
     * Returns a DateTime object or null if it can't be parsed.
     */
    public function getDateTime() : ?DateTime
    {
        return $this->date;
    }
}

View File

@@ -0,0 +1,116 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use ZBateson\MailMimeParser\ErrorBag;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MbWrapper\MbWrapper;
use ZBateson\MbWrapper\UnsupportedCharsetException;
/**
 * Common base for every parsed piece of a header value.
 *
 * Holds the processed string value, the charset converter used by
 * convertEncoding(), and the flags describing how whitespace around the part
 * may be treated.
 *
 * @author Zaahid Bateson
 */
abstract class HeaderPart extends ErrorBag implements IHeaderPart
{
    /**
     * @var string the representative value of the part after any conversion or
     *      processing has been done on it (e.g. removing new lines, converting,
     *      whatever else).
     */
    protected string $value;

    /**
     * @var MbWrapper the charset converter used for converting strings in
     *      HeaderPart::convertEncoding
     */
    protected MbWrapper $charsetConverter;

    /**
     * @var bool set to true to ignore spaces before this part
     */
    protected bool $canIgnoreSpacesBefore = false;

    /**
     * @var bool set to true to ignore spaces after this part
     */
    protected bool $canIgnoreSpacesAfter = false;

    /**
     * @var bool true if the part is a space token
     */
    protected bool $isSpace = false;

    /**
     * Stores the converter and the part's initial value.
     */
    public function __construct(LoggerInterface $logger, MbWrapper $charsetConverter, string $value)
    {
        parent::__construct($logger);
        $this->charsetConverter = $charsetConverter;
        $this->value = $value;
    }

    /**
     * Returns the part's representative value after any necessary processing
     * has been performed. For the raw value, call getRawValue().
     */
    public function getValue() : string
    {
        return $this->value;
    }

    /**
     * Casting a part to string yields its value.
     *
     * @return string the value
     */
    public function __toString() : string
    {
        return $this->value;
    }

    /**
     * Returns $str converted to UTF-8.
     *
     * Nothing is converted when $from is already 'UTF-8', or when $force is
     * false and the charset converter reports that $str is valid UTF-8.
     * If the converter does not support $from, an error is recorded and the
     * string is converted from ISO-8859-1 instead.
     *
     * @return string utf-8 string
     */
    protected function convertEncoding(string $str, string $from = 'ISO-8859-1', bool $force = false) : string
    {
        if ($from === 'UTF-8') {
            return $str;
        }
        // mime header part decoding forces the conversion. This is necessary
        // for UTF-7 because the UTF-8 validity check returns true for it
        if (!$force && $this->charsetConverter->checkEncoding($str, 'UTF-8')) {
            return $str;
        }
        try {
            return $this->charsetConverter->convert($str, $from, 'UTF-8');
        } catch (UnsupportedCharsetException $unsupported) {
            $this->addError('Unable to convert charset', LogLevel::ERROR, $unsupported);
        }
        return $this->charsetConverter->convert($str, 'ISO-8859-1', 'UTF-8');
    }

    /**
     * Default implementation returns an empty array.
     */
    public function getComments() : array
    {
        return [];
    }

    /**
     * Default implementation returns an empty array.
     */
    protected function getErrorBagChildren() : array
    {
        return [];
    }
}

View File

@@ -0,0 +1,176 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\IHeaderPart;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Factory for the IHeaderPart implementations in this namespace.
 *
 * Every part created here receives the factory's logger and charset
 * converter.
 *
 * @author Zaahid Bateson
 */
class HeaderPartFactory
{
    /**
     * @var MbWrapper passed to the constructors of created parts, which use
     *      it for converting strings
     */
    protected MbWrapper $charsetConverter;

    /**
     * @var LoggerInterface passed to the constructors of created parts
     */
    protected LoggerInterface $logger;

    public function __construct(LoggerInterface $logger, MbWrapper $charsetConverter)
    {
        $this->charsetConverter = $charsetConverter;
        $this->logger = $logger;
    }

    /**
     * Creates the factory's default IHeaderPart; subclass factories override
     * this to produce a specialized part type.
     *
     * This implementation returns a new Token.
     */
    public function newInstance(string $value) : IHeaderPart
    {
        return $this->newToken($value);
    }

    /**
     * Creates a Token.
     */
    public function newToken(string $value, bool $isLiteral = false, bool $preserveSpaces = false) : Token
    {
        return new Token(
            $this->logger,
            $this->charsetConverter,
            $value,
            $isLiteral,
            $preserveSpaces
        );
    }

    /**
     * Creates a SubjectToken.
     */
    public function newSubjectToken(string $value) : SubjectToken
    {
        return new SubjectToken(
            $this->logger,
            $this->charsetConverter,
            $value
        );
    }

    /**
     * Creates a MimeToken.
     */
    public function newMimeToken(string $value) : MimeToken
    {
        return new MimeToken(
            $this->logger,
            $this->charsetConverter,
            $value
        );
    }

    /**
     * Creates a ContainerPart.
     *
     * @param HeaderPart[] $children
     */
    public function newContainerPart(array $children) : ContainerPart
    {
        return new ContainerPart(
            $this->logger,
            $this->charsetConverter,
            $children
        );
    }

    /**
     * Creates a SplitParameterPart, handing it this factory as well.
     *
     * @param ParameterPart[] $children
     */
    public function newSplitParameterPart(array $children) : SplitParameterPart
    {
        return new SplitParameterPart(
            $this->logger,
            $this->charsetConverter,
            $this,
            $children
        );
    }

    /**
     * Creates a QuotedLiteralPart.
     *
     * @param HeaderPart[] $parts
     */
    public function newQuotedLiteralPart(array $parts) : QuotedLiteralPart
    {
        return new QuotedLiteralPart(
            $this->logger,
            $this->charsetConverter,
            $parts
        );
    }

    /**
     * Creates a CommentPart, handing it this factory as well.
     *
     * @param HeaderPart[] $children
     */
    public function newCommentPart(array $children) : CommentPart
    {
        return new CommentPart(
            $this->logger,
            $this->charsetConverter,
            $this,
            $children
        );
    }

    /**
     * Creates an AddressPart.
     *
     * @param HeaderPart[] $nameParts
     * @param HeaderPart[] $emailParts
     */
    public function newAddress(array $nameParts, array $emailParts) : AddressPart
    {
        return new AddressPart(
            $this->logger,
            $this->charsetConverter,
            $nameParts,
            $emailParts
        );
    }

    /**
     * Creates an AddressGroupPart.
     *
     * @param HeaderPart[] $nameParts
     * @param AddressPart[]|AddressGroupPart[] $addressesAndGroups
     */
    public function newAddressGroupPart(array $nameParts, array $addressesAndGroups) : AddressGroupPart
    {
        return new AddressGroupPart(
            $this->logger,
            $this->charsetConverter,
            $nameParts,
            $addressesAndGroups
        );
    }

    /**
     * Creates a DatePart.
     *
     * @param HeaderPart[] $children
     */
    public function newDatePart(array $children) : DatePart
    {
        return new DatePart(
            $this->logger,
            $this->charsetConverter,
            $children
        );
    }

    /**
     * Creates a ParameterPart.
     *
     * @param HeaderPart[] $nameParts
     */
    public function newParameterPart(array $nameParts, ContainerPart $valuePart) : ParameterPart
    {
        return new ParameterPart(
            $this->logger,
            $this->charsetConverter,
            $nameParts,
            $valuePart
        );
    }

    /**
     * Creates a ReceivedPart.
     *
     * @param HeaderPart[] $children
     */
    public function newReceivedPart(string $name, array $children) : ReceivedPart
    {
        return new ReceivedPart(
            $this->logger,
            $this->charsetConverter,
            $name,
            $children
        );
    }

    /**
     * Creates a ReceivedDomainPart.
     *
     * @param HeaderPart[] $children
     */
    public function newReceivedDomainPart(string $name, array $children) : ReceivedDomainPart
    {
        return new ReceivedDomainPart($this->logger, $this->charsetConverter, $name, $children);
    }
}

View File

@@ -0,0 +1,109 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * A header token that may be a MIME "encoded word" as per RFC-2047.
 *
 * The value is decoded automatically on construction when the whole token is
 * an encoded word; otherwise it only has its encoding normalized.
 *
 * @author Zaahid Bateson
 */
class MimeToken extends Token
{
    /**
     * @var string regex pattern matching a mime-encoded part
     */
    public const MIME_PART_PATTERN = '=\?[^?=]+\?[QBqb]\?[^\?]+\?=';

    /**
     * @var string regex pattern used when parsing parameterized headers
     */
    public const MIME_PART_PATTERN_NO_QUOTES = '=\?[^\?=]+\?[QBqb]\?[^\?"]+\?=';

    /**
     * @var ?string the language code if any, or null otherwise
     */
    protected ?string $language = null;

    /**
     * @var ?string the charset if any, or null otherwise
     */
    protected ?string $charset = null;

    /**
     * Strips CR and LF from the token's value, decodes it, and derives the
     * space-ignoring flags from the raw value.
     */
    public function __construct(LoggerInterface $logger, MbWrapper $charsetConverter, string $value)
    {
        parent::__construct($logger, $charsetConverter, $value);

        $withoutNewLines = \preg_replace('/\r|\n/', '', $this->value);
        $this->value = $this->decodeMime($withoutNewLines);

        $pattern = self::MIME_PART_PATTERN;
        $this->canIgnoreSpacesBefore = (\preg_match("/^\s*{$pattern}|\s+/", $this->rawValue) === 1);
        $this->canIgnoreSpacesAfter = (\preg_match("/{$pattern}\s*|\s+\$/", $this->rawValue) === 1);
    }

    /**
     * Decodes $value when it consists of exactly one encoded word
     * ("=?charset[*language]?Q|B?text?="); any other value is passed through
     * convertEncoding() unchanged otherwise.
     */
    protected function decodeMime(string $value) : string
    {
        $isEncodedWord = \preg_match(
            '/^=\?([A-Za-z\-_0-9]+)\*?([A-Za-z\-_0-9]+)?\?([QBqb])\?([^\?]*)\?=$/',
            $value,
            $matches
        );
        if (!$isEncodedWord) {
            return $this->convertEncoding($value);
        }
        return $this->decodeMatchedEntity($matches);
    }

    /**
     * Decodes the text of a matched encoded word, records its charset and
     * language on the token, and returns the text converted to UTF-8.
     *
     * @param string[] $matches capture groups from decodeMime(): 1 = charset,
     *        2 = language, 3 = encoding letter, 4 = encoded text
     */
    private function decodeMatchedEntity(array $matches) : string
    {
        // 'Q' is quoted-printable with '_' standing in for a space; the only
        // other letter the pattern allows is 'B' for base64
        $decoded = (\strtoupper($matches[3]) === 'Q')
            ? \quoted_printable_decode(\str_replace('_', '=20', $matches[4]))
            : \base64_decode($matches[4]);

        $this->charset = $matches[1];
        $this->language = (empty($matches[2])) ? null : $matches[2];

        return $this->convertEncoding($decoded, $this->charset ?? 'ISO-8859-1', true);
    }

    /**
     * Returns the language code for the mime part.
     */
    public function getLanguage() : ?string
    {
        return $this->language;
    }

    /**
     * Returns the charset for the encoded part.
     */
    public function getCharset() : ?string
    {
        return $this->charset;
    }

    /**
     * Returns the token exactly as it was passed to the constructor.
     */
    public function getRawValue() : string
    {
        return $this->rawValue;
    }
}

View File

@@ -0,0 +1,27 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use ZBateson\MailMimeParser\Header\IHeaderPart;
/**
 * A HeaderPartFactory whose default part type is MimeToken.
 *
 * @author Zaahid Bateson
 */
class MimeTokenPartFactory extends HeaderPartFactory
{
    /**
     * Builds the default part for this factory: a MimeToken for $value.
     */
    public function newInstance(string $value) : IHeaderPart
    {
        $mimeToken = $this->newMimeToken($value);
        return $mimeToken;
    }
}

View File

@@ -0,0 +1,65 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;
use ZBateson\MailMimeParser\ErrorBag;
use ZBateson\MbWrapper\MbWrapper;
/**
 * A header part made up of a name and a value.
 *
 * @author Zaahid Bateson
 */
class NameValuePart extends ContainerPart
{
    /**
     * @var string the name of the part
     */
    protected string $name;

    /**
     * Derives the name from $nameParts, builds the value from $valueParts
     * through the parent constructor, then prepends $nameParts to the part's
     * children.
     *
     * @param HeaderPart[] $nameParts
     * @param HeaderPart[] $valueParts
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        array $nameParts,
        array $valueParts
    ) {
        // The name is computed before the parent constructor runs, so the
        // logger and the charset converter are set up by hand first.
        ErrorBag::__construct($logger);
        $this->charsetConverter = $charsetConverter;

        $this->name = ($nameParts === []) ? '' : $this->getNameFromParts($nameParts);

        parent::__construct($logger, $charsetConverter, $valueParts);
        \array_unshift($this->children, ...$nameParts);
    }

    /**
     * Builds the part's name by concatenating the values of the passed name
     * parts, after ignorable spaces have been filtered out.
     *
     * @param HeaderPart[] $parts
     */
    protected function getNameFromParts(array $parts) : string
    {
        $name = '';
        foreach ($this->filterIgnoredSpaces($parts) as $part) {
            $name = $name . $part->getValue();
        }
        return $name;
    }

    /**
     * Returns the name of the name/value part.
     */
    public function getName() : string
    {
        return $this->name;
    }

    /**
     * Records a notice when the part has an empty value.
     */
    protected function validate() : void
    {
        if ($this->value !== '') {
            return;
        }
        $this->addError('NameValuePart value is empty', LogLevel::NOTICE);
    }
}

View File

@@ -0,0 +1,119 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Represents a name/value parameter part of a header.
 *
 * Recognizes the RFC-2231 name suffixes: a trailing "*" marks an encoded
 * value and "*N" marks the N-th piece of a split (continued) parameter.
 *
 * @author Zaahid Bateson
 */
class ParameterPart extends NameValuePart
{
    /**
     * @var ?string the RFC-1766 language tag if set.
     */
    protected ?string $language = null;

    /**
     * @var ?string charset of content if set.
     */
    protected ?string $charset = null;

    /**
     * @var ?int the zero-based index of the part if part of a 'continuation'
     *      in an RFC-2231 split parameter.
     */
    protected ?int $index = null;

    /**
     * @var bool true if the part is an RFC-2231 encoded part, and the value
     *      needs to be decoded.
     */
    protected bool $encoded = false;

    /**
     * @param HeaderPart[] $nameParts
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        array $nameParts,
        ContainerPart $valuePart
    ) {
        parent::__construct($logger, $charsetConverter, $nameParts, $valuePart->children);
    }

    /**
     * Returns the parameter name with any RFC-2231 suffix removed, setting
     * $index and $encoded from that suffix.
     *
     * "name*" is encoded with no index, "name*0" has index 0 and is not
     * encoded, "name*0*" has index 0 and is encoded.
     */
    protected function getNameFromParts(array $parts) : string
    {
        $name = parent::getNameFromParts($parts);
        if (\preg_match('~^\s*([^\*]+)\*(\d*)(\*)?$~', $name, $matches)) {
            $name = $matches[1];
            $this->index = ($matches[2] !== '') ? (int) ($matches[2]) : null;
            $this->encoded = (($matches[2] === '') || !empty($matches[3]));
        }
        return $name;
    }

    /**
     * URL-decodes $value and converts it to UTF-8, forcing a conversion from
     * $charset when one is given.
     */
    protected function decodePartValue(string $value, ?string $charset = null) : string
    {
        if ($charset !== null) {
            return $this->convertEncoding(\rawurldecode($value), $charset, true);
        }
        return $this->convertEncoding(\rawurldecode($value));
    }

    /**
     * Returns the parameter's value.
     *
     * For an encoded parameter the value has the form
     * charset'language'text; charset and language are stored on the part
     * when present.  The text is decoded here only if the part is not a piece
     * of a split parameter (no index), otherwise it is returned still encoded.
     */
    protected function getValueFromParts(array $parts) : string
    {
        $value = parent::getValueFromParts($parts);
        if (!$this->encoded || !\preg_match('~^([^\']*)\'?([^\']*)\'?(.*)$~', $value, $matches)) {
            return $value;
        }

        // Compared against '' instead of using empty(): empty('0') is true,
        // which made an encoded value of "0" (e.g. UTF-8''0) look missing, so
        // the charset was dropped and "UTF-8" was returned as the value.
        $hasTextAfterQuotes = ($matches[3] !== '');

        if (!empty($matches[1]) && $hasTextAfterQuotes) {
            $this->charset = $matches[1];
        }
        if (!empty($matches[2])) {
            $this->language = $matches[2];
        }
        // with no charset'language' prefix the whole value is in group 1
        $ev = ($hasTextAfterQuotes) ? $matches[3] : $matches[1];

        // only if it's not part of a SplitParameterPart
        if ($this->index === null) {
            // subsequent parts are decoded as a SplitParameterPart since only
            // the first part are supposed to have charset/language fields
            return $this->decodePartValue($ev, $this->charset);
        }
        return $ev;
    }

    /**
     * Returns the charset if the part is an RFC-2231 part with a charset set.
     */
    public function getCharset() : ?string
    {
        return $this->charset;
    }

    /**
     * Returns the RFC-1766 (or subset) language tag, if the parameter is an
     * RFC-2231 part with a language tag set.
     *
     * @return ?string the language if set, or null if not
     */
    public function getLanguage() : ?string
    {
        return $this->language;
    }

    /**
     * Returns true if the parameter name marked the value as RFC-2231
     * encoded.
     */
    public function isUrlEncoded() : bool
    {
        return $this->encoded;
    }

    /**
     * Returns the zero-based continuation index, or null if the parameter is
     * not a piece of a split parameter.
     */
    public function getIndex() : ?int
    {
        return $this->index;
    }
}

View File

@@ -0,0 +1,46 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
/**
 * A quoted literal header string part. The value of the part is stripped of CR
 * and LF characters, and whitespace between two adjacent MimeTokens is removed.
 *
 * @author Zaahid Bateson
 */
class QuotedLiteralPart extends ContainerPart
{
    /**
     * Strips spaces found between two adjacent MimeToken parts.
     * Other whitespace is returned as-is.
     *
     * An empty $parts array yields an empty array.  (Reducing without an
     * initial value used to produce null here, which violated the declared
     * return type.)
     *
     * @param HeaderPart[] $parts
     * @return HeaderPart[]
     */
    protected function filterIgnoredSpaces(array $parts) : array
    {
        // re-index so "$key + 1" always addresses the following part
        $parts = \array_values($parts);
        $total = \count($parts);
        $filtered = [];

        foreach ($parts as $key => $cur) {
            $last = ($filtered !== []) ? \end($filtered) : null;
            $next = ($key + 1 < $total) ? $parts[$key + 1] : null;

            // a space is dropped only when it sits between two MimeTokens
            // that both allow the surrounding space to be ignored
            $isSpaceBetweenMimeTokens = $last !== null
                && $next !== null
                && $cur->isSpace
                && $last->canIgnoreSpacesAfter
                && $next->canIgnoreSpacesBefore
                && $last instanceof MimeToken
                && $next instanceof MimeToken;

            if (!$isSpaceBetweenMimeTokens) {
                $filtered[] = $cur;
            }
        }
        return $filtered;
    }
}

View File

@@ -0,0 +1,103 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Holds extra information about a parsed Received header part, for FROM and BY
 * parts, namely: ehlo name, hostname, and address.
 *
 * The parsed parts would be mapped as follows:
 *
 * FROM ehlo name (hostname [address]), for example: FROM computer (domain.com
 * [1.2.3.4]) would contain "computer" for getEhloName(), domain.com for
 * getHostname and 1.2.3.4 for getAddress().
 *
 * This doesn't change if the ehlo name is an address, it is still returned in
 * getEhloName(), and not in getAddress(). Additionally square brackets are not
 * stripped from getEhloName() if its an address. For example: "FROM [1.2.3.4]"
 * would return "[1.2.3.4]" in a call to getEhloName().
 *
 * For further information on how the header's parsed, check the documentation
 * for {@see \ZBateson\MailMimeParser\Header\Consumer\Received\DomainConsumer}.
 *
 * @author Zaahid Bateson
 */
class ReceivedDomainPart extends ReceivedPart
{
    /**
     * @var ?string The name used to identify the server in the EHLO line.
     */
    protected ?string $ehloName = null;

    /**
     * @var ?string The hostname.
     */
    protected ?string $hostname = null;

    /**
     * @var ?string The address.
     */
    protected ?string $address = null;

    /**
     * Takes the part's value as the ehlo name, then looks at the first
     * comment: if it matches the hostname/address pattern it is appended to
     * the value in parentheses and split into hostname and address.
     *
     * @param HeaderPart[] $children
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        string $name,
        array $children
    ) {
        parent::__construct($logger, $charsetConverter, $name, $children);

        $this->ehloName = ($this->value === '') ? null : $this->value;

        $comments = $this->getComments();
        $firstComment = (empty($comments)) ? null : $comments[0];
        if ($firstComment === null) {
            return;
        }

        // optional "[addr]", optional "helo=" + host name, optional "[addr]"
        $pattern = '~^(\[(IPv[64])?(?P<addr1>[a-f\d\.\:]+)\])?\s*(helo=)?(?P<name>[a-z0-9\-]+[a-z0-9\-\.]+)?\s*(\[(IPv[64])?(?P<addr2>[a-f\d\.\:]+)\])?$~i';
        if (!\preg_match($pattern, $firstComment->getComment(), $matches)) {
            return;
        }

        $this->value .= ' (' . $firstComment->getComment() . ')';
        $this->hostname = (empty($matches['name'])) ? null : $matches['name'];

        // the address before the name wins over the one after it
        if (!empty($matches['addr1'])) {
            $this->address = $matches['addr1'];
        } elseif (!empty($matches['addr2'])) {
            $this->address = $matches['addr2'];
        } else {
            $this->address = null;
        }
    }

    /**
     * Returns the name used to identify the server in the first part of the
     * extended-domain line.
     *
     * Note that this is not necessarily the name used in the EHLO line to an
     * SMTP server, since implementations differ so much, not much can be
     * guaranteed except the position it was parsed in.
     */
    public function getEhloName() : ?string
    {
        return $this->ehloName;
    }

    /**
     * Returns the hostname of the server, or whatever string in the hostname
     * position when parsing (but never an address).
     */
    public function getHostname() : ?string
    {
        return $this->hostname;
    }

    /**
     * Returns the address of the server, or whatever string that looks like an
     * address in the address position when parsing (but never a hostname).
     */
    public function getAddress() : ?string
    {
        return $this->address;
    }
}

View File

@@ -0,0 +1,37 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * One parameter of a parsed 'Received' header, e.g. the FROM or VIA part.
 *
 * Note that FROM and BY actually get parsed into a sub-class,
 * ReceivedDomainPart which keeps track of other sub-parts that can be parsed
 * from them.
 *
 * @author Zaahid Bateson
 */
class ReceivedPart extends NameValuePart
{
    /**
     * Builds the value from $children and takes the name directly from $name
     * instead of from name parts.
     *
     * @param HeaderPart[] $children
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        string $name,
        array $children
    ) {
        // no name parts are passed up: the name is already a plain string
        $noNameParts = [];
        parent::__construct($logger, $charsetConverter, $noNameParts, $children);
        $this->name = $name;
    }
}

View File

@@ -0,0 +1,102 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Holds the combined value of an RFC-2231 split header parameter.
 *
 * The children are the individual ParameterParts making up the split
 * parameter; their values are ordered by index, decoded where needed and
 * joined into a single value.
 *
 * @author Zaahid Bateson
 */
class SplitParameterPart extends ParameterPart
{
    /**
     * @var HeaderPartFactory used to create combined MimeToken parts.
     */
    protected HeaderPartFactory $partFactory;

    /**
     * Initializes a SplitParameterPart.
     *
     * @param ParameterPart[] $children
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        HeaderPartFactory $headerPartFactory,
        array $children
    ) {
        // assigned first because getMimeTokens() needs the factory
        // (presumably while the value is built during parent construction)
        $this->partFactory = $headerPartFactory;
        NameValuePart::__construct($logger, $charsetConverter, [$children[0]], $children);
        // NameValuePart prepends the name parts to the children; restore the
        // list to exactly the parameter parts that were passed in
        $this->children = $children;
    }

    /**
     * The name of a split parameter is the name of its first part.
     */
    protected function getNameFromParts(array $parts) : string
    {
        return $parts[0]->getName();
    }

    /**
     * Splits $value into MimeTokens (for mime-encoded parts) and literal
     * Tokens (for everything else), dropping whitespace that separates two
     * adjacent mime-encoded parts.
     *
     * @return Token[]
     */
    private function getMimeTokens(string $value) : array
    {
        $pattern = MimeToken::MIME_PART_PATTERN;
        // remove whitespace between two adjacent mime encoded parts
        $normed = \preg_replace("/($pattern)\\s+(?=$pattern)/", '$1', $value);
        // with PREG_SPLIT_DELIM_CAPTURE, matched and unmatched parts are returned
        $aMimeParts = \preg_split("/($pattern)/", $normed, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        return \array_map(
            fn ($p) => (\preg_match("/$pattern/", $p)) ? $this->partFactory->newMimeToken($p) : $this->partFactory->newToken($p, true, true),
            $aMimeParts
        );
    }

    /**
     * Joins the values of consecutive unencoded parts and replaces each such
     * run with the tokens returned by getMimeTokens(); encoded parts are kept
     * as they are, in order.
     *
     * @param ParameterPart[] $parts
     * @return HeaderPart[]
     */
    private function combineAdjacentUnencodedParts(array $parts) : array
    {
        $runningValue = '';
        $returnedParts = [];
        foreach ($parts as $part) {
            if (!$part->encoded) {
                $runningValue .= $part->value;
                continue;
            }
            // compared against '' instead of using empty(): empty('0') is
            // true, which silently dropped a run whose text was exactly "0"
            if ($runningValue !== '') {
                $returnedParts = \array_merge($returnedParts, $this->getMimeTokens($runningValue));
                $runningValue = '';
            }
            $returnedParts[] = $part;
        }
        if ($runningValue !== '') {
            $returnedParts = \array_merge($returnedParts, $this->getMimeTokens($runningValue));
        }
        return $returnedParts;
    }

    /**
     * Builds the combined value: parts are sorted by index, language and
     * charset are taken from the first one, and encoded parts are decoded
     * with their own charset or, failing that, the first part's charset.
     */
    protected function getValueFromParts(array $parts) : string
    {
        $sorted = $parts;
        \usort($sorted, fn ($a, $b) => $a->index <=> $b->index);

        $first = $sorted[0];
        $this->language = $first->language;
        $charset = $this->charset = $first->charset;

        $combined = $this->combineAdjacentUnencodedParts($sorted);

        return \implode('', \array_map(
            fn ($p) => ($p instanceof ParameterPart && $p->encoded)
                ? $this->decodePartValue($p->getValue(), ($p->charset === null) ? $charset : $p->charset)
                : $p->getValue(),
            $combined
        ));
    }
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * Token specialization for subjects: whitespace is preserved, except for new
 * lines.
 *
 * New lines are either discarded if followed by a whitespace as should happen
 * with folding whitespace, or replaced by a single space character if somehow
 * aren't followed by whitespace.
 *
 * @author Zaahid Bateson
 */
class SubjectToken extends Token
{
    /**
     * Constructs the token as a literal, then rewrites the value with new
     * lines unfolded and sets the space flags from the result.
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        string $value
    ) {
        parent::__construct($logger, $charsetConverter, $value, true);

        // first: new line(s) followed by whitespace keep one whitespace
        // character; second: any remaining new line(s) become one space
        $newLinePatterns = ['/(\r|\n)+(\s)\s*/', '/(\r|\n)+/'];
        $replacements = ['$2', ' '];
        $this->value = \preg_replace($newLinePatterns, $replacements, $value);

        $this->isSpace = (\preg_match('/^\s*$/m', $this->value) === 1);
        $this->canIgnoreSpacesAfter = $this->isSpace;
        $this->canIgnoreSpacesBefore = $this->isSpace;
    }

    /**
     * Returns the unfolded subject value passed through convertEncoding().
     */
    public function getValue() : string
    {
        $processed = $this->value;
        return $this->convertEncoding($processed);
    }
}

View File

@@ -0,0 +1,66 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header\Part;
use Psr\Log\LoggerInterface;
use ZBateson\MbWrapper\MbWrapper;
/**
 * A string value that still needs processing by a consumer before it is
 * returned to a client.
 *
 * A Token is meant to hold a value for further processing -- for instance when
 * consuming an address list header (like From or To) -- before it's known what
 * type of IHeaderPart it is (could be an email address, could be a name, or
 * could be a group.)
 *
 * @author Zaahid Bateson
 */
class Token extends HeaderPart
{
    /**
     * @var string the raw value of the part.
     */
    protected string $rawValue;

    /**
     * Keeps $value as the raw value and, unless $isLiteral is set, unfolds
     * new lines in the processed value.
     *
     * @param bool $isLiteral when true the value is left untouched and only
     *        an empty value counts as a space
     * @param bool $preserveSpaces when false (and not literal), lines made
     *        up only of whitespace are collapsed to a single space
     */
    public function __construct(
        LoggerInterface $logger,
        MbWrapper $charsetConverter,
        string $value,
        bool $isLiteral = false,
        bool $preserveSpaces = false
    ) {
        parent::__construct($logger, $charsetConverter, $value);
        $this->rawValue = $value;

        if (!$isLiteral) {
            // new line(s) followed by whitespace keep that whitespace
            // character; any remaining new line(s) become one space
            $this->value = \preg_replace(['/(\r|\n)+(\s)/', '/(\r|\n)+/'], ['$2', ' '], $value);
        }
        if (!$isLiteral && !$preserveSpaces) {
            $this->value = \preg_replace('/^\s+$/m', ' ', $this->value);
        }

        if ($this->value === '') {
            $this->isSpace = true;
        } else {
            $this->isSpace = (!$isLiteral && \preg_match('/^\s*$/m', $this->value) === 1);
        }
        $this->canIgnoreSpacesAfter = $this->isSpace;
        $this->canIgnoreSpacesBefore = $this->isSpace;
    }

    /**
     * Returns the part's representative value after any necessary processing
     * has been performed. For the raw value, call getRawValue().
     */
    public function getValue() : string
    {
        $processed = $this->value;
        return $this->convertEncoding($processed);
    }

    /**
     * Returns the value exactly as it was passed to the constructor.
     */
    public function getRawValue() : string
    {
        return $this->rawValue;
    }
}

View File

@@ -0,0 +1,235 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use DateTime;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\ReceivedConsumerService;
use ZBateson\MailMimeParser\Header\Part\DatePart;
use ZBateson\MailMimeParser\MailMimeParser;
/**
* Represents a Received header.
*
* The returned header value (as returned by a call to {@see
* ReceivedHeader::getValue()}) for a
* ReceivedHeader is the same as the raw value (as returned by a call to
* {@see ReceivedHeader::getRawValue()}) since the header doesn't have a single
* 'value' to consider 'the value'.
*
* The parsed parts of a Received header can be accessed as parameters. To
* check if a part exists, call {@see ReceivedHeader::hasParameter()} with the
* name of the part, for example: ```php $header->hasParameter('from') ``` or
* ```php $header->hasParameter('id') ```. The value of the part can be obtained
* by calling {@see ReceivedHeader::getValueFor()}, for example
* ```php $header->getValueFor('with'); ```.
*
* Additional parsing is performed on the "FROM" and "BY" parts of a received
* header in an attempt to extract the self-identified name of the server, its
* hostname, and its address (depending on what's included). These can be
* accessed directly from the ReceivedHeader object by calling one of the
* following methods:
*
* o {@see ReceivedHeader::getFromName()} -- the name portion of the FROM part
* o {@see ReceivedHeader::getFromHostname()} -- the hostname of the FROM part
* o {@see ReceivedHeader::getFromAddress()} -- the address portion of the FROM
* part
* o {@see ReceivedHeader::getByName()} -- same as getFromName, but for the BY
* part, and etc... below
* o {@see ReceivedHeader::getByHostname()}
* o {@see ReceivedHeader::getByAddress()}
*
* The parsed parts of the FROM and BY parts are determined as follows:
*
* o Anything outside and before a parenthesized expression is considered "the
* name", for example "FROM AlainDeBotton", "AlainDeBotton" would be the name,
* but also if the name is an address, but exists outside the parenthesized
* expression, it's still considered "the name". For example:
* "From [1.2.3.4]", getFromName would return "[1.2.3.4]".
* o A parenthesized expression MUST match what looks like either a domain name
* on its own, or a domain name and an address. Otherwise the parenthesized
* expression is considered a comment, and not parsed into hostname and
* address. The rules are defined loosely because many implementations differ
* in how strictly they follow the standard. For a domain, it's enough that
* the expression starts with any alphanumeric character and contains at least
* one '.', followed by any number of '.', '-' and alphanumeric characters.
* The address portion must be surrounded in square brackets, and contain any
* sequence of '.', ':', numbers, and characters 'a' through 'f'. In addition
* the string 'ipv6' may start the expression (for instance, '[ipv6:::1]'
* would be valid). A port number may also be considered valid as part of the
* address, for example: [1.2.3.4:3231]. No additional validation on the
* address is done, and so an invalid address such as '....' could be
* returned, so users using the 'address' header are encouraged to validate it
* before using it. The square brackets are parsed out of the returned
* address, so the value returned by getFromAddress() would be "2.2.2.2", not
* "[2.2.2.2]".
*
* The date/time stamp can be accessed as a DateTime object by calling
* {@see ReceivedHeader::getDateTime()}.
*
* Parsed comments can be accessed by calling {@see
* ReceivedHeader::getComments()}. Some implementations may include connection
* encryption information or other details in non-standardized comments.
*
* @author Zaahid Bateson
*/
class ReceivedHeader extends ParameterHeader
{
/**
* @var DateTime the date/time stamp in the header.
*/
private ?DateTime $date = null;
/**
* @var bool set to true once $date has been looked for
*/
private bool $dateSet = false;
/**
 * Creates the header, taking the logger and the consumer service from the
 * global container when they are not passed in.
 *
 * AbstractHeader's constructor is invoked directly rather than the
 * immediate parent's.
 */
public function __construct(
    string $name,
    string $value,
    ?LoggerInterface $logger = null,
    ?ReceivedConsumerService $consumerService = null
) {
    $container = MailMimeParser::getGlobalContainer();
    $resolvedLogger = $logger ?? $container->get(LoggerInterface::class);
    $resolvedConsumerService = $consumerService ?? $container->get(ReceivedConsumerService::class);

    AbstractHeader::__construct($resolvedLogger, $resolvedConsumerService, $name, $value);
}
/**
 * Returns the raw, unparsed header value, same as {@see
 * ReceivedHeader::getRawValue()}.
 *
 * The stored raw value is returned as-is; no parsed part is consulted.
 */
public function getValue() : ?string
{
return $this->rawValue;
}
/**
 * Returns the name identified in the FROM part of the header, or null when
 * the header has no parsed FROM part.
 *
 * The returned value may either be a name or an address in the form
 * "[1.2.3.4]". Validation is not performed on this value, and so whatever
 * exists in this position is returned -- be it contains spaces, or invalid
 * characters, etc...
 *
 * @return ?string The 'FROM' name.
 */
public function getFromName() : ?string
{
    if (!isset($this->parameters['from'])) {
        return null;
    }
    return $this->parameters['from']->getEhloName();
}
/**
 * Returns the hostname part of a parenthesized FROM part, or null when the
 * header has no parsed FROM part or no hostname was extracted.
 *
 * For example, "FROM name (host.name)" would return the string "host.name".
 * Validation of the hostname is not performed, and the returned value may
 * not be valid. More details on how the value is parsed and extracted can
 * be found in the class description for {@see ReceivedHeader}.
 *
 * @return ?string The 'FROM' hostname.
 */
public function getFromHostname() : ?string
{
    if (!isset($this->parameters['from'])) {
        return null;
    }
    return $this->parameters['from']->getHostname();
}
/**
 * Returns the address part of a parenthesized FROM part, or null when the
 * header has no parsed FROM part or no address was extracted.
 *
 * For example, "FROM name ([1.2.3.4])" would return the string "1.2.3.4".
 * Validation of the address is not performed, and the returned value may
 * not be valid. More details on how the value is parsed and extracted can
 * be found in the class description for {@see ReceivedHeader}.
 *
 * @return ?string The 'FROM' address.
 */
public function getFromAddress() : ?string
{
    if (!isset($this->parameters['from'])) {
        return null;
    }
    return $this->parameters['from']->getAddress();
}
/**
 * Gets the name given in the BY part of the header.
 *
 * Whatever text occupies that position is handed back without validation:
 * it may be a name, an address written in the form "[1.2.3.4]", or a value
 * containing spaces or otherwise invalid characters.
 *
 * @return ?string The 'BY' name, or null when no BY part is defined or the
 *         format wasn't parsed.
 */
public function getByName() : ?string
{
    if (!isset($this->parameters['by'])) {
        return null;
    }
    return $this->parameters['by']->getEhloName();
}
/**
 * Gets the hostname found inside the parenthesized section of the BY part.
 *
 * Given "BY name (host.name)", the string "host.name" is returned. The
 * hostname is not validated and so may not be a valid one. The class
 * description of {@see ReceivedHeader} explains how the value is parsed
 * and extracted.
 *
 * @return ?string The 'BY' hostname, or null when no BY part is defined or
 *         the format wasn't parsed.
 */
public function getByHostname() : ?string
{
    if (!isset($this->parameters['by'])) {
        return null;
    }
    return $this->parameters['by']->getHostname();
}
/**
 * Gets the address found inside the parenthesized section of the BY part.
 *
 * Given "BY name ([1.2.3.4])", the string "1.2.3.4" is returned. The
 * address is not validated and so may not be a valid one. The class
 * description of {@see ReceivedHeader} explains how the value is parsed
 * and extracted.
 *
 * @return ?string The 'BY' address, or null when no BY part is defined or
 *         the format wasn't parsed.
 */
public function getByAddress() : ?string
{
    if (!isset($this->parameters['by'])) {
        return null;
    }
    return $this->parameters['by']->getAddress();
}
/**
 * Gets the date/time stamp of the received header, or null if there is
 * none.
 *
 * The header's parts are only scanned on the first call; the outcome --
 * null included -- is remembered and returned directly on later calls.
 *
 * @return ?DateTime The header's date/time, or null if not set.
 */
public function getDateTime() : ?DateTime
{
    if ($this->dateSet !== false) {
        return $this->date;
    }
    // The scan deliberately runs to the end: if more than one DatePart is
    // present, the last one encountered determines the result.
    foreach ($this->parts as $headerPart) {
        if (!($headerPart instanceof DatePart)) {
            continue;
        }
        $this->date = $headerPart->getDateTime();
    }
    $this->dateSet = true;
    return $this->date;
}
}

View File

@@ -0,0 +1,38 @@
<?php
/**
* This file is part of the ZBateson\MailMimeParser project.
*
* @license http://opensource.org/licenses/bsd-license.php BSD
*/
namespace ZBateson\MailMimeParser\Header;
use Psr\Log\LoggerInterface;
use ZBateson\MailMimeParser\Header\Consumer\SubjectConsumerService;
use ZBateson\MailMimeParser\MailMimeParser;
/**
 * Header implementation for a message's subject.
 *
 * What sets the subject header apart is that it doesn't include comments
 * or quoted parts.
 *
 * @author Zaahid Bateson
 */
class SubjectHeader extends AbstractHeader
{
    /**
     * Creates the header, falling back to the global container for any
     * dependency that was not passed in.
     *
     * @param string $name Name of the header.
     * @param string $value Value of the header.
     * @param ?LoggerInterface $logger Logger to hand to the parent
     *        constructor; fetched from the global container when null.
     * @param ?SubjectConsumerService $consumerService Consumer handed to
     *        the parent constructor; fetched from the global container
     *        when null.
     */
    public function __construct(
        string $name,
        string $value,
        ?LoggerInterface $logger = null,
        ?SubjectConsumerService $consumerService = null
    ) {
        // The container is always looked up first, even if both optional
        // dependencies were supplied by the caller.
        $container = MailMimeParser::getGlobalContainer();
        $resolvedLogger = $logger ?? $container->get(LoggerInterface::class);
        $resolvedConsumer = $consumerService ?? $container->get(SubjectConsumerService::class);

        parent::__construct($resolvedLogger, $resolvedConsumer, $name, $value);
    }
}