<?php

namespace Velis\Mail;

use ArrayIterator;
use Exception;
use Velis\Lang;
use Velis\Model\BaseModel;
use Zend\Mail\Exception\RuntimeException;
use Zend\Mail\Header\Exception\InvalidArgumentException as InvalidHeaderException;
use Zend\Mail\Header\HeaderInterface;
use Zend\Mail\Headers;
use Zend\Mail\Storage\Message;
use Zend\Mail\Storage\Part\PartInterface;

/**
 * Email parser based on Conjoon project
 * @author Olek Procki <olo@velis.pl>
 */
class Parser extends BaseModel
{
    /**
     * Flag raised by the temporary error handler (_iconvErrorHandler) whenever PHP
     * reports an error while _decode() attempts a charset conversion.
     * It is reset to false before each attempt.
     */
    private $_lastIconvError = false;

    /**
     * E-mails found as attachments during the last parse. Each entry is an array
     * with the keys 'type', 'content' and 'filename'.
     */
    private $_attachedEmails = [];


    /**
     * Parses a message and returns its decoded bodies.
     *
     * Public entry point; all the work is delegated to _fetchEmail(). The message
     * is taken by reference and handed on by reference.
     *
     * @param PartInterface $message
     * @return array parsed e-mail data (see _fetchEmail())
     */
    public function parse(&$message)
    {
        $parsedEmail = $this->_fetchEmail($message);

        return $parsedEmail;
    }


    /**
     * Returns the e-mails collected as attachments by the most recent parse.
     *
     * @return array list of arrays with the keys 'type', 'content' and 'filename'
     */
    public function getAttachedEmails()
    {
        $attachedEmails = $this->_attachedEmails;

        return $attachedEmails;
    }


    /**
     * Parses a message and returns the decoded bodies as an array.
     *
     * The result always contains the keys 'contentTextPlain' and 'contentTextHtml'
     * (empty strings when the message has no such body) and may contain
     * 'contentTextCalendar'. The list of attached e-mails is cleared first.
     *
     * @param Message $message
     * @return array
     */
    private function _fetchEmail(&$message)
    {
        // every parse starts with a clean list of attached e-mails
        $this->_attachedEmails = [];
        $emailItem             = [];

        $encodingInformation = $this->getEncodingInformation($message);
        $contentType         = $encodingInformation['contentType'];

        // single-part text types => result key that receives the decoded body
        $textTargets = [
            'text/plain'    => 'contentTextPlain',
            'text/html'     => 'contentTextHtml',
            'text/calendar' => 'contentTextCalendar',
        ];

        if ($contentType === 'multipart/mixed') {
            $this->_parseMultipartMixed($message, $emailItem);
        } elseif ($contentType === 'multipart/alternative') {
            $this->_parseMultipartAlternative($message, $emailItem);
        } elseif ($contentType === 'multipart/related') {
            $this->_parseMultipartRelated($message, $emailItem);
        } elseif ($contentType === 'multipart/signed') {
            $this->_parseMultipartSigned($message, $emailItem);
        } elseif ($contentType === 'multipart/report') {
            $this->_parseMultipartReport($message, $emailItem);
        } else {
            // any unrecognised content type is treated like plain text
            $targetKey = isset($textTargets[$contentType])
                ? $textTargets[$contentType]
                : 'contentTextPlain';

            $emailItem[$targetKey] = $this->_decode($message->getContent(), $encodingInformation);
        }

        // callers can rely on both body keys being present
        foreach (['contentTextPlain', 'contentTextHtml'] as $requiredKey) {
            if (!isset($emailItem[$requiredKey])) {
                $emailItem[$requiredKey] = '';
            }
        }

        return $emailItem;
    }


    /**
     * Reverses the content transfer encoding and converts the text to UTF-8.
     *
     * Steps:
     *  1. base64 / quoted-printable transfer encoding is decoded,
     *  2. iso-8859-1 is treated as its superset windows-1252,
     *  3. iconv conversion is attempted with a chain of fallbacks (windows-1252 for
     *     windows-1250/us-ascii, charset detection via mbstring, //TRANSLIT, //IGNORE).
     *
     * If every attempt fails the transfer-decoded text is returned unconverted.
     * PHP errors raised by iconv are captured by a temporary error handler which is
     * always removed again, even when an exception/error is thrown on the way.
     *
     * @param string $text
     * @param array $encodingInformation result of getEncodingInformation()
     *                                   ('charset' and 'contentTransferEncoding' are used)
     *
     * @return string
     */
    private function _decode($text, array $encodingInformation)
    {
        $charset = $encodingInformation['charset'];
        $contentTransferEncoding = $encodingInformation['contentTransferEncoding'];

        switch ($contentTransferEncoding) {
            case 'base64':
                $text = base64_decode($text);
                break;
            case 'quoted-printable':
                $text = quoted_printable_decode($text);
                break;
        }

        if (strtolower($charset) == 'iso-8859-1') {
            $charset = 'windows-1252';
        }

        $canDetect = function_exists('mb_detect_encoding');

        $this->_setIconvErrorHandler();
        try {
            if ($charset != "") {
                $conv = iconv($charset, 'UTF-8', $text);

                // windows-1250 and us-ascii are broadened to windows-1252 if the
                // declared charset does not work
                if (
                    ($conv === false || $this->_lastIconvError)
                    && in_array(strtolower($charset), array('windows-1250', 'us-ascii'), true)
                ) {
                    $this->_lastIconvError = false;
                    $conv = iconv('windows-1252', 'UTF-8', $text);
                }

                // fallback: try to detect the encoding if the mb extension is available
                if (($conv === false || $this->_lastIconvError) && $canDetect) {
                    $this->_lastIconvError = false;
                    $peekEncoding = mb_detect_encoding($text, $this->_getEncodingList(), true);

                    // detection may fail (false); never feed that to iconv as a charset
                    $conv = $peekEncoding !== false ? iconv($peekEncoding, 'UTF-8', $text) : false;
                }
                if ($conv === false || $this->_lastIconvError) {
                    $this->_lastIconvError = false;
                    $conv = iconv($charset, 'UTF-8//TRANSLIT', $text);
                }
                if ($conv === false || $this->_lastIconvError) {
                    $this->_lastIconvError = false;
                    $conv = iconv($charset, 'UTF-8//IGNORE', $text);
                }
            } else {
                // no charset declared: detect it, otherwise just drop invalid UTF-8 bytes
                $conv = false;
                if ($canDetect) {
                    $this->_lastIconvError = false;
                    $peekEncoding = mb_detect_encoding($text, $this->_getEncodingList(), true);
                    if ($peekEncoding !== false) {
                        $conv = iconv($peekEncoding, 'UTF-8', $text);
                    }
                }
                if ($conv === false || $this->_lastIconvError) {
                    $this->_lastIconvError = false;
                    $conv = iconv('UTF-8', 'UTF-8//IGNORE', $text);
                }
            }

            // only accept a conversion that finished without any reported error
            if ($conv !== false && !$this->_lastIconvError) {
                $text = $conv;
            }
        } finally {
            $this->_restoreErrorHandler();
        }

        return $text;
    }


    /**
     * Returns the candidate encodings handed to mb_detect_encoding().
     *
     * @return string comma separated list of encoding names
     */
    private function _getEncodingList()
    {
        $encodingList = 'UCS-4, UCS-4BE, UCS-4LE, UCS-2, UCS-2BE, UCS-2LE, UTF-32, UTF-32BE, UTF-32LE, UTF-16, UTF-16BE, UTF-16LE, UTF-8, UTF-7, UTF7-IMAP,  ASCII, EUC-JP, SJIS, eucJP-win, CP51932, JIS, ISO-2022-JP,  ISO-2022-JP-MS, Windows-1252, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4,  ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-13,  ISO-8859-14, ISO-8859-15, ISO-8859-16, EUC-CN, CP936, HZ, EUC-TW, BIG-5, EUC-KR,  UHC, ISO-2022-KR, Windows-1251, CP866, KOI8-R, ArmSCII-8';

        return $encodingList;
    }


    /**
     * Clears the error flag and installs _iconvErrorHandler() as PHP error handler.
     *
     * Must be paired with a call to _restoreErrorHandler().
     */
    private function _setIconvErrorHandler()
    {
        $handler = [$this, '_iconvErrorHandler'];

        $this->_lastIconvError = false;
        set_error_handler($handler);
    }


    /**
     * Re-activates the previously installed PHP error handler and clears the
     * error flag.
     */
    private function _restoreErrorHandler()
    {
        restore_error_handler();
        $this->_lastIconvError = false;
    }


    /**
     * Error handler callback installed by _setIconvErrorHandler().
     *
     * Raises the "last iconv error" flag so that _decode() can tell that the
     * conversion attempt it just made reported an error.
     */
    private function _iconvErrorHandler()
    {
        $this->_lastIconvError = true;
    }


    /**
     * Extracts content type, charset, attachment name and transfer encoding
     * from the headers of a message (part).
     *
     * The content type is returned even when the Content-Type header carries no
     * parameters at all (e.g. a bare "text/html").
     *
     * @param PartInterface $message
     * @return array with the keys 'contentType', 'charset', 'name' and
     *               'contentTransferEncoding'; missing values are empty strings and
     *               all values except 'name' are lower-cased
     */
    public function getEncodingInformation($message)
    {
        $contentTransferEncoding = "";
        $charset = "";
        $contentType = "";
        // also look up name for attachments
        $name = "";

        if ($message->getHeaders() && $message->getHeaders()->has('content-transfer-encoding')) {
            $contentTransferEncoding = $message->contentTransferEncoding;
        }

        try {
            if ($message->getHeaders() && $message->getHeaders()->has('content-type')) {
                // "type/subtype; param=value; param=value"
                $segments    = explode(';', (string) $message->contentType);
                $contentType = trim(array_shift($segments));

                foreach ($segments as $segment) {
                    $segment = trim($segment);
                    if (strpos($segment, 'charset') === 0) {
                        $charset = $segment;
                    } elseif (strpos($segment, 'name') === 0) {
                        $name = $segment;
                    }
                }

                if ($charset != "") {
                    // further parameters may have been separated by a line-feed or
                    // a space instead of ";" - only the first chunk is the charset
                    $chunks  = preg_split('/[\r\n ]/', $charset, 2);
                    $charset = str_replace(array('charset=', '"', "'"), '', $chunks[0]);
                }
                if ($name != "") {
                    $name = str_replace(array('name=', '"', "'"), '', $name);
                }
            }
        } catch (RuntimeException $e) {
            // unreadable content type header: report whatever was collected so far
        }

        return array (
            'contentType'             => strtolower($contentType),
            'charset'                 => strtolower($charset),
            'name'                    => $name,
            'contentTransferEncoding' => strtolower($contentTransferEncoding)
        );
    }


    /**
     * Walks the parts of a multipart/mixed message.
     *
     * The first text/plain, text/html and text/calendar parts become the bodies,
     * nested multipart containers are parsed recursively, and everything else -
     * including any part that declares a "name" - is handed to _parseAttachments().
     *
     * @param PartInterface $message
     * @param array $emailItem parse result, filled in place
     */
    private function _parseMultipartMixed(&$message, &$emailItem)
    {
        try {
            $limit = $message->countParts() + 1;
        } catch (Exception $e) {
            // broken mime structure: try to repair it
            $limit = $this->handleMimeEndMissing($message, $emailItem);
        }

        // text types => result key that receives the decoded body
        $bodyKeys = [
            'text/plain'    => 'contentTextPlain',
            'text/html'     => 'contentTextHtml',
            'text/calendar' => 'contentTextCalendar',
        ];

        for ($partNo = 1; $partNo < $limit; $partNo++) {
            $part     = $message->getPart($partNo);
            $partInfo = $this->getEncodingInformation($part);
            $partType = $partInfo['contentType'];

            // a part carrying a "name" is an attachment, whatever its type is
            if (isset($partInfo['name']) && $partInfo['name'] != "") {
                $this->_parseAttachments($part);
                continue;
            }

            if (isset($bodyKeys[$partType])) {
                $bodyKey = $bodyKeys[$partType];
                if (!isset($emailItem[$bodyKey])) {
                    $emailItem[$bodyKey] = $this->_decode($part->getContent(), $partInfo);
                }
            } elseif ($partType === 'multipart/related') {
                $this->_parseMultipartRelated($part, $emailItem);
            } elseif ($partType === 'multipart/alternative') {
                $this->_parseMultipartAlternative($part, $emailItem);
            } elseif ($partType === 'multipart/mixed') {
                $this->_parseMultipartMixed($part, $emailItem);
            } else {
                $this->_parseAttachments($part);
            }
        }
    }


    /**
     * Walks the parts of a multipart/alternative message.
     *
     * The first text/plain, text/html and text/calendar parts become the bodies;
     * multipart/related parts are parsed recursively. Other parts are skipped.
     *
     * @param PartInterface $message
     * @param array $emailItem parse result, filled in place
     * @return void
     */
    private function _parseMultipartAlternative(&$message, &$emailItem)
    {
        try {
            $limit = $message->countParts() + 1;
        } catch (Exception $e) {
            // broken mime structure: try to repair it
            $limit = $this->handleMimeEndMissing($message, $emailItem);
        }

        // text types => result key that receives the decoded body
        $bodyKeys = [
            'text/plain'    => 'contentTextPlain',
            'text/html'     => 'contentTextHtml',
            'text/calendar' => 'contentTextCalendar',
        ];

        for ($partNo = 1; $partNo < $limit; $partNo++) {
            $part     = $message->getPart($partNo);
            $partInfo = $this->getEncodingInformation($part);
            $partType = $partInfo['contentType'];

            if ($partType === 'multipart/related') {
                $this->_parseMultipartRelated($part, $emailItem);
                continue;
            }

            if (!isset($bodyKeys[$partType])) {
                continue;
            }

            $bodyKey = $bodyKeys[$partType];
            if (!isset($emailItem[$bodyKey])) {
                $emailItem[$bodyKey] = $this->_decode($part->getContent(), $partInfo);
            }
        }
    }


    /**
     * Walks the parts of a multipart/signed message.
     *
     * The first text/plain and text/html parts become the bodies, nested multipart
     * containers are parsed recursively, and all remaining parts (e.g. the
     * signature) are handed to _parseAttachments().
     *
     * @param Message $message
     * @param array $emailItem parse result, filled in place
     */
    private function _parseMultipartSigned(&$message, &$emailItem)
    {
        try {
            $len = $message->countParts() + 1;
        } catch (Exception $e) {
            // broken mime structure: try to repair it
            $len = $this->handleMimeEndMissing($message, $emailItem);
        }

        for ($i = 1; $i < $len; $i++) {
            $part = $message->getPart($i);

            $encodingInformation = $this->getEncodingInformation($part);
            $contentType = $encodingInformation['contentType'];

            switch ($contentType) {
                case 'text/plain':
                    if (!isset($emailItem['contentTextPlain'])) {
                        $emailItem['contentTextPlain'] = $this->_decode($part->getContent(), $encodingInformation);
                    }
                    break;

                case 'text/html':
                    if (!isset($emailItem['contentTextHtml'])) {
                        $emailItem['contentTextHtml'] = $this->_decode($part->getContent(), $encodingInformation);
                    }
                    break;

                case 'multipart/alternative':
                    $this->_parseMultipartAlternative($part, $emailItem);
                    break;

                case 'multipart/mixed':
                    // descend into the nested part itself; re-walking the signed
                    // container would hand its other parts to the attachment
                    // parser a second time
                    $this->_parseMultipartMixed($part, $emailItem);
                    break;

                case 'multipart/related':
                    $this->_parseMultipartRelated($part, $emailItem);
                    break;

                default:
                    $this->_parseAttachments($part);
                    break;
            }
        }
    }


    /**
     * Walks the parts of a multipart/report message (e.g. a delivery report).
     *
     * All text/plain and message/delivery-status parts are decoded and joined
     * (separated by PHP_EOL) into the plain text body, the first text/html part
     * becomes the html body and the remaining parts are handed to
     * _parseAttachments().
     *
     * @param Message $message
     * @param array $emailItem parse result, filled in place
     */
    private function _parseMultipartReport(&$message, &$emailItem)
    {
        try {
            $len = $message->countParts() + 1;
        } catch (Exception $e) {
            // broken mime structure: try to repair it
            $len = $this->handleMimeEndMissing($message, $emailItem);
        }

        for ($i = 1; $i < $len; $i++) {
            $part = $message->getPart($i);

            $encodingInformation = $this->getEncodingInformation($part);
            $contentType = $encodingInformation['contentType'];

            if (!$contentType) {
                // fall back to the raw header value; it may be missing (null), carry
                // parameters or use upper case, so normalise it before comparing
                $segments = explode(';', (string) $this->getPartContentType($part), 2);
                $contentType = strtolower(trim($segments[0]));
            }

            switch ($contentType) {
                case 'text/plain':
                case 'message/delivery-status':
                    $report = $this->_decode($part->getContent(), $encodingInformation);

                    if (!isset($emailItem['contentTextPlain'])) {
                        $emailItem['contentTextPlain']  = $report;
                    } else {
                        $emailItem['contentTextPlain'] .= PHP_EOL . $report;
                    }
                    break;

                case 'text/html':
                    if (!isset($emailItem['contentTextHtml'])) {
                        $emailItem['contentTextHtml'] = $this->_decode($part->getContent(), $encodingInformation);
                    }
                    break;

                default:
                    $this->_parseAttachments($part);
                    break;
            }
        }
    }


    /**
     * Walks the parts of a multipart/related message.
     *
     * The first text/plain and text/html parts become the bodies,
     * multipart/alternative parts are parsed recursively and every other part
     * is handed to _parseAttachments().
     *
     * @param PartInterface $message
     * @param array $emailItem parse result, filled in place
     */
    private function _parseMultipartRelated(&$message, &$emailItem)
    {
        try {
            $limit = $message->countParts() + 1;
        } catch (Exception $e) {
            // broken mime structure: try to repair it
            $limit = $this->handleMimeEndMissing($message, $emailItem);
        }

        // text types => result key that receives the decoded body
        $bodyKeys = [
            'text/plain' => 'contentTextPlain',
            'text/html'  => 'contentTextHtml',
        ];

        for ($partNo = 1; $partNo < $limit; $partNo++) {
            $part     = $message->getPart($partNo);
            $partInfo = $this->getEncodingInformation($part);
            $partType = $partInfo['contentType'];

            if (isset($bodyKeys[$partType])) {
                $bodyKey = $bodyKeys[$partType];
                if (!isset($emailItem[$bodyKey])) {
                    $emailItem[$bodyKey] = $this->_decode($part->getContent(), $partInfo);
                }
            } elseif ($partType === 'multipart/alternative') {
                $this->_parseMultipartAlternative($part, $emailItem);
            } else {
                $this->_parseAttachments($part);
            }
        }
    }


    /**
     * Collects a part as an attached e-mail if it is a message/rfc822 attachment.
     *
     * Parts of any other content type are ignored. The file name is taken from the
     * "name" parameter of the part; if there is none it is built from the subject
     * of the attached message, or from the EMAIL_ATTACHMENT translation when the
     * subject is empty or the attached message has invalid headers.
     *
     * @param PartInterface $part
     */
    private function _parseAttachments(PartInterface $part)
    {
        $encodingInformation = $this->getEncodingInformation($part);

        $fileName                = $encodingInformation['name'];
        $contentType             = $encodingInformation['contentType'];

        if (!$contentType) {
            // fall back to the raw header value; it may be missing (null), carry
            // parameters or use upper case, so normalise it before comparing
            $segments = explode(';', (string) $this->getPartContentType($part), 2);
            $contentType = strtolower(trim($segments[0]));
        }

        if ($contentType != 'message/rfc822' && $contentType != 'rfc822') {
            return;
        }

        try {
            $content = $part->getContent();

            try {
                $nm = \Zend\Mail\Message::fromString($content);
                if (!$fileName) {
                    $subject = (string) mb_decode_mimeheader((string) $nm->getSubject());

                    // an attached message without subject must not end up as ".eml"
                    if (trim($subject) === '') {
                        $subject = Lang::get('EMAIL_ATTACHMENT');
                    }
                    $fileName = $subject . '.eml';
                }
            } catch (InvalidHeaderException $e) {
                // keep a file name declared on the part, otherwise use the generic one
                if (!$fileName) {
                    $fileName = Lang::get('EMAIL_ATTACHMENT') . '.eml';
                }
            }

            $this->_attachedEmails[] = [
                'type' => $contentType,
                'content' => $content,
                'filename' => $fileName,
            ];
        } catch (RuntimeException $e) {
            // attachment could not be read - skip it
        } catch (\Zend\Mail\Exception\InvalidArgumentException $e) {
            // attachment could not be parsed - skip it
        }
    }


    /**
     * Handles an invalid mime message structure by adding the missing closing
     * boundary.
     *
     * If the message declares a boundary, the closing delimiter ("--boundary--")
     * is appended when absent (a dangling opening delimiter at the very end is
     * removed first) and $message is REPLACED by a new Message built from the
     * repaired content. Otherwise the content is decoded directly into $emailItem
     * when the message is text/plain or text/html.
     *
     * @param PartInterface $message replaced by the repaired message on success
     * @param array $emailItem parse result, filled in place when no boundary exists
     *
     * @return int exclusive upper limit for the callers' part loop: 2 (process the
     *             first part) after a repair, 0 (no parts) otherwise
     */
    public function handleMimeEndMissing(&$message, &$emailItem = null)
    {
        // defined up front so that the fallback below never sees an unset variable
        $ct = '';

        try {
            // this is a fallback for "end is missing", if a mime message does not contain
            // the closing boundary
            $ct = $message->getContent();

            if (!$message->getHeaders()->has('content-type')) {
                throw new \Zend\Mime\Exception\RuntimeException('No content type header/no boundary');
            }
            $boundary = $message->getHeader('content-type')->getParameter('boundary');

            if (!$boundary) {
                throw new \Zend\Mime\Exception\RuntimeException('No boundary');
            }

            $openingDelimiter = '--' . $boundary;
            $closingDelimiter = $openingDelimiter . '--';

            if (strpos($ct, $closingDelimiter) === false) {
                $trimmed = trim($ct);

                // plain string comparison: boundaries may contain characters that
                // are special in regular expressions ("/", "+", "?", "(", ...)
                if (substr($trimmed, -strlen($openingDelimiter)) === $openingDelimiter) {
                    // content ends with a dangling opening delimiter - drop it
                    $trimmed = substr($trimmed, 0, -strlen($openingDelimiter));
                    $ct = trim($trimmed) . "\r\n" . $closingDelimiter;
                } else {
                    $ct .= "\r\n" . $closingDelimiter;
                }
            }

            $message = new Message([
                'headers'    => $message->getHeaders(),
                'noTopLines' => true,
                'content'    => $ct,
            ]);

            return 2;
        } catch (\Zend\Mime\Exception\RuntimeException $e) {
            // not repairable as multipart: use the raw content as body if it is text
            $encodingInformation = $this->getEncodingInformation($message);
            $contentType = $encodingInformation['contentType'];

            if ($contentType == 'text/plain') {
                $emailItem['contentTextPlain'] = $this->_decode($ct, $encodingInformation);
            } elseif ($contentType == 'text/html') {
                $emailItem['contentTextHtml'] = $this->_decode($ct, $encodingInformation);
            }
            return 0;
        }
    }


    /**
     * Reads the raw value of the content-type header of a part.
     *
     * @param PartInterface $part
     * @return string|null header field value; null if the part has no such header
     *                     or its headers cannot be read
     */
    public function getPartContentType($part)
    {
        try {
            $headers = $part->getHeaders();
            $hasContentType = $headers instanceof Headers && $headers->has('content-type');

            if (!$hasContentType) {
                return null;
            }

            return $headers->get('content-type')->getFieldValue();
        } catch (Exception $ex) {
            // unreadable headers are treated like a missing content type
            return null;
        }
    }


    /**
     * Retrieves the decoded subject from a raw header block.
     *
     * Lines may be terminated by CRLF, LF or CR. Folded (continuation) lines of the
     * subject are unfolded: the pieces are joined by a single space, except between
     * two adjacent MIME encoded-words, where the separating whitespace is dropped.
     * Parsing stops at the first empty line (end of headers). A subject that is not
     * valid in the internal encoding is converted from iso-8859-2 to utf-8.
     *
     * @param string $headers raw headers
     * @return string subject on a single line ('' if there is no subject header)
     */
    public function getSubjectFromHeaders($headers)
    {
        $subject       = '';
        $isSubjectLine = false;
        $previousRaw   = '';

        foreach (preg_split('/\r\n|\r|\n/', (string) $headers) as $line) {
            if (preg_match('/^[\x21-\x39\x3B-\x7E]+:/', $line)) {
                // a new header starts here
                list ($headerName, $headerValue) = explode(':', $line, 2);

                $isSubjectLine = strtolower($headerName) == 'subject';
                if ($isSubjectLine) {
                    $previousRaw = trim($headerValue);
                    if (preg_match("/=\?/", $headerValue)) {
                        $subject = trim(mb_decode_mimeheader($headerValue));
                    } else {
                        $subject = $previousRaw;
                    }
                }
            } elseif (preg_match('/^\s/', $line)) {
                // continuation: append to current header
                if (!$isSubjectLine) {
                    continue;
                }

                $segment = trim($line);
                if ($segment === '') {
                    continue;
                }

                // whitespace between two encoded-words is not part of the text
                $joinsEncodedWords = substr($previousRaw, -2) === '?=' && strpos($segment, '=?') === 0;
                if (!$joinsEncodedWords && $subject !== '') {
                    $subject .= ' ';
                }

                if (preg_match("/=\?/", $segment)) {
                    $subject .= trim(mb_decode_mimeheader($segment));
                } else {
                    $subject .= $segment;
                }
                $previousRaw = $segment;
            } elseif ($line === '') {
                // empty line indicates end of headers
                break;
            }
        }

        if (!mb_check_encoding($subject)) {
            $subject = mb_convert_encoding($subject, 'utf-8', array('iso-8859-2'));
        }

        return str_replace(["\r\n", "\r", "\n"], ' ', $subject);
    }


    /**
     * Returns the message id of a message.
     *
     * If the message-id header occurs several times, the first non-empty value
     * wins. When no id is found and $generateOnEmpty is set, an md5 based id is
     * generated - from $generateFrom if given, otherwise from the current time.
     *
     * @param PartInterface $message message source
     * @param bool          $generateOnEmpty generates an ID when no header found
     * @param array         $generateFrom values the generated ID is derived from
     *
     * @return string|null message id; may be empty when none is found and
     *                     $generateOnEmpty is false
     */
    public function getMessageId(PartInterface $message, $generateOnEmpty = false, array $generateFrom = null)
    {
        $messageIdValue = null;
        $headers        = $message->getHeaders();
        $messageId      = $headers->has('message-id') ? $headers->get('message-id') : null;

        if ($messageId instanceof ArrayIterator) {
            // the header occurred more than once: take the first non-empty value
            foreach ($messageId as $singleHeader) {
                if (!($singleHeader instanceof HeaderInterface)) {
                    continue;
                }

                $candidate = $singleHeader->getFieldValue();
                if ($candidate) {
                    $messageIdValue = $candidate;
                    break;
                }
            }
        } elseif ($messageId instanceof HeaderInterface) {
            $messageIdValue = $messageId->getFieldValue();
        }

        if (!$generateOnEmpty || !empty($messageIdValue)) {
            return $messageIdValue;
        }

        $seed = $generateFrom ? implode('|', $generateFrom) : microtime(true);

        return md5('singu-inbox-msg-id-' . $seed);
    }


    /**
     * Retrieves the value of a recipient header (to, cc, bcc).
     *
     * When the header occurs several times, the values of all occurrences are
     * joined by a comma.
     *
     * @param PartInterface $message
     * @param string        $headerField header name
     *
     * @return string header value, '' if the header does not exist
     */
    public function getRecipients(PartInterface $message, $headerField)
    {
        $headers = $message->getHeaders();

        if (!$headers->has($headerField)) {
            return '';
        }

        $headerValue = $headers->get($headerField);

        if (!($headerValue instanceof ArrayIterator)) {
            return $headerValue->getFieldValue();
        }

        $fieldValues = [];
        foreach ($headerValue as $occurrence) {
            $fieldValues[] = $occurrence->getFieldValue();
        }

        return implode(',', $fieldValues);
    }


    /**
     * Retrieves sender info from a message.
     *
     * The first header present wins, in this order: reply-to, from, sender,
     * resent-from, resent-sender.
     *
     * @param PartInterface $message
     * @param string        $defaultOnEmpty returned when none of the headers exists
     *
     * @return string
     */
    public function getSender(PartInterface $message, $defaultOnEmpty = 'unknown')
    {
        $headers = $message->getHeaders();

        // header name => property of the message that exposes its value
        $candidates = [
            'reply-to'      => 'replyTo',
            'from'          => 'from',
            'sender'        => 'sender',
            'resent-from'   => 'resentFrom',
            'resent-sender' => 'resentSender',
        ];

        foreach ($candidates as $headerName => $property) {
            if ($headers->has($headerName)) {
                return $message->{$property};
            }
        }

        return $defaultOnEmpty;
    }


    /**
     * Removes meta tags from html body contents.
     *
     * The input is passed through quoted_printable_decode() before the
     * <meta ...> tags are stripped (case-insensitively).
     *
     * @param string $html
     * @return string
     */
    public function stripMetaTags($html)
    {
        $decodedHtml = quoted_printable_decode($html);

        return preg_replace('/<meta[^>]+\>/i', "", $decodedHtml);
    }
}
