Skip to content

Commit

Permalink
Minor tweaks and fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ilijastuden committed Apr 22, 2015
1 parent e680cdb commit 5935bb9
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 52 deletions.
23 changes: 9 additions & 14 deletions src/ActiveCollab/EmailReplyExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,20 @@ final class EmailReplyExtractor
const OUTLOOK = 'Outlook';
const YAHOO = 'Yahoo';

public static function extract($path)
/**
* Parse input file and return reply
*
* @param string $path
* @return string
*/
public static function extractReply($path)
{
$parser = new Parser();
$parser->setPath($path);

$extractor = self::getExtractor(self::detectMailer(self::getHeadersRelevantForMailerDetection($parser)), $parser);

print get_class($extractor) . " (" . basename($path) . "):\n";
print (string) $extractor . "\n-------------------------\n";
}

public static function extractReply($path)
{

}

public static function extractForward($path)
{

return (string) $extractor;
}

/**
Expand Down Expand Up @@ -116,7 +111,7 @@ private static function getHeadersRelevantForMailerDetection(Parser &$parser)
* @param string $niddle
* @return boolean
*/
public static function str_starts_with($string, $niddle) {
public static function strStartsWith($string, $niddle) {
    // Compare the leading bytes with strncmp() instead of `substr(...) == $niddle`.
    // Loose `==` compares two numeric-looking strings as numbers, so "100" would
    // be reported as starting with "1e2"; on PHP < 8 substr() can also return
    // false for an empty $string, and false == "0" is true.
    // An empty $niddle compares zero bytes and therefore always matches, which is
    // what the previous implementation returned as well.
    return strncmp($string, $niddle, strlen($niddle)) === 0;
}
}
73 changes: 36 additions & 37 deletions src/ActiveCollab/EmailReplyExtractor/Extractor/Extractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,21 @@ public function joinLines()
}

/**
* Return splitters
* Return original message splitters
*
* @todo
* @return array
*/
protected function getOriginalMessageSplitters()
{
    // Literal marker strings, returned as a plain list in exactly this order.
    // How the markers are matched against a message is up to the caller.
    $splitters = [];

    // "Reply above this line" markers: full form, the two variants missing
    // the dashes on one side, and the mixed-case form.
    $splitters[] = '-- REPLY ABOVE THIS LINE --';
    $splitters[] = '-- REPLY ABOVE THIS LINE';
    $splitters[] = 'REPLY ABOVE THIS LINE --';
    $splitters[] = '-- Reply above this line --';

    // "Original Message" separators, with and without padding spaces.
    $splitters[] = '-----Original Message-----';
    $splitters[] = '----- Original Message -----';

    // NOTE(review): presumably a localized reply marker - confirm.
    $splitters[] = '-- ODGOVORI ODJE --';

    // Longer dashed variant with a lower-case "message".
    $splitters[] = '-------- Original message --------';

    return $splitters;
}
Expand Down Expand Up @@ -285,53 +289,48 @@ protected function &getParser()
}

/**
* Convert HTML to plain text (email style)
*
* @param string $html
* @param boolean $clean
* @param string $html
* @return string
*/
static function toPlainText($html, $clean = false) {
static function toPlainText($html) {
$plain = (string) $html;

// strip slashes
$plain = (string) trim(stripslashes($plain));

// strip unnecessary characters
$plain = (string) preg_replace(array(
"/\r/", // strip carriage returns
"/<script[^>]*>.*?<\/script>/si", // strip immediately, because we don't need any data from it
"/<style[^>]*>.*?<\/style>/is", // strip immediately, because we don't need any data from it
"/style=\".*?\"/" //was: '/style=\"[^\"]*/'
), "", $plain);
$plain = (string) preg_replace([
"/\r/", // strip carriage returns
"/<script[^>]*>.*?<\/script>/si", // strip immediately, because we don't need any data from it
"/<style[^>]*>.*?<\/style>/is", // strip immediately, because we don't need any data from it
"/style=\".*?\"/" //was: '/style=\"[^\"]*/'
], "", $plain);

// entities to convert (this is not a definite list)
$entities = array(
' ' => array('&nbsp;', '&#160;'),
'"' => array('&quot;', '&rdquo;', '&ldquo;', '&#8220;', '&#8221;', '&#147;', '&#148;'),
'\'' => array('&apos;', '&rsquo;', '&lsquo;', '&#8216;', '&#8217;'),
'>' => array('&gt;'),
'<' => array('&lt;'),
'&' => array('&amp;', '&#38;'),
'(c)' => array('&copy;', '&#169;'),
'(R)' => array('&reg;', '&#174;'),
'(tm)' => array('&trade;', '&#8482;', '&#153;'),
'--' => array('&mdash;', '&#151;', '&#8212;'),
'-' => array('&ndash;', '&minus;', '&#8211;', '&#8722;'),
'*' => array('&bull;', '&#149;', '&#8226;'),
'' => array('&pound;', '&#163;'),
'EUR' => array('&euro;', '&#8364;')
);
$entities = [
' ' => [ '&nbsp;', '&#160;' ],
'"' => [ '&quot;', '&rdquo;', '&ldquo;', '&#8220;', '&#8221;', '&#147;', '&#148;' ],
'\'' => [ '&apos;', '&rsquo;', '&lsquo;', '&#8216;', '&#8217;' ],
'>' => [ '&gt;' ],
'<' => [ '&lt;' ],
'&' => [ '&amp;', '&#38;' ],
'(c)' => [ '&copy;', '&#169;' ],
'(R)' => [ '&reg;', '&#174;' ],
'(tm)' => [ '&trade;', '&#8482;', '&#153;' ],
'--' => [ '&mdash;', '&#151;', '&#8212;' ],
'-' => [ '&ndash;', '&minus;', '&#8211;', '&#8722;' ],
'*' => [ '&bull;', '&#149;', '&#8226;' ],
'' => [ '&pound;', '&#163;' ],
'EUR' => [ '&euro;', '&#8364;' ]
];

// convert specified entities
foreach ($entities as $character => $entity) {
$plain = (string) str_replace($entity, $character, $plain);
}

// strip other not previously converted entities
$plain = (string) preg_replace(array(
'/&[^&;]+;/si',
), "", $plain);
$plain = (string) preg_replace([ '/&[^&;]+;/si' ], "", $plain);

// <p> converts to 2 newlines
$plain = (string) preg_replace('/<p[^>]*>/i', "\n\n", $plain); // <p>
Expand All @@ -341,8 +340,8 @@ static function toPlainText($html, $clean = false) {
return "\n\n" . mb_strtoupper($matches[1]) . "\n\n";
}, $plain); // <h1-h6>

$plain = (string) preg_replace_callback(array('/<b[^>]*>(.*?)<\/b>/i', '/<strong[^>]*>(.*?)<\/strong>/i'), function($matches) {
return mb_strtoupper($matches[1]);
$plain = (string) preg_replace_callback([ '/<b[^>]*>(.*?)<\/b>/i', '/<strong[^>]*>(.*?)<\/strong>/i' ], function($matches) {
return $matches[1];
}, $plain); // <b> <strong>

// deal with italic elements
Expand Down Expand Up @@ -372,9 +371,9 @@ static function toPlainText($html, $clean = false) {
$url = $matches[1];
$text = $matches[2];

if (EmailReplyExtractor::str_starts_with($url, 'http://') || EmailReplyExtractor::str_starts_with($url, 'https://')) {
if (EmailReplyExtractor::strStartsWith($url, 'http://') || EmailReplyExtractor::strStartsWith($url, 'https://')) {
return "$text [$url]";
} else if (EmailReplyExtractor::str_starts_with($url, 'mailto:')) {
} else if (EmailReplyExtractor::strStartsWith($url, 'mailto:')) {
return $text . ' [' . substr($url, 7) . ']';
} else {
return $text;
Expand All @@ -390,8 +389,8 @@ static function toPlainText($html, $clean = false) {
if (!empty($lines)) {
foreach ($lines as $line) {
$return[] = '> ' . $line;
} // if
} // if
}
}
return "\n\n" . implode("\n", $return) . "\n\n";
}, $plain);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public function testExampleMessages()
}

if ($file->isFile() && $file->getExtension() == 'eml') {
EmailReplyExtractor::extract($file->getPathname());
$this->assertEquals('Email Reply', EmailReplyExtractor::extractReply($file->getPathname()));
}
}
}
Expand Down

0 comments on commit 5935bb9

Please sign in to comment.