koenpunt
7/28/2012 - 5:00 PM

PHP port of gfm at https://gist.github.com/118964

PHP port of gfm at https://gist.github.com/118964

<?php

require 'gfm.php';

/**
 * Behavioural tests for gfm(): underscore escaping, handling of <pre> blocks
 * and conversion of single newlines into Markdown hard line breaks.
 */
class GFMTest extends PHPUnit_Framework_TestCase{

	/**
	 * A word containing a single underscore is returned as-is.
	 */
	public function testShouldNotTouchSingleUnderscoresInsideWords(){
		$input = "foo_bar";
		$expected = "foo_bar";

		$this->assertEquals($expected, gfm($input));
	}

	/**
	 * Lines indented by four spaces are left alone.
	 */
	public function testShouldNotTouchUnderscoresInCodeBlocks(){
		$input = "    foo_bar_baz";
		$expected = "    foo_bar_baz";

		$this->assertEquals($expected, gfm($input));
	}

	/**
	 * The content of a <pre> block is not escaped; the block comes back
	 * prefixed with a blank line.
	 */
	public function testShouldNotTouchUnderscoresInPreBlocks(){
		$input = "<pre>\nfoo_bar_baz\n</pre>";
		$expected = "\n\n<pre>\nfoo_bar_baz\n</pre>";

		$this->assertEquals($expected, gfm($input));
	}

	/**
	 * A <pre> block yields the same output whether it follows blank lines
	 * or ordinary text; only the differing prefixes are cut off before
	 * comparing.
	 */
	public function testShouldNotTreatPreBlocksWithPreTextDifferently(){
		$afterBlankLines = "\n\n<pre>\nthis is `a\\_test` and this\\_too\n</pre>";
		$afterText = "hmm<pre>\nthis is `a\\_test` and this\\_too\n</pre>";

		$fromBlankLines = substr(gfm($afterBlankLines), 2);
		$fromText = substr(gfm($afterText), 3);

		$this->assertEquals( $fromBlankLines, $fromText );
	}

	/**
	 * A word with two underscores gets each underscore backslash-escaped.
	 */
	public function testShouldEscapeTwoOrMoreUnderscoresInsideWords(){
		$input = "foo_bar_baz";
		$expected = "foo\\_bar\\_baz";

		$this->assertEquals( $expected, gfm($input) );
	}

	/**
	 * A single newline between two text lines becomes "two spaces + newline".
	 */
	public function testShouldTurnNewlinesIntoBrTagsInSimpleCases(){
		$input = "foo\nbar";
		$expected = "foo  \nbar";

		$this->assertEquals( $expected, gfm($input) );
	}

	/**
	 * Every paragraph gets its inner newlines converted, while the blank
	 * line between paragraphs is kept.
	 */
	public function testShouldConvertNewlinesInAllGroups(){
		$input = "apple\npear\norange\n\nruby\npython\nerlang";
		$expected = "apple  \npear  \norange\n\nruby  \npython  \nerlang";

		$this->assertEquals( $expected, gfm($input) );
	}

	/**
	 * Same as above with a longer first paragraph.
	 */
	public function testShouldConvertNewlinesInEvenLongGroups(){
		$input = "apple\npear\norange\nbanana\n\nruby\npython\nerlang";
		$expected = "apple  \npear  \norange  \nbanana\n\nruby  \npython  \nerlang";

		$this->assertEquals( $expected, gfm($input) );
	}

	/**
	 * Lines starting with "#" or "*" must come back unchanged.
	 */
	public function testShouldNotConvertNewlinesInLists(){
		$lists = array("# foo\n# bar", "* foo\n* bar");

		foreach( $lists as $list ){
			$this->assertEquals( $list, gfm($list) );
		}
	}
}
<?php

/**
 * Pre-process Markdown text with GFM-style tweaks before it is rendered.
 *
 * Three things happen, in this order:
 *  1. every <pre>...</pre> block is swapped for a placeholder so the later
 *     steps cannot touch its content;
 *  2. a word with two or more underscores that starts a line (and is not
 *     indented by four spaces or a tab) gets its underscores backslash-escaped
 *     so no part of it is rendered as emphasis;
 *  3. a line starting with a word character or "<" that is followed by a
 *     single newline gets two trailing spaces, i.e. a Markdown hard break.
 * Finally the <pre> blocks are put back, each preceded by a blank line.
 *
 * @param string $text Raw Markdown source.
 * @return string The pre-processed Markdown source.
 */
function gfm($text){
	# Extract pre blocks, keyed by the md5 of their content
	$extractions = array();

	$text = preg_replace_callback('/<pre>.*?<\/pre>/s', function($matches) use (&$extractions){
		$block = $matches[0];
		$key = md5($block);
		$extractions[$key] = $block;
		# Plain concatenation: "${var}" interpolation is deprecated since PHP 8.2
		return '{gfm-extraction-' . $key . '}';
	}, $text);

	# prevent foo_bar_baz from ending up with an italic word in the middle.
	# The "m" modifier makes ^ match at the start of every line, not only at
	# the start of the whole text. The pattern itself already requires two
	# underscores, so every match is escaped; the callback must always return
	# a string, otherwise the matched word would be replaced by nothing.
	$text = preg_replace_callback('/(^(?! {4}|\t)\w+_\w+_\w[\w_]*)/m', function($matches){
		return str_replace('_', '\_', $matches[0]);
	}, $text);

	# in very clear cases, let newlines become <br /> tags
	$text = preg_replace_callback('/^[\w\<][^\n]*\n+/m', function($matches){
		$line = $matches[0];
		# Two or more newlines mark a paragraph break: keep it untouched
		if( preg_match('/\n{2}/', $line) ){
			return $line;
		}
		# Single newline: two trailing spaces force a hard line break
		return trim($line) . "  \n";
	}, $text);

	# Insert pre block extractions
	$text = preg_replace_callback('/\{gfm-extraction-([0-9a-f]{32})\}/', function($matches) use ($extractions){
		$key = $matches[1];
		# Text that merely looks like a placeholder was not produced by the
		# extraction step; hand it back unchanged instead of reading a missing key
		if( !isset($extractions[$key]) ){
			return $matches[0];
		}
		return "\n\n" . $extractions[$key];
	}, $text);

	return $text;
}