* @license GNU General Public License 2.0
 */
class AutoP
{
    // Overview: parse() tags every matched pair of HTML tags with a unique id,
    // swaps each tagged element for a short placeholder ("<p:0000/>" for
    // paragraph-capable elements, "<b:0000/>" for inline ones), splits the
    // remaining plain text into <p> paragraphs / <br /> line breaks, and finally
    // substitutes the stored elements back in.

    // Tags that already act as paragraph-level containers; fixParagraph() does
    // not add <p> / </p> around text that starts / ends with one of them.
    private const BLOCK = 'p|pre|div|blockquote|form|ul|ol|dd|table|ins|h1|h2|h3|h4|h5|h6';

    /**
     * Counter used to build the next unique tag id.
     *
     * @access private
     * @var int
     */
    private $uniqueId = 0;

    /**
     * Extracted elements, keyed by the placeholder that stands in for them.
     *
     * @access private
     * @var array
     */
    private $blocks = [];

    /**
     * preg_replace_callback() handler: stores one tagged element and returns
     * the placeholder that replaces it in the text.
     *
     * @param array $matches [1] tag name, [2] unique id (":NNNN"),
     *                       [3] attribute string, [4] inner text
     * @return string the placeholder key ("<b:NNNN/>" or "<p:NNNN/>")
     */
    public function replaceBlockCallback(array $matches): string
    {
        $tagMatch = '|' . $matches[1] . '|';
        $text = $matches[4];

        switch (true) {
            /** Line breaks inside these tags become <br /> */
            case false !== strpos(
                '|li|dd|dt|td|p|a|span|cite|strong|sup|sub|small|del|u|i|b|ins|h1|h2|h3|h4|h5|h6|',
                $tagMatch
            ):
                $text = nl2br(trim($text));
                break;

            /** Line breaks inside these tags are turned into paragraphs */
            case false !== strpos('|div|blockquote|form|', $tagMatch):
                $text = $this->cutByBlock($text);

                // Only wrap the content in <p>...</p> when it really was split
                // into more than one paragraph.
                if (false !== strpos($text, '</p><p>')) {
                    $text = $this->fixParagraph($text);
                }
                break;

            default:
                break;
        }

        /** Inline tags get a "<b…/>" placeholder, which cutByBlock() leaves inside the paragraph */
        if (false !== strpos('|a|span|font|code|cite|strong|sup|sub|small|del|u|i|b|', $tagMatch)) {
            $key = '<b' . $matches[2] . '/>';
        } else {
            /** Everything else gets a "<p…/>" placeholder, which cutByBlock() isolates from paragraphs */
            $key = '<p' . $matches[2] . '/>';
        }

        // Store the element without its unique id, with its closing tag restored.
        $this->blocks[$key] = "<{$matches[1]}{$matches[3]}>{$text}</{$matches[1]}>";

        return $key;
    }

    /**
     * Splits text into paragraphs: runs of two or more newlines become
     * "</p><p>", single newlines become "<br />", and "<p:NNNN/>" placeholders
     * are kept outside of any paragraph.
     *
     * The result has no leading "<p>" and no trailing "</p>"; fixParagraph()
     * adds those.
     *
     * @param string $text
     * @return string
     */
    private function cutByBlock(string $text): string
    {
        // An ASCII space or a full-width (ideographic) space.
        $space = "( |　)";

        $text = str_replace("\r\n", "\n", trim($text));

        // Strip spaces surrounding each newline.
        $text = preg_replace("/{$space}*\n{$space}*/is", "\n", $text);

        // Close the paragraph before a block placeholder and open a new one after it.
        $text = preg_replace("/\s*<p:([0-9]{4})\/>\s*/is", "</p><p:\\1/><p>", $text);

        // Blank lines separate paragraphs; remaining single newlines become <br />.
        $text = preg_replace("/\n{2,}/", "</p><p>", $text);
        $text = nl2br($text);

        // Drop a paragraph opener / closer that ended up directly around a placeholder.
        $text = preg_replace("/(<p>)?\s*<p:([0-9]{4})\/>\s*(<\/p>)?/is", "<p:\\2/>", $text);

        // Remove empty paragraphs, a dangling "<p>" at the end and a dangling "</p>" at the start.
        $text = preg_replace("/<p>{$space}*<\/p>/is", '', $text);
        $text = preg_replace("/\s*<p>\s*$/is", '', $text);
        $text = preg_replace("/^\s*<\/p>\s*/is", '', $text);

        return $text;
    }

    /**
     * Makes sure the text starts with an opening and ends with a closing
     * paragraph-level tag, adding "<p>" / "</p>" where one is missing.
     *
     * @param string $text
     * @return string
     */
    private function fixParagraph(string $text): string
    {
        $text = trim($text);

        if (!preg_match("/^<(" . self::BLOCK . ")(\s|>)/i", $text)) {
            $text = '<p>' . $text;
        }

        if (!preg_match("/<\/(" . self::BLOCK . ")>$/i", $text)) {
            $text = $text . '</p>';
        }

        return $text;
    }

    /**
     * Converts newline-separated text (optionally containing HTML) into HTML
     * paragraphs.
     *
     * @param string $text
     * @return string
     */
    public function parse(string $text): string
    {
        /** Reset per-call state */
        $this->uniqueId = 0;
        $this->blocks = [];

        /** Remove whitespace between existing paragraphs and around existing <br> tags */
        $text = preg_replace(
            ["/<\/p>\s+<p(\s*)/is", "/\s*<br\s*\/?>\s*/is"],
            ["</p><p\\1", "<br />"],
            trim($text)
        );

        /** Tag every opening/closing pair with a unique id */
        $foundTagCount = 0;
        $textLength = strlen($text);
        $uniqueIdList = [];

        if (preg_match_all("/<\/\s*([a-z0-9]+)>/is", $text, $matches, PREG_OFFSET_CAPTURE)) {
            foreach ($matches[0] as $key => $match) {
                $tag = $matches[1][$key][0];

                // A negative offset counted from the end of the (growing) text
                // still points at this closing tag, so strrpos() only looks at
                // what precedes it. Already tagged openers ("<tag:NNNN") match
                // neither needle and are skipped.
                $leftOffset = $match[1] - $textLength;
                $posSingle = strrpos($text, '<' . $tag . '>', $leftOffset);
                $posFix = strrpos($text, '<' . $tag . ' ', $leftOffset);

                // Take the nearest opener of either form, or false when there is none.
                if (false === $posSingle) {
                    $pos = $posFix;
                } elseif (false === $posFix) {
                    $pos = $posSingle;
                } else {
                    $pos = max($posSingle, $posFix);
                }

                if (false !== $pos) {
                    $uniqueId = $this->makeUniqueId();
                    $uniqueIdList[$uniqueId] = $tag;
                    $tagLength = strlen($tag);

                    // Insert the id right after the tag name of the opener ...
                    $text = substr_replace($text, $uniqueId, $pos + 1 + $tagLength, 0);

                    // ... and of the closer. Every pair tagged so far added 10
                    // bytes (2 ids of 5 bytes) in front of it.
                    $text = substr_replace(
                        $text,
                        $uniqueId,
                        $match[1] + 7 + $foundTagCount * 10 + $tagLength,
                        0
                    ); // 7 = 5 (id just added to the opener) + 2 (the "</")
                    $foundTagCount++;
                }
            }
        }

        /** Replace each tagged element by its placeholder, in the order the ids were issued */
        foreach ($uniqueIdList as $uniqueId => $tag) {
            $text = preg_replace_callback(
                "/<({$tag})({$uniqueId})([^>]*)>(.*)<\/\\1\\2>/is",
                [$this, 'replaceBlockCallback'],
                $text,
                1
            );
        }

        $text = $this->cutByBlock($text);

        // Restore outermost elements first: their stored markup still contains
        // the placeholders of the elements nested inside them.
        $blocks = array_reverse($this->blocks);

        foreach ($blocks as $blockKey => $blockValue) {
            $text = str_replace($blockKey, $blockValue, $text);
        }

        return $this->fixParagraph($text);
    }

    /**
     * Builds the next unique id (":0000", ":0001", ...).
     *
     * The offset arithmetic in parse() and the placeholder patterns in
     * cutByBlock() assume exactly four digits, so at most 10,000 tag pairs
     * are supported per parse() call.
     *
     * @return string
     */
    private function makeUniqueId(): string
    {
        return ':' . str_pad((string) $this->uniqueId++, 4, '0', STR_PAD_LEFT);
    }
}