You should not use regular expressions to process HTML. Even a small change in the HTML would make your regex fail: an extra space, a change in the tag's attributes, a comment, or a more complex structure would break even a gigantic regex.
Processing HTML with the DOM is very easy; it is the tool designed for exactly that.
The DOM is simply generated as follows:
// Input: an HTML fragment in which "foo" appears inside several elements
// and as bare text.
$html = '
<div class="try">foo</div>
<div>foo</div>
foo
<span>foo</span>';
// Build the DOM. LIBXML_NONET disables network access while parsing;
// LIBXML_COMPACT enables libxml's small-node allocation optimisation.
$doc = new DOMDocument;
$doc->loadHTML($html, LIBXML_COMPACT | LIBXML_NONET);
Taking the body of the generated document:
$body = $doc->getElementsByTagName('body')->item(0);
And we can go through all the child nodes:
foreach ($nodoDOM->childNodes as $nodo) {
Replacing text nodes:
if ($nodo->nodeType == XML_TEXT_NODE) {
$nodo->nodeValue = str_ireplace( $buscar, $reemplazo, $nodo->nodeValue);
}
Or recursively visiting the nodes that do not have the class "try":
if( !$nodo->hasAttributes()
|| !in_array("try", explode(" ", $nodo->getAttribute("class"))) ) {
Code
<?php
// Input markup: "foo" appears in a div with class "try", in a plain div,
// as bare text, and in a span.
$html = '
<div class="try">foo</div>
<div>foo</div>
foo
<span>foo</span>';
echo "TEXTO ORIGINAL:", $html;
// Parse the markup into a DOM tree and grab the generated <body> element.
$doc = new DOMDocument();
$doc->loadHTML($html, LIBXML_NONET | LIBXML_COMPACT);
$body = $doc->getElementsByTagName('body')->item(0);
// Replace "foo" with "bar" recursively, leaving elements of class "try" alone.
$html = buscarNodoDOM($body, "foo", "bar", "try");
/**
 * Replaces text inside a DOM subtree, skipping elements that carry a given class.
 *
 * Every child of $nodoDOM is visited. Text nodes have $buscar replaced by
 * $reemplazo, case-insensitively (str_ireplace). Any other node that has
 * children is walked recursively, unless it is an element whose "class"
 * attribute contains $claseProhibida as one of its tokens. The DOM is
 * modified in place.
 *
 * @param DOMNode $nodoDOM        Node whose children are processed.
 * @param mixed   $buscar         Search value, handed to str_ireplace() as-is.
 * @param mixed   $reemplazo      Replacement value, handed to str_ireplace() as-is.
 * @param string  $claseProhibida Class name whose elements must be left untouched.
 * @param bool    $recursivo      True for the function's own recursive calls; no
 *                                HTML is collected in that case.
 *
 * @return string Concatenated HTML of the direct children of $nodoDOM when
 *                $recursivo is false; an empty string otherwise.
 */
function buscarNodoDOM(DOMNode $nodoDOM, $buscar, $reemplazo, $claseProhibida, $recursivo = false) {
    $resultado = "";

    foreach ($nodoDOM->childNodes as $nodo) {
        if ($nodo->nodeType === XML_TEXT_NODE) {
            // Text node: apply the replacement directly.
            $nodo->nodeValue = str_ireplace($buscar, $reemplazo, $nodo->nodeValue);
        } elseif ($nodo->hasChildNodes()) {
            // Class names are separated by any run of HTML ASCII whitespace
            // (space, tab, LF, FF, CR), not only by a single space, so split
            // on all of them. Only elements can carry a class attribute.
            $clases = [];
            if ($nodo instanceof DOMElement) {
                $clases = preg_split(
                    '/[ \t\n\f\r]+/',
                    $nodo->getAttribute("class"),
                    -1,
                    PREG_SPLIT_NO_EMPTY
                ) ?: [];
            }

            if (!in_array((string) $claseProhibida, $clases, true)) {
                // Not a forbidden element: descend into its children.
                buscarNodoDOM($nodo, $buscar, $reemplazo, $claseProhibida, true);
            }
        }

        if (!$recursivo) {
            // Top-level call: collect the serialized HTML of each direct child.
            $resultado .= $nodoDOM->ownerDocument->saveHTML($nodo);
        }
    }

    return $resultado;
}
// Print the HTML returned by the replacement pass.
echo "\n\nTEXTO MODIFICADO:\n", $html;
Result:
TEXTO ORIGINAL:
<div class="try">foo</div>
<div>foo</div>
foo
<span>foo</span>
TEXTO MODIFICADO:
<div class="try">foo</div>
<div>bar</div>
bar
<span>bar</span>
Demo:
See the demo at 3v4l.org