@@ -1035,8 +1035,7 @@ protected function grabArticle(?JSLikeHTMLElement $page = null): ?JSLikeHTMLElem
1035
1035
}
1036
1036
}
1037
1037
1038
- if ($ this ->hasSingleTagInsideElement ($ node , 'p ' ) && $ this ->getLinkDensity ($ node ) < 0.25 ) {
1039
- $ newNode = $ node ->childNodes ->item (0 );
1038
+ if (($ newNode = $ this ->getSingleTagInsideElement ($ node , 'p ' )) !== null && $ this ->getLinkDensity ($ node ) < 0.25 ) {
1040
1039
$ node ->parentNode ->replaceChild ($ newNode , $ node );
1041
1040
$ nodesToScore [] = $ newNode ;
1042
1041
}
@@ -1538,10 +1537,10 @@ private function isPhrasingContent($node): bool
1538
1537
1539
1538
/**
1540
1539
* Checks if `$node` has only whitespace and a single element with `$tag` for the tag name.
1541
- * Returns false if `$node` contains non-empty text nodes
1540
+ * Returns the matched element, or `null` if `$node` contains non-empty text nodes
1542
1541
* or if it contains no element with given tag or more than 1 element.
1543
1542
*/
1544
- private function hasSingleTagInsideElement (JSLikeHTMLElement $ node , string $ tag ): bool
1543
+ private function getSingleTagInsideElement (JSLikeHTMLElement $ node , string $ tag ): ? JSLikeHTMLElement
1545
1544
{
1546
1545
$ childNodes = iterator_to_array ($ node ->childNodes );
1547
1546
$ children = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof \DOMElement);
@@ -1554,7 +1553,7 @@ private function hasSingleTagInsideElement(JSLikeHTMLElement $node, string $tag)
1554
1553
// And there should be no text nodes with real content
1555
1554
$ a = array_filter ($ childNodes , fn ($ childNode ) => $ childNode instanceof \DOMText && preg_match ($ this ->regexps ['hasContent ' ], $ this ->getInnerText ($ childNode )));
1556
1555
1557
- return 0 === \count ($ a );
1556
+ return 0 === \count ($ a ) ? $ children [ 0 ] : null ;
1558
1557
}
1559
1558
1560
1559
/**
0 commit comments