// Load the XML file. simplexml_load_file() returns false when the file cannot
// be read or parsed, so stop here instead of calling xpath() on false.
$xml = simplexml_load_file( 'sample.xml' );
if ( $xml === false ) {
    throw new RuntimeException( 'Failed to load sample.xml' );
}

// Evaluate the XPath expression; the matching nodes are returned as an array
// of SimpleXMLElement objects.
$result = $xml->xpath( '/foo/bar' );
public array SimpleXMLElement::xpath ( string $path ) — PHP: SimpleXMLElement::xpath - Manual
SimpleXMLの詳細については、SimpleXMLの使用方法で解説しています。
「PHP で XPath を使用する」の例を参考に解説します。
対象とするのは、次のXML文書です。
<?xml version="1.0" encoding="utf-8"?> <PRODUCTS> <PRODUCT category="software"> <SKU>soft5678</SKU> <SUB_CATEGORY>Business Analysis</SUB_CATEGORY> <NAME>Widget Reporting</NAME> <PRICE>4500</PRICE> </PRODUCT> <PRODUCT category="software"> <SKU>soft32323</SKU> <SUB_CATEGORY>Business Analysis</SUB_CATEGORY> <NAME>Pro Reporting</NAME> <PRICE>2300</PRICE> </PRODUCT> <PRODUCT category="storage"> <SKU>soft32323</SKU> <SUB_CATEGORY>Tape Systems</SUB_CATEGORY> <NAME>Tapes Abound</NAME> <PRICE>2300</PRICE> </PRODUCT> <PRODUCT category="storage"> <SKU>soft32323</SKU> <SUB_CATEGORY>Disk Systems</SUB_CATEGORY> <NAME>Widget100 Series</NAME> <PRICE>6500</PRICE> </PRODUCT> </PRODUCTS>
この文書を、
// Parse the XML text held in $doc into a SimpleXMLElement object.
$xml = new SimpleXMLElement( $doc );
で、$xmlにSimpleXMLElementオブジェクトとして格納してあるものとします。
// Select the NAME child of every PRODUCT under the root PRODUCTS element.
$xml->xpath( '/PRODUCTS/PRODUCT/NAME' );
Array ( [0] => SimpleXMLElement Object ( [0] => Widget Reporting ) [1] => SimpleXMLElement Object ( [0] => Pro Reporting ) [2] => SimpleXMLElement Object ( [0] => Tapes Abound ) [3] => SimpleXMLElement Object ( [0] => Widget100 Series ) )
// Select NAME only for PRODUCT elements whose SKU child equals "soft5678".
$xml->xpath( '/PRODUCTS/PRODUCT[SKU="soft5678"]/NAME' );
Array ( [0] => SimpleXMLElement Object ( [0] => Widget Reporting ) )
// Select PRODUCT elements whose category attribute is "software" and whose
// PRICE child is greater than 2500.
$xml->xpath( '/PRODUCTS/PRODUCT[@category="software" and PRICE > 2500]' );
Array ( [0] => SimpleXMLElement Object ( [@attributes] => Array ( [category] => software ) [SKU] => soft5678 [SUB_CATEGORY] => Business Analysis [NAME] => Widget Reporting [PRICE] => 4500 ) )
結果オブジェクトの要素を参照する方法は、要素や属性の参照で解説しています。
// Sample document: two <b> elements, each holding a single <c>.
$str = <<<EOM
<a>
    <b><c>10</c></b>
    <b><c>20</c></b>
</a>
EOM;

$xml = simplexml_load_string( $str );

foreach( $xml->xpath( '//b' ) as $item ) {
    // Deliberately left as '//c' to demonstrate the pitfall: a path starting
    // with "//" is evaluated from the document root even though xpath() is
    // called on a <b> element, so both <c> elements match on every iteration.
    print_r( $item->xpath( '//c' ) );
}
// Output:
// Array
// (
//     [0] => SimpleXMLElement Object ( [0] => 10 )
//     [1] => SimpleXMLElement Object ( [0] => 20 )
// )
// Array
// (
//     [0] => SimpleXMLElement Object ( [0] => 10 )
//     [1] => SimpleXMLElement Object ( [0] => 20 )
// )
これは$xml->xpath( '//c' )
とするのと同様の結果であり、期待したものではありません。「//」で始まるXPath式は、xpath()を呼び出した要素ではなく、文書のルートを起点として評価されるためです。この場合は次のように修正します。
$xml = simplexml_load_string( $str );

foreach( $xml->xpath( '//b' ) as $item ) {
    // The leading "." makes the path relative to $item, so only the <c>
    // descendants of the current <b> are matched. This avoids serialising
    // each node with asXML() and parsing it again just to change the root.
    print_r( $item->xpath( './/c' ) );
}
// Output:
// Array
// (
//     [0] => SimpleXMLElement Object ( [0] => 10 )
// )
// Array
// (
//     [0] => SimpleXMLElement Object ( [0] => 20 )
// )
DOMDocumentのloadHTMLFile()でHTMLからDOMとして読み込み、それをsimplexml_import_dom()へ渡すことで、HTMLもXMLと同様に処理できるようになります。
$dom = new DOMDocument();

// Parsing HTML as-is tends to raise libxml warnings. Collect them internally
// rather than silencing every kind of error with "@", then restore the
// previous setting so later code is unaffected.
$previous = libxml_use_internal_errors( true );
$loaded = $dom->loadHTMLFile( 'sample.html' );
libxml_clear_errors();
libxml_use_internal_errors( $previous );

if ( $loaded === false ) {
    throw new RuntimeException( 'Failed to load sample.html' );
}

// Convert the DOM tree to SimpleXML; guard against a failed import before
// calling xpath() on the result.
$xml = simplexml_import_dom( $dom );
if ( ! $xml instanceof SimpleXMLElement ) {
    throw new RuntimeException( 'Failed to import sample.html into SimpleXML' );
}

$result = $xml->xpath( '/foo/bar' );
public DOMXPath::__construct ( DOMDocument $doc ) — PHP: DOMXPath::__construct - Manual
public DOMNodeList DOMXPath::query (
    string $expression                // XPath式
    [, DOMNode $contextnode           // 相対XPathでの基準ノード
    [, bool $registerNodeNS = TRUE    // FALSEのとき、コンテキストノードの名前空間の自動登録を無効にする
    ]]
) — PHP: DOMXPath::query - Manual
SimpleXMLの例と同一のXML文書を対象とするものとします。
$dom = new DOMDocument();

// load() returns false when the file cannot be read or parsed; stop here
// rather than querying an empty document.
if ( $dom->load( 'sample.xml' ) === false ) {
    throw new RuntimeException( 'Failed to load sample.xml' );
}

// Build an XPath evaluator bound to the loaded document and run the
// expression; the matches are returned as a DOMNodeList.
$xpath = new DOMXPath( $dom );
$result = $xpath->query( '/PRODUCTS/PRODUCT' );
DOMDocument::load()で、XML文書をファイルから読み込みます。そしてそのDOMDocumentオブジェクトから、DOMXPathオブジェクトを作成します。そしてquery()でXPath式を実行し、結果をDOMNodeListオブジェクトで取得します。
// Dump the node list held in $result.
print_r( $result );
DOMNodeList Object ( [length] => 4 )
// Dump the first node (index 0) of the result list.
print_r( $result->item( 0 ) );
DOMElement Object ( [tagName] => PRODUCT [schemaTypeInfo] => [nodeName] => PRODUCT [nodeValue] => soft5678 Business Analysis Widget Reporting 4500 [nodeType] => 1 [parentNode] => (object value omitted) [childNodes] => (object value omitted) [firstChild] => (object value omitted) [lastChild] => (object value omitted) [previousSibling] => (object value omitted) [nextSibling] => (object value omitted) [attributes] => (object value omitted) [ownerDocument] => (object value omitted) [namespaceURI] => [prefix] => [localName] => PRODUCT [baseURI] => file:///C:/localhost/sample.xml [textContent] => soft5678 Business Analysis Widget Reporting 4500 )
// Dump the list of child nodes of the first result node.
print_r( $result->item( 0 )->childNodes );
DOMNodeList Object ( [length] => 9 )
// Dump the child at index 1 of the first result node.
// NOTE(review): index 0 is presumably the whitespace text node that precedes
// the first child element — confirm against the source document.
print_r( $result->item( 0 )->childNodes->item( 1 ) );
DOMElement Object ( [tagName] => SKU [schemaTypeInfo] => [nodeName] => SKU [nodeValue] => soft5678 [nodeType] => 1 [parentNode] => (object value omitted) [childNodes] => (object value omitted) [firstChild] => (object value omitted) [lastChild] => (object value omitted) [previousSibling] => (object value omitted) [nextSibling] => (object value omitted) [attributes] => (object value omitted) [ownerDocument] => (object value omitted) [namespaceURI] => [prefix] => [localName] => SKU [baseURI] => file:///C:/localhost/sample.xml [textContent] => soft5678 )