■HTML::Selector::XPath を nth-child に対応させるパッチ
coderepos にコミット済。そのうちリリースされる予定。
process 'tr.r, tr.s', 'modules[]', scraper { process 'td:nth-child(1)', 'module', 'TEXT' };
とかできるようになって便利。
=== lib/HTML/Selector/XPath.pm
==================================================================
--- lib/HTML/Selector/XPath.pm (revision 1289)
+++ lib/HTML/Selector/XPath.pm (local)
@@ -21,7 +21,7 @@
  # attribute value match
  attr2 => qr/^\[\s*([^~\|=\s]+)\s*([~\|]?=)\s*"([^"]+)"\s*\]/i,
  attrN => qr/^:not\((.*?)\)/i,
- pseudo => qr/^:([()a-z_-]+)/i,
+ pseudo => qr/^:([()a-z0-9_-]+)/i,
  # adjacency/direct descendance
  combinator => qr/^(\s*[>+\s])/i,
  # rule separator
@@ -115,6 +115,8 @@
  $parts[$#parts] = '*[1]/self::' . $parts[$#parts];
  } elsif ($1 =~ /^lang\(([\w\-]+)\)$/) {
  push @parts, "[\@xml:lang='$1' or starts-with(\@xml:lang, '$1-')]";
+ } elsif ($1 =~ /^nth-child\((\d+)\)$/) {
+ push @parts, "[count(preceding-sibling::*) = @{[ $1 - 1 ]}]";
  } else {
  Carp::croak "Can't translate '$1' pseudo-class";
  }
=== t/01_xpath.t
==================================================================
--- t/01_xpath.t (revision 1289)
+++ t/01_xpath.t (local)
@@ -119,3 +119,9 @@
 foo.bar, bar
 --- xpath
 //foo[contains(concat(' ', @class, ' '), ' bar ')] | //bar
+
+===
+--- selector
+E:nth-child(1)
+--- xpath
+//E[count(preceding-sibling::*) = 0]
=== t/02_html.t
==================================================================
--- t/02_html.t (revision 1289)
+++ t/02_html.t (local)
@@ -136,3 +136,22 @@
 p.pastoral.marine
 --- expected
 <p class="pastoral blue aqua marine">foo</p>
+
+===
+--- input
+<p>foo</p>
+<p>bar</p>
+--- selector
+p:nth-child(1)
+--- expected
+<p>foo</p>
+
+===
+--- input
+<p>foo</p>
+<p>bar</p>
+--- selector
+p:nth-child(2)
+--- expected
+<p>bar</p>
+
トラックバック - http://d.hatena.ne.jp/tokuhirom/20071111/1194754527


