redtower
3/2/2011 - 2:19 AM

Yahoo MusicからArtistを検索しIDを取得する。

Yahoo MusicからArtistを検索しIDを取得する。

#!/usr/bin/perl
use strict;
use warnings;
use LWP::Simple 'get';
use URI;
use Encode;

# Search Yahoo! Music (Japan) for an artist and print one "name,ID" line
# per hit on standard output.
#
# The search word is the first command-line argument, interpreted as UTF-8;
# without an argument the word "weezer" is used.

# Test definedness and length rather than truthiness so that a search word
# of "0" is not silently replaced by the default.
my $word = (defined $ARGV[0] && length $ARGV[0])
    ? decode('utf-8', $ARGV[0])
    : "weezer";
my $uri = create_uri($word);

# LWP::Simple::get returns undef when the request fails; stop with a clear
# message instead of splitting an undefined value.
my $data = get($uri);
die "Failed to fetch $uri\n" unless defined $data;

foreach my $line (split(/\n/, $data)) {
    # A result cell looks like:  ...lft"><a href=".../ID/">NAME</a></td>
    # The leading greedy .* keeps the match on the last such cell of the line.
    # Reading the captures directly (rather than rewriting the line and
    # splitting it) keeps text that follows </a></td> out of the name.
    if ($line =~ m{.*lft"><[a-z =":/.]*([A-Z0-9]*)/">(.*)</a></td>}) {
        my ($id, $name) = ($1, $2);
        print encode('utf-8', $name) . "," . $id . "\n";
    }
}
exit;

# Build the search URL for the given word.
#
# Takes the search word as its only argument and returns a URI object for
# http://search.music.yahoo.co.jp/musicsearch whose query string carries
# cc=as and cp=<word>.
# NOTE(review): cc=as presumably selects the artist search - confirm.
sub create_uri {
    my ($keyword) = @_;

    my @params = (cc => 'as', cp => $keyword);

    my $search_uri = URI->new('http://search.music.yahoo.co.jp/musicsearch');
    $search_uri->query_form(@params);

    return $search_uri;
}
#!/usr/bin/perl
use strict;
use warnings;
use Web::Scraper;
use URI;
use Encode;

# Search Yahoo! Music (Japan) for an artist with Web::Scraper and print one
# "name,ID" line per hit on standard output.
#
# The search word is the first command-line argument, interpreted as UTF-8;
# without an argument the word "weezer" is used.

# Test definedness and length rather than truthiness so that a search word
# of "0" is not silently replaced by the default.
my $word = (defined $ARGV[0] && length $ARGV[0])
    ? decode('utf-8', $ARGV[0])
    : "weezer";
my $uri = create_uri($word);

# For every anchor matched by the selector, collect its href as 'url' and
# its text as 'name' into the 'items' list.
my $scraper = scraper {
    process 'div.ymsc-mn71 table.artist td.lft a',
            'items[]' => {url=>'@href', name=>'TEXT', }
};
# Send HTTP requests through the proxy named by HTTP_PROXY when it is set.
if ($ENV{'HTTP_PROXY'}) {
    $scraper->user_agent->proxy('http', $ENV{'HTTP_PROXY'});
}
my $res = $scraper->scrape($uri);

# Fall back to an empty list explicitly in case 'items' is missing from the
# result, instead of relying on autovivification inside the loop header.
foreach my $item (@{ $res->{items} || [] }) {
    # Reduce a URL that ends in "/" to its last path segment (the ID);
    # a URL of any other shape is printed unchanged.
    $item->{url} =~ s/.*\/([^\/]*)\/$/$1/;
    print encode('utf-8',$item->{name}) . "," . $item->{url} . "\n";
}
exit;

# Build the search URL for the given word.
#
# Takes the search word as its only argument and returns a URI object for
# http://search.music.yahoo.co.jp/musicsearch whose query string carries
# cc=as and cp=<word>.
# NOTE(review): cc=as presumably selects the artist search - confirm.
sub create_uri {
    my ($search_word) = @_;

    my @query = (cc => 'as', cp => $search_word);

    my $endpoint = URI->new('http://search.music.yahoo.co.jp/musicsearch');
    $endpoint->query_form(@query);

    return $endpoint;
}