Tfe

Ongi etorri tfe-ren webgunera...

Old stuff/Perl/bricocine.pl

(Deskargatu)
#!/usr/bin/perl -w

use Data::Dumper;
use POSIX qw(strftime);
use HTML::Entities;
use Date::Parse;
use URI::Escape;
require LWP::UserAgent;
use Date::Manip;

# Shared HTTP client used for every request this script makes.
# A desktop Firefox User-Agent string is sent instead of the LWP default.
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0 (X11; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0");

# Return a copy of the argument with leading and trailing whitespace removed.
# An undefined argument is returned unchanged (still undef) without touching
# it, so callers holding an optional value do not trigger an "uninitialized
# value" warning.
sub trim
{
	my $s = shift;
	return $s unless defined $s;
	# Two anchored substitutions give the same result as one alternation
	# and avoid rescanning the whole string for each branch.
	$s =~ s/^\s+//;
	$s =~ s/\s+$//;
	return $s;
}

# Items collected for the feed currently being built: fetch_link() appends one
# hash per magnet URI, and the main loop empties the list for each section.
my @links=();
# Scrape one film page and hand each of its download links to fetch_link().
#
# Arguments:
#   $url      - address of the film page.
#   $page_idx - running number of the page, passed through to fetch_link().
#
# The synopsis, trailer address and poster image are extracted with regular
# expressions and assembled into a small HTML summary (the "ficha") that is
# shared by every link found on the page. Nothing is returned. If the page
# cannot be downloaded, an error is printed and the page is skipped.
sub fetch_page($$)
{
	my $url = shift;
	my $page_idx = shift;
	my $page_url = $url;

	my $response = $ua->get($url);
	if(!$response->is_success)
	{
		print "ERROR fetching $url - ".Dumper $response;
		return;
	}
	my $content = $response->decoded_content;

	# Only the first occurrence of each field is wanted.
	my ($synopsis) = ($content=~ /class="(?:description)"[^>]+>(.*?)<\/div>/is);
	my ($video) = ($content=~ /<div itemprop="trailer".*?src='([^']+)/is);
	my ($image) = ($content=~ /"article-image".*?src="([^"]+)/is);

	# Any of these may be absent from a page; fall back to an empty string so
	# the summary is still built and no "uninitialized value" warning fires.
	foreach my $field ($synopsis, $video, $image)
	{
		$field = '' unless defined $field;
	}

	my $ficha="<div style='float:left;margin:30px'><img src='$image' alt=''/></div><div>$synopsis</div><div><a href='$video'>View trailer</a></div>";

	# Walk every link row: title, date text, then the address behind the
	# element with id="magnet".
	my $link_idx = 0;
	while($content=~ /class="title">(.*?)<.*?class="metadata a">(.*?)<.*?id="magnet".*?href="(.*?)".*?<\/a>/isg)
	{
		# Copy the captures before calling anything that runs its own regex.
		my ($link_title, $fecha, $link_url) = ($1,$2,$3);
		$link_idx++;

		# Rows are numbered even when skipped, so indexes follow page order.
		next if !$link_url;

		fetch_link(trim($link_title), trim($fecha), $ficha, $page_url, $link_url, $page_idx, $link_idx);
	}
}

# Resolve one download link to its magnet URI(s) and append them to @links.
#
# Arguments (positional):
#   $title    - link title shown on the film page.
#   $fecha    - date text; must contain three digit groups separated by "/",
#               read here as day/month/year.
#   $ficha    - HTML summary of the film, stored unchanged with each item.
#   $page_url - address of the film page the link was found on.
#   $url      - address of the intermediate page that carries the magnet link.
#   $page_idx - running number of the film page.
#   $link_idx - running number of the link within its page.
#
# Every href starting with "magnet:" on the fetched page becomes one hash in
# @links. The hash's "sortkey" is YYYYMMDD.PPPPLLLL (date, page index, link
# index), zero-padded so that plain string comparison orders items by date.
#
# Returns nothing. The whole script exits if $url is empty, or if a magnet is
# found but no date can be read from $fecha. A failed download only prints an
# error and returns.
sub fetch_link
{
	my ($title, $fecha, $ficha, $page_url, $url, $page_idx, $link_idx) = @_;

	$page_idx = sprintf("%04d",$page_idx);
	$link_idx = sprintf("%04d",$link_idx);

	if(!$url)
	{
		my $shown = defined $url ? $url : '';
		print("FATAL $shown");
		exit;
	}
	my $response = $ua->get($url, Referer => $url);
	if(!$response->is_success)
	{
		print "ERROR fetch2 ($url) ".Dumper $response;
		return;
	}
	my $content = $response->decoded_content;

	# The date does not change between magnets, so parse it once. It is only
	# validated when a magnet is actually found (see below).
	my ($day, $month, $year) = ($fecha=~ /(\d+)\/(\d+)\/(\d+)/);

	while($content=~ /href="(magnet:.*?)"/g)
	{
		my $magnet=decode_entities($1);

		if(!$year || !$month || !$day)
		{
			my @shown = map { defined $_ ? $_ : '' } ($year, $month, $day);
			print "ERROR $fecha ($shown[0], $shown[1], $shown[2])\n";
			exit;
		}

		# Pad each date part: without it "5/10" would sort after "25/9"
		# when keys are compared as strings.
		my $sortkey = sprintf("%04d%02d%02d.%s%s", $year, $month, $day, $page_idx, $link_idx);

		push(@links, {
			'fecha' => $fecha,
			'ficha' => $ficha,
			'sortkey' => $sortkey,
			'title' => $title,
			'page_url' => $page_url,
			'magnet' => $magnet,
			'day' => $day,
			'month' => $month,
			'year' => $year,
		});
	}
}

# Main script: download the front page and write one RSS 2.0 file per
# <section> found on it, named after the section's <h2> heading.
my $url = ("http://www.bricocine.com/");


my $response = $ua->get($url);
if(!$response->is_success)
{
    print "ERROR ".Dumper $response;
    # "return" is not allowed outside a subroutine; stop with a failure status.
    exit 1;
}
my $content = $response->decoded_content;
my $page_idx =0;

# /s lets a section span several lines of HTML.
while($content=~ /<section>(.*?)<\/section>/gs)
{
    my $section=$1;

    @links = ();
    my ($cat) = ($section=~ /<h2.*?>([^<]+)/is);
    # A section without a heading still gets a feed, with an empty category.
    $cat = defined $cat ? trim($cat) : '';

    print "Creating feed from $url - $cat\n";

    # The category comes from remote HTML: keep path separators and NUL bytes
    # out of the file name so the feed cannot be written outside this folder.
    (my $file_cat = $cat) =~ tr{/\0}{_};
    my $feed_path = "/home/user/public_html/bricocine_$file_cat.rss";
    open(my $feed, '>', $feed_path) or die "Cannot write $feed_path: $!\n";

    # The heading is HTML text; decode it and re-encode numerically so the
    # channel title is valid XML and pure ASCII.
    my $xml_cat = HTML::Entities::encode_numeric(decode_entities($cat));

    print {$feed} '<?xml version="1.0" encoding="utf-8"?><rss version="2.0">
        <channel>
        <title>Bricocine '.$xml_cat.'</title>
        <link>'.$url.'</link>
        <description>.</description>

    ';

    # Visit every film page linked from this section; fetch_page() (through
    # fetch_link()) fills @links.
    while($section=~ /class="entry.*?href="(.*?)"/g)
    {
        $page_idx++;
        fetch_page($1, $page_idx);
    }

    # Newest first. Each torrent is written once, and at most 50 items go
    # into a feed.
    my @sorted =  sort { $b->{sortkey} cmp $a->{sortkey} } @links;
    my %added=();
    my $num=0;
    foreach my $hash(@sorted)
    {
        last if $num >= 50;

        # Identify a torrent by its info-hash; if the magnet carries none,
        # use the whole URI so unrelated items are not merged.
        my ($uid) = ($$hash{'magnet'} =~ /btih:([^&]+)/);
        $uid = $$hash{'magnet'} unless defined $uid;
        next if $added{$uid};

        $added{$uid} = 1;
        $num++;
        print {$feed} '
                    <item>
                        <title>'.HTML::Entities::encode_numeric($$hash{'title'}).'</title>
                        <link>'.HTML::Entities::encode_numeric($$hash{'page_url'}).'?'.HTML::Entities::encode_numeric($$hash{'title'}).'</link>
                        <guid>'.HTML::Entities::encode_numeric($$hash{'page_url'}).'?'.HTML::Entities::encode_numeric($$hash{'title'}).'</guid>
                        <enclosure url="'.HTML::Entities::encode_numeric($$hash{'magnet'}).'" type="application/x-bittorrent" length="1" />
                        <description>'.HTML::Entities::encode_numeric($$hash{'ficha'}).'</description>
                    </item>

                    ';
    }

    print {$feed} '</channel>
    </rss>';

    # Buffered write errors only surface on close.
    close($feed) or die "Cannot finish $feed_path: $!\n";
}