Old stuff/Perl/bricocine.pl
(Deskargatu)
#!/usr/bin/perl -w
use Data::Dumper;
use POSIX qw(strftime);
use HTML::Entities;
use Date::Parse;
use URI::Escape;
require LWP::UserAgent;
use Date::Manip;
# Shared HTTP client used for every request in this script; it presents
# itself with a desktop Firefox User-Agent string.
my $ua = LWP::UserAgent->new(
    agent => "Mozilla/5.0 (X11; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0",
);
# Return a copy of the given string with leading and trailing whitespace
# removed; the caller's value is left untouched.
sub trim
{
    my ($text) = @_;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}
# Accumulator shared between the subs and the main loop: fetch_link() pushes
# one hash per magnet link it finds, and the main loop empties the list
# before processing each section.
my @links=();
# Fetch one movie page, build the HTML summary card ("ficha") used as the
# feed item description, and pass every download entry found on the page
# to fetch_link().
#
# Arguments:
#   $url      - address of the movie page
#   $page_idx - running page counter supplied by the caller; forwarded
#               unchanged to fetch_link()
#
# Returns nothing. If the HTTP request fails, an error is printed and the
# page is skipped.
sub fetch_page
{
    my ($page_url, $page_idx) = @_;

    my $response = $ua->get($page_url);
    if (!$response->is_success)
    {
        print "ERROR fetching $page_url - ".Dumper $response;
        return;
    }
    my $content = $response->decoded_content;

    # Only the first occurrence of each field is wanted, so no /g is needed.
    my ($synopsis) = ($content =~ /class="(?:description)"[^>]+>(.*?)<\/div>/is);
    my ($video)    = ($content =~ /<div itemprop="trailer".*?src='([^']+)/is);
    my ($image)    = ($content =~ /"article-image".*?src="([^"]+)/is);

    # A page may lack any of these; fall back to empty strings so the card
    # is still built without "uninitialized value" warnings.
    foreach my $field ($synopsis, $video, $image)
    {
        $field = '' unless defined $field;
    }

    # The card is the same for every link on the page, so build it once.
    my $ficha = trim("<div style='float:left;margin:30px'><img src='$image' alt=''/></div><div>$synopsis</div><div><a href='$video'>View trailer</a></div>");

    # Walk the download entries: entry title, date text and link URL.
    my $link_idx = 0;
    while ($content =~ /class="title">(.*?)<.*?class="metadata a">(.*?)<.*?id="magnet".*?href="(.*?)".*?<\/a>/isg)
    {
        # Copy the captures before calling anything that runs its own regex.
        my ($title, $fecha, $link_url) = ($1, $2, $3);
        $link_idx++;
        next unless $link_url;

        fetch_link(trim($title), trim($fecha), $ficha, $page_url, $link_url, $page_idx, $link_idx);
    }
    return;
}
# Fetch an intermediate download page and record every magnet link on it
# in the shared @links list.
#
# Arguments (positional):
#   $title    - entry title
#   $fecha    - entry date text; must contain day/month/year as digits
#               separated by slashes
#   $ficha    - HTML summary card for the movie
#   $page_url - address of the movie page the entry came from
#   $url      - address of the download page to fetch
#   $page_idx - page counter, folded into the sort key
#   $link_idx - position of the entry on its page, folded into the sort key
#
# Returns nothing. An HTTP failure is reported and the entry skipped. A
# missing $url, or a date that cannot be parsed once a magnet link has been
# found, terminates the script (behaviour kept from the original).
sub fetch_link
{
    my ($title, $fecha, $ficha, $page_url, $url, $page_idx, $link_idx) = @_;

    if (!$url)
    {
        print("FATAL $url");
        exit;
    }

    my $response = $ua->get($url, Referer => $url);
    if (!$response->is_success)
    {
        print "ERROR fetch2 ($url) ".Dumper $response;
        return;
    }
    my $content = $response->decoded_content;

    # The date is the same for every magnet on this page: parse it once.
    my ($day, $month, $year) = ($fecha =~ /(\d+)\/(\d+)\/(\d+)/);
    my $date_ok = ($year && $month && $day);

    # Zero-pad every component so that the string comparison used when
    # sorting orders dates chronologically even for values such as 1/2/2016.
    my $sortkey;
    if ($date_ok)
    {
        $sortkey = sprintf("%04d%02d%02d.%04d%04d",
                           $year, $month, $day, $page_idx, $link_idx);
    }

    while ($content =~ /href="(magnet:.*?)"/g)
    {
        my $magnet = decode_entities($1);

        if (!$date_ok)
        {
            # Print empty strings rather than undef for the missing parts.
            my ($y, $m, $d) = map { defined $_ ? $_ : '' } ($year, $month, $day);
            print "ERROR $fecha ($y, $m, $d)\n";
            exit;
        }

        push(@links, {
            'fecha'    => $fecha,
            'ficha'    => $ficha,
            'sortkey'  => $sortkey,
            'title'    => $title,
            'page_url' => $page_url,
            'magnet'   => $magnet,
            'day'      => $day,
            'month'    => $month,
            'year'     => $year,
        });
    }
    return;
}
# Main program: download the site's front page and write one RSS file per
# <section> found on it. Each feed holds at most 50 distinct magnet links
# collected from the movie pages linked in that section, newest first.
my $url = "http://www.bricocine.com/";
my $response = $ua->get($url);
if (!$response->is_success)
{
    print "ERROR ".Dumper $response;
    # `return` is not allowed outside a subroutine; end the script instead.
    exit 1;
}
my $content = $response->decoded_content;
my $page_idx = 0;
# NOTE(review): there is no /s modifier here or on the entry pattern below,
# so only matches that do not span a newline are found - confirm intended.
while ($content =~ /<section>(.*?)<\/section>/g)
{
    my $section = $1;
    @links = ();

    # Category name comes from the section's first <h2>; may be absent.
    my ($cat) = ($section =~ /<h2.*?>([^<]+)/is);
    $cat = defined $cat ? trim($cat) : '';
    print "Creating feed from $url - $cat\n";

    # The category is remote data: keep it from introducing extra path
    # components or an embedded NUL into the output file name.
    (my $file_cat = $cat) =~ s{[/\0]}{_}g;
    my $feed_path = "/home/user/public_html/bricocine_$file_cat.rss";
    my $feed;
    if (!open($feed, '>', $feed_path))
    {
        print "ERROR cannot write $feed_path: $!\n";
        next;
    }

    # The heading is HTML text; convert it to XML-safe numeric entities.
    my $cat_xml = HTML::Entities::encode_numeric(decode_entities($cat));
    print {$feed} '<?xml version="1.0" encoding="utf-8"?><rss version="2.0">
<channel>
<title>Bricocine '.$cat_xml.'</title>
<link>'.$url.'</link>
<description>.</description>
';

    # Visit every movie page linked from this section; fetch_page() and
    # fetch_link() fill @links as a side effect.
    while ($section =~ /class="entry.*?href="(.*?)"/g)
    {
        $page_idx++;
        fetch_page($1, $page_idx);
    }

    # Newest first (sort keys start with the date).
    my @sorted = sort { $b->{sortkey} cmp $a->{sortkey} } @links;
    my %added = ();
    my $num = 0;
    foreach my $hash (@sorted)
    {
        last if $num >= 50;

        # De-duplicate on the torrent info-hash; fall back to the whole
        # magnet URI when it carries no btih parameter.
        my ($uid) = ($$hash{'magnet'} =~ /btih:([^&]+)/);
        $uid = $$hash{'magnet'} unless defined $uid;
        next if $added{$uid};
        $added{$uid} = 1;
        $num++;

        my $title_xml = HTML::Entities::encode_numeric($$hash{'title'});
        # The title is appended as a query string to the page address,
        # for both <link> and <guid>.
        my $item_url  = HTML::Entities::encode_numeric($$hash{'page_url'}).'?'.$title_xml;

        print {$feed} '
<item>
<title>'.$title_xml.'</title>
<link>'.$item_url.'</link>
<guid>'.$item_url.'</guid>
<enclosure url="'.HTML::Entities::encode_numeric($$hash{'magnet'}).'" type="application/x-bittorrent" length="1" />
<description>'.HTML::Entities::encode_numeric($$hash{'ficha'}).'</description>
</item>
';
    }

    print {$feed} '</channel>
</rss>';
    # Buffered write errors only surface when the handle is closed.
    close($feed) or print "ERROR closing $feed_path: $!\n";
}