#!/usr/bin/perl
# Parser.pm (gtkrss_b): fetches RSS feeds over HTTP and parses them into items
# for the scrolling news display.
package Parser;

use strict;
use warnings;
use IO::Socket::INET;
use Data::Dumper;
use threads;

require Exporter;
our @ISA    = qw(Exporter);
our @EXPORT = qw(get_files);

my $v = 1;    # verbose flag: print progress messages when true
# Constructor: build a parser with default directories and settings.
sub new
{
    my $class = shift;
    my $self  = {
        FEEDS          => undef,     # parsed items, keyed by feed file name
        FEEDS_DIR      => "feeds",   # where freshly downloaded feeds are written
        CONFIG_FILE    => "config",  # list of "<local-name> <feed-url>" lines
        DATA_DIR       => "data",    # persistent per-feed item storage
        SAVE_OLD       => 20,        # how many items to keep per feed
        UPDATE_TIMER   => undef,
        DEFIL_TIMER    => undef,
        C_TEXTE        => [],        # ticker text, one character per element
        NEWS_TITLE_REF => [],        # indexes in C_TEXTE where titles start
        NEWS_URL_REF   => undef,     # maps those indexes to item URLs
        BROWSER        => "firefox",
    };
    bless($self, $class);
    return $self;
}
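# Trivial getters and setters for the fields initialised in new().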
sub set_browser
{
    my $self = shift;
    $self->{BROWSER} = shift;
}
sub get_browser
{
    my $self = shift;
    return $self->{BROWSER};
}
sub set_save_old
{
    my $self = shift;
    $self->{SAVE_OLD} = shift;
}
sub set_defil_timer
{
    my $self = shift;
    $self->{DEFIL_TIMER} = shift;
}
sub set_update_timer
{
    my $self = shift;
    $self->{UPDATE_TIMER} = shift;
}
sub set_config_file
{
    my $self = shift;
    $self->{CONFIG_FILE} = shift;
}
sub set_data_dir
{
    my $self = shift;
    $self->{DATA_DIR} = shift;
}
sub set_feeds_dir
{
    my $self = shift;
    $self->{FEEDS_DIR} = shift;
}
sub get_news_title_ref
{
    my $self = shift;
    return $self->{NEWS_TITLE_REF};
}
sub get_news_url_ref
{
    my $self = shift;
    return $self->{NEWS_URL_REF};
}
sub get_config_file
{
    my $self = shift;
    return $self->{CONFIG_FILE};
}
sub get_update_timer
{
    my $self = shift;
    return $self->{UPDATE_TIMER};
}
sub get_defil_timer
{
    my $self = shift;
    return $self->{DEFIL_TIMER};
}
sub feeds
{
    my $self = shift;
    return $self->{FEEDS};
}
# Mark every stored item as read.
sub read_all
{
    my $self = shift;
    foreach my $file (keys %{$self->{FEEDS}})
    {
        foreach my $item (@{$self->{FEEDS}{$file}})
        {
            $item->{read} = 1 if $item;
        }
    }
}
# Mark every stored item as unread.
sub read_none
{
    my $self = shift;
    foreach my $file (keys %{$self->{FEEDS}})
    {
        foreach my $item (@{$self->{FEEDS}{$file}})
        {
            $item->{read} = 0 if $item;
        }
    }
}
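# parse(\@lines): minimal line-oriented RSS parser. Takes a reference to the
# raw lines of a feed and returns a list of hashes, one per <item>, mapping
# each child element name (title, link, description, ...) to its content.
# Elements that span several lines are stored as an array of line fragments.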
sub parse
{
    my $parse_ref  = shift;
    my @parse_data = @{$parse_ref};
    my @titles;          # one hash per <item>, keyed by element name
    my $i      = -1;     # index of the current item
    my $opened = "";     # element currently being accumulated across lines
    my $start  = 0;      # true while inside an <item> block
    my $content;
    my $perdu  = 0;      # lines inside an item that matched nothing
    foreach (@parse_data)
    {
        if    (/<item(?:>| .*>)/) { $start = 1; $i++; }
        elsif (/<\/item>/)        { $start = 0; }
        elsif ($start)
        {
            # Element opened and closed on the same line.
            if (!$opened and /^\s*<([\w:]+)(?: .*)?>(?:<!\[CDATA\[)?(.*)<\/\1>/)
            {
                ($opened, $content) = ($1, $2);
                $content =~ s/\]\]>$//;
                $titles[$i]{$opened} = $content;
                $opened = "";
            }
            # Element opened on this line but closed on a later one.
            elsif (!$opened and /^\s*<([\w:]+)(?: [^>]+?)?>(?:<!\[CDATA\[)?(.+?)$/)
            {
                ($opened, $content) = ($1, $2);
                push(@{$titles[$i]{$opened}}, $content);
            }
            # Closing line of a multi-line element.
            elsif ($opened && /(.*?)(?:\]\]>)?<\/$opened>/)
            {
                push(@{$titles[$i]{$opened}}, $1) if defined $1 and length $1;
                $opened = "";
            }
            # Continuation line of a multi-line element.
            elsif ($opened)
            {
                push(@{$titles[$i]{$opened}}, $_);
            }
            else
            {
                $perdu++;
            }
        }
    }
    # print "($perdu unused lines) " if $perdu;
    return @titles;
}
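# start_parse(): read every file already saved in DATA_DIR and load its items
# into $self->{FEEDS}, keyed by file name.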
sub start_parse
{
    my $self     = shift;
    my $data_dir = $self->{DATA_DIR};
    opendir(my $dh, $data_dir) or die $!;
    my @fichiers = grep { !/^\./ } readdir($dh);
    closedir($dh);
    foreach my $file (@fichiers)
    {
        next if !-f "$data_dir/$file";
        open(my $info, '<', "$data_dir/$file") or die $!;
        my @data = <$info>;
        close($info);
        my @resultat = parse(\@data);
        $self->{FEEDS}{$file} = \@resultat;
    }
}
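# update_parse(): parse the freshly downloaded files in FEEDS_DIR, mark new
# items as unread, prepend them to the stored items of each feed and discard
# anything beyond the SAVE_OLD limit.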
sub update_parse
{
    my $self      = shift;
    my $feeds_dir = $self->{FEEDS_DIR};
    opendir(my $dh, $feeds_dir) or return;
    my @fichiers = grep { !/^\./ } readdir($dh);
    closedir($dh);
    foreach my $file (@fichiers)
    {
        my @temp_feeds;
        open(my $info, '<', "$feeds_dir/$file") or next;
        my @data = <$info>;
        close($info);
        my @resultat = parse(\@data);
        for my $i (0 .. $#resultat)
        {
            # Stop as soon as we reach an item we already have.
            if ($self->{FEEDS}{$file}[0]{'title'}
                and $resultat[$i]{'title'} eq $self->{FEEDS}{$file}[0]{'title'})
            {
                last;
            }
            else { $resultat[$i]{'read'} = 0; }
            if (ref(${$self->{FEEDS}{$file}}[0]) ne "HASH")
            {
                # No stored items yet for this feed: seed the list.
                ${$self->{FEEDS}{$file}}[0] = $resultat[$i];
            }
            else
            {
                push(@temp_feeds, $resultat[$i]);
            }
        }
        unshift(@{$self->{FEEDS}{$file}}, @temp_feeds);
    }
    # Drop the oldest items beyond the SAVE_OLD limit.
    foreach my $file (keys %{$self->{FEEDS}})
    {
        while (scalar @{$self->{FEEDS}{$file}} > $self->{SAVE_OLD})
        {
            pop(@{$self->{FEEDS}{$file}});
        }
    }
}
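# Accessors for the generated ticker text and for the stored item titles.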
sub c_texte
{
    my $self = shift;
    return $self->{C_TEXTE};
}
sub titles
{
    my $self   = shift;
    my @titles = ();
    foreach my $file (keys %{$self->{FEEDS}})
    {
        foreach my $arr (@{$self->{FEEDS}{$file}})
        {
            push(@titles, $arr->{'title'}) if $arr->{'title'};
        }
    }
    return @titles;
}
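# generate_c_texte(): rebuild the ticker text from all unread items. The
# result is stored in C_TEXTE as one element per displayed character (the
# feed name is wrapped in <b> tags, HTML entities stay whole); the position
# of each title and its link are recorded in NEWS_TITLE_REF / NEWS_URL_REF.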
sub generate_c_texte
{
    my $self = shift;
    $self->{C_TEXTE}        = [];
    $self->{NEWS_TITLE_REF} = [];
    my $compteur = 0;
    foreach my $file (keys %{$self->{FEEDS}})
    {
        my @file = split '', $file;
        foreach my $arr (@{$self->{FEEDS}{$file}})
        {
            if ($arr->{'title'} and $arr->{'read'} == 0)
            {
                # Remember where this title starts and which URL it points to.
                push(@{$self->{NEWS_TITLE_REF}}, $compteur);
                ${$self->{NEWS_URL_REF}{$compteur}} = $arr->{'link'};
                $arr->{'title'} =~ s/^<!\[CDATA\[(.*)\]\]>$/$1/g;
                # Feed name in bold, one character per element.
                foreach (@file)
                {
                    push(@{$self->{C_TEXTE}}, "<b>" . $_ . "</b>");
                    $compteur++;
                }
                push(@{$self->{C_TEXTE}}, ":", " ");
                $compteur++;
                # Keep HTML entities such as &amp; together as one element.
                my @special = ();
                foreach (split '', $arr->{'title'})
                {
                    if    ($_ eq "&" and !@special) { @special = ("&"); next; }
                    elsif (@special)                { push(@special, $_); }
                    if (@special and $_ eq ";")
                    {
                        push(@{$self->{C_TEXTE}}, join("", @special));
                        @special = ();
                        $compteur++;
                    }
                    elsif (!@special)
                    {
                        push(@{$self->{C_TEXTE}}, $_);
                        $compteur++;
                    }
                }
                push(@{$self->{C_TEXTE}}, " ", "-", "-", " ");
            }
        }
    }
    # Fallback text when there is nothing unread.
    if (!@{$self->{C_TEXTE}})
    {
        @{$self->{C_TEXTE}} = ("N","O","T","H","I","N","G"," ","N","E","W"," !");
    }
    return $self->{C_TEXTE};
}
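# update_file(): write the stored items of every feed back to DATA_DIR as a
# sequence of <item> blocks, keeping at most SAVE_OLD items per feed.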
sub update_file
{
    my $self     = shift;
    my $data_dir = $self->{DATA_DIR};
    foreach my $file (keys %{$self->{FEEDS}})
    {
        open(my $write, '>', "$data_dir/$file") or die("Error $! \n");
        my @arr = @{$self->{FEEDS}{$file}};
        if ($self->{SAVE_OLD} == 0) { $self->{SAVE_OLD} = scalar @arr; }
        # Write at most SAVE_OLD items back to the data file.
        foreach my $key (@arr[0 .. (scalar @arr > $self->{SAVE_OLD} ? $self->{SAVE_OLD} - 1 : scalar @arr - 1)])
        {
            print $write "<item>\n";
            foreach (keys %{$key})
            {
                if (ref($key->{$_}) eq "ARRAY")
                {
                    # Multi-line elements were stored as an array of fragments.
                    print $write "<$_>";
                    foreach my $j (@{$key->{$_}})
                    {
                        print $write $j if $j;
                    }
                    print $write "</$_>\n";
                }
                else
                {
                    print $write "<$_>" . $key->{$_} . "</$_>\n";
                }
            }
            print $write "</item>\n\n\n";
        }
        close $write;
    }
}
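# get_files(): read CONFIG_FILE ("<local-name> <feed-url>" per line) and
# download each feed with a plain HTTP GET over a TCP socket, handling
# chunked transfer encoding by hand, then save the body in FEEDS_DIR.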
sub get_files
{
    my $self = shift;
    my $file = $self->{CONFIG_FILE};
    print "Downloading new RSS feeds:\n" if $v;
    open(my $files_fh, '<', $file) or die $!;
    my @files = <$files_fh>;
    close $files_fh;
    foreach my $data_file (@files)
    {
        # Each config line is "<local-name> <feed-url>".
        my ($emplacement, $url) = split(' ', $data_file);
        next unless defined $url;    # skip blank or malformed lines
        chomp($url);
        chomp($emplacement);
        my ($sock, $contenu);
        my ($addr, $path, $fichier) = $url =~ /^https?:\/\/(.+?)\/(.*\/)?(.+?)$/;
        $path = $path ? $path : "";
        if (!$addr or !$fichier)
        {
            warn("Could not parse feed URL in config line: $data_file\n");
            next;
        }
        print "\tDownloading from $addr: " if $v;
        $sock = IO::Socket::INET->new(
            PeerAddr => $addr,
            PeerPort => 'http(80)',
            Proto    => 'tcp'
        ) or do { print "connection failed\n" if $v; next; };
        $sock->autoflush(1);
        # Plain HTTP/1.1 GET; ask the server to close the connection so the
        # read loop below terminates.
        my $requete = "GET /$path$fichier HTTP/1.1\r\nHost: $addr\r\nConnection: close\r\n\r\n";
        print $sock $requete;
        open(my $out, '>', $self->{FEEDS_DIR} . "/$emplacement")
            or do { warn "Cannot open " . $self->{FEEDS_DIR} . "/$emplacement for writing: $!\n"; close $sock; next; };
        my @temp          = ();
        my $chunked       = 0;
        my $chunk_count   = 0;
        my $chunk_reprise = 0;
        my $encoding      = 0;
        foreach (<$sock>)
        {
            $contenu = $_;
            if ($contenu =~ /^HTTP\/1.1 404/i) { print "Error 404: $data_file!\n\n"; next; }
            if ($contenu =~ /^Transfer-Encoding: chunked/) { $chunked = 1; next; }
            if ($contenu =~ /^Content-Type: /) { next; }
            if ($contenu =~ /^charset=(.*)/)  { $encoding = $1; next; }
            if (!$chunked)
            {
                $contenu =~ s/\r//g;
                $contenu =~ s/\n//g;
                push(@temp, $contenu);
            }
            else
            {
                # Chunked transfer encoding: a hexadecimal size line precedes
                # each chunk of data.
                if ($chunk_count <= 0)
                {
                    $contenu =~ s/[^a-f0-9]//g;
                    $chunk_count   = hex $contenu;
                    $chunk_reprise = 1;
                }
                else
                {
                    $chunk_count -= length($contenu);
                    $contenu =~ s/\r//g;
                    $contenu =~ s/\n//g;
                    if ($chunk_reprise)
                    {
                        # First line after a size marker continues the
                        # previously stored line.
                        $temp[$#temp] .= $contenu;
                        $chunk_reprise = 0;
                    }
                    else { push(@temp, $contenu); }
                }
            }
        }
        print "..." if $v;
        print $out join("\n", @temp);
        close $out;
        close $sock;
        print "\tOK\n" if $v;
    }
    print "Update finished\n\n" if $v;
}
1;
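# Example usage (a minimal sketch, not part of the module): it assumes a
# "config" file with lines of the form "<local-name> <feed-url>" and writable
# "feeds" and "data" directories; the GUI-side pieces (timers, browser) are
# omitted.
#
#   use Parser;
#   my $parser = Parser->new();
#   $parser->get_files();                 # download the feeds listed in config
#   $parser->start_parse();               # load items previously saved in data/
#   $parser->update_parse();              # merge the freshly downloaded items
#   my $ticker = $parser->generate_c_texte();
#   print join("", @{$ticker}), "\n";     # the scrolling text, one char/element
#   $parser->update_file();               # persist merged items back to data/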