Old stuff/Perl/svn/tfe/trunk/gtkrss/Parser.pm
(Deskargatu)
#!/usr/bin/perl -w
use strict;
use IO::Socket::INET;
use Data::Dumper;
#use threads;
use Unicode::String;
package Parser;
require Exporter;
$|=1;
our @ISA = qw(Exporter);
our @EXPORT = qw(get_files $save_old);
my $v =1;
my $save_old;
sub new
{
my $self = {};
$self->{FEEDS} = undef;
$self->{FEEDS_DIR} = "feeds";
$self->{CONFIG_FILE} = "config";
$self->{DATA_DIR} = "data";
$self->{SAVE_OLD} = 20;
$self->{UPDATE_TIMER} = undef;
$self->{DEFIL_TIMER} = undef;
$self->{C_TEXTE} = [];
$self->{NEWS_TITLE_REF} = [];
$self->{NEWS_URL_REF} = undef;
$self->{BROWSER} = "firefox";
bless($self);
return $self;
}
sub set_browser($)
{
my $self = shift;
$self->{BROWSER} = shift;
}
sub get_browser($)
{
my $self = shift;
return $self->{BROWSER};
}
sub set_save_old($)
{
my $self = shift;
$self->{SAVE_OLD} = shift;
}
sub set_defil_timer($)
{
my $self = shift;
$self->{DEFIL_TIMER} = shift;
}
sub set_update_timer($)
{
my $self = shift;
$self->{UPDATE_TIMER} = shift;
}
sub set_config_file($)
{
my $self = shift;
$self->{CONFIG_FILE} = shift;
}
sub set_data_dir($)
{
my $self = shift;
$self->{DATA_DIR} = shift;
}
sub set_feeds_dir($)
{
my $self = shift;
$self->{FEEDS_DIR} = shift;
}
sub get_news_title_ref()
{
my $self =shift;
return $self->{NEWS_TITLE_REF};
}
sub get_news_url_ref()
{
my $self =shift;
return $self->{NEWS_URL_REF};
}
sub get_config_file()
{
my $self =shift;
return $self->{CONFIG_FILE};
}
sub get_update_timer()
{
my $self =shift;
return $self->{UPDATE_TIMER};
}
sub get_defil_timer()
{
my $self =shift;
return $self->{DEFIL_TIMER};
}
sub feeds()
{
my $self = shift;
return $self->{FEEDS};
}
sub read_all()
{
my $self = shift;
foreach my $file(keys %{$self->{FEEDS}})
{
for (0 .. scalar @{$self->{FEEDS}{$file}}-1)
{
if (${$self->{FEEDS}{$file}}[$_])
{
${$self->{FEEDS}{$file}}[$_]{read} = 1;
}
# print "File : $hash_ref\n";
}
}
}
sub read_none()
{
my $self = shift;
foreach my $file(keys %{$self->{FEEDS}})
{
for (0 .. scalar @{$self->{FEEDS}{$file}})
{
if (${$self->{FEEDS}{$file}}[$_])
{
${$self->{FEEDS}{$file}}[$_]{read} = 0;
}
# print "File : $hash_ref\n";
}
}
}
sub parse(@)
{
my $parse_ref = shift;
my @parse_data = @{$parse_ref} ;
#print "RECU : ".$parse_ref."\n";
my @titles;
my $i=-1;
my $elt;
my $opened;
my $start=0;
my $old_m;
my $old_d=0;
my $content;
my $perdu=0;
foreach(@parse_data)
{
# print "$_ \n\touvert: $opened\n" if $opened;
# print "baliser pour $_ ? \n" if !$opened;
if(/<item(?:>| .*>)/) { $start = 1; $i++; }
elsif (/<\/item>/) { $start=0; }
elsif($start)
{
if (!$opened and /^\s*<([\w:]+)(?: .*)?>(?:<\!\[CDATA\[)?(.+?)(\]\]>)?<\/\1>/)
{
my ($opened,$content) = ($1,$2);
$content =~ s/\]\]>$//;
# print "<$opened /> pour $_\n";
$titles[$i]{"$opened"}=$content;
$opened= "";
}
elsif(!$opened and /^\s*<([\w\:]+)(?: [^>]+?)?>(?:<!\[CDATA\[)?(.+?)$/)
{
($opened,$content)=($1,$2);
# print "<$opened>$content pour $_\n";
push(@{$titles[$i]{"$opened"}},$content);
}
elsif($opened && /(.+?)(?:\]\]>)?<\/$opened>/) {
# print "</$opened> pour $_\n";
push(@{$titles[$i]{"$opened"}},$1);
$opened="";
}
elsif($opened)
{ push(@{$titles[$i]{"$opened"}},$_);
}
else
{
$perdu++;
}
}
}
#print "($perdu unused line) " if $perdu;
return @titles;
}
sub start_parse
{
my $self = shift;
my $data_dir = $self->{DATA_DIR};
# print "START PARSE $data_dir !\n";
opendir(DATA,$data_dir) or die $!;
my @fichiers = grep { ! /^\./ } readdir(DATA);
# print "FICHIERS : @fichiers\n";
foreach my $file(@fichiers)
{
if (!-f "$data_dir/$file") { next; }
# print "etud ede $data_dir/$file\n";
open(INFO,"$data_dir/$file") or die $!;
my @data = <INFO>;
my @resultat = &parse(\@data);
$self->{FEEDS}{"$file"} = \@resultat;
}
}
sub update_parse
{
my $self = shift;
# print "UPDATE PARSE: lecture des feeds recus ..." if $v;
my $feeds_dir = $self->{FEEDS_DIR};
# print "FEED DIR: $feeds_dir\n";
opendir(DATA,$feeds_dir);
my @fichiers = grep { ! /^\./ } readdir(DATA);
ID:
foreach my $file(@fichiers)
{
# print "Fichier: $file\n";
my @temp_feeds;
open(INFO,"$feeds_dir/$file");
my @data = <INFO>;
my @resultat = &parse(\@data);
for my $i(0..$#resultat)
{
# print "element en cours: ".(keys %{$self->{FEEDS}{$file}[0]})."\n";
if ($self->{FEEDS}{$file}[0]{'title'} and $resultat[$i]{'title'} eq $self->{FEEDS}{$file}[0]{'title'})
{
last ;
}
else { $resultat[$i]{'read'} = 0; }
if (ref(${$self->{FEEDS}{"$file"}}[0]) ne "HASH")
{
${$self->{FEEDS}{"$file"}}[0] = $resultat[$i];
}
else {
push(@temp_feeds,$resultat[$i]);
}
}
unshift(@{$self->{FEEDS}{"$file"}}, @temp_feeds);
}
foreach my $file(keys %{$self->{FEEDS}})
{
while(scalar @{$self->{FEEDS}{$file}} > $self->{SAVE_OLD}) { pop(@{$self->{FEEDS}{$file}}) }
}
}
sub c_texte()
{
my $self = shift;
return $self->{C_TEXTE};
}
sub titles
{
my $self = shift;
my @titles=();
foreach my $file(keys %{$self->{FEEDS}})
{
foreach my $arr(@{$self->{FEEDS}{$file}})
{
push(@titles,${$arr}{'title'}) if ${$arr}{'title'};
}
}
return @titles;
}
sub generate_c_texte
{
my $self = shift;
$self->{C_TEXTE} =();
$self->{NEWS_TITLE_REF} = ();
my $compteur = 0;
foreach my $file(keys %{$self->{FEEDS}})
{
my @file = split '',$file;
foreach my $arr(@{$self->{FEEDS}{$file}})
{
if (${$arr}{'title'} and ${$arr}{'read'} == 0)
{
push(@{$self->{NEWS_TITLE_REF}},$compteur);
${$self->{NEWS_URL_REF}{"$compteur"}} = ${$arr}{'link'};
# print "URL: ${$arr}{'link'} \n";
${$arr}{'title'} =~ s/^<\!\[CDATA\[(.*)\]\]>$/$1/g;
foreach(@file)
{
push (@{$self->{C_TEXTE}},"<b>".$_."</b>");
$compteur++;
}
# print "\n";
push(@{$self->{C_TEXTE}},":"," ");
$compteur++;
my @special=();
foreach(split '',${$arr}{'title'})
{
if ($_ eq "&" and !@special) { @special=("&"); next; }
elsif(@special) { push(@special,$_); }
if(@special and $_ eq ";") { push(@{$self->{C_TEXTE}},join("",@special)); @special=(); $compteur++;}
elsif(!@special) {
push(@{$self->{C_TEXTE}},$_);
$compteur++;
}
}
push(@{$self->{C_TEXTE}}," ","-","-"," ");
}
}
}
if (!$self->{C_TEXTE}) { @{$self->{C_TEXTE}} = ("N","O","T","H","I","N","G"," ","N","E","W"," !"); }
# print join(" ",@{$self->{C_TEXTE}});
return $self->{C_TEXTE};
}
sub update_file($)
{
my $self = shift;
my $data_dir = $self->{DATA_DIR};
# print "Execution de l update_files\n";
# print "DATA DIR $data_dir\n";
foreach my $file(keys %{$self->{FEEDS}})
{
# print "Mise a jour du fichier $data_dir/$file \n";
open(WRITE,">$data_dir/$file") or die("Error $! \n");
my @arr = @{$self->{FEEDS}{$file}};
if ($self->{SAVE_OLD} == 0) { $self->{SAVE_OLD} = scalar @arr; }
foreach my $key(@arr[0.. (scalar @arr > $self->{SAVE_OLD} ? $self->{SAVE_OLD} -1 : (scalar @arr)-1)])
{
if (!%{$key}) { next; }
print WRITE "<item>\n";
foreach(keys %{$key})
{
if (ref(${$key}{$_}) eq "ARRAY")
{
print WRITE "<$_>";
foreach my $t(@{$key}{$_})
{
foreach my $j(@{$t})
{
if ($j) { print WRITE $j; }
}
}
print WRITE "</$_>\n";
}
else
{
print WRITE "<$_>".${$key}{$_}."</$_>\n";
# print "<$_>${$key}{$_}</$_>\n";
}
}
print WRITE "</item>\n\n\n";
}
close WRITE;
}
}
sub get_files()
{
my $self = shift;
my $file = $self->{CONFIG_FILE};
print "Telechargement des nouveaux flux RSS: \n" if $v;
# print "FILE : $file\n";
open(FILES,$file) or die $!;
my @files = <FILES>;
close FILES;
my $counter=0;
foreach my $data_file(@files)
{
my ($emplacement,$file) = split(' ',$data_file);
# print "File : $file\n";
chomp($file);
chomp($emplacement);
# print "Etude de $file\n";
$counter++;
my ($sock,$contenu);
my ($addr,$path,$fichier) = $file =~ /^https?:\/\/(.+?)\/(.*\/)?(.+?)$/;
$path = $path ? $path : "";
if (!$addr or !$fichier) { warn("PB avec $data_file avec $addr et $fichier !\n"); next; }
print "\tDownloading from $addr: " if $v;
$sock = IO::Socket::INET->new (
PeerAddr=>"$addr",
PeerPort=>'http(80)',
Proto=>'tcp'
) or next;
$sock -> autoflush;
my $requete = "GET /$path$fichier HTTP/1.1\r\nHost: $addr\r\nUser-Agent: tfeserver Parser\r\n\r\n";
print $sock $requete;
open(FICHIER,">".${$self}{FEEDS_DIR}."/$emplacement") or warn "Impossible d ouvrir la sortie (".${$self}{FEEDS_DIR}."/$emplacement) en ecriture \n";
my @temp = ();
my $i=0;
my $chunked=0;
my $chunk_count=0;
my $chunk_reprise=0;
my $encoding = 0;
my $started=0;
my $check=1;
while(defined($_ = <$sock>))
{
$contenu = $_;
if (!$started)
{
if ($contenu =~ /HTTP\/1.1 404/i) { print "Erreur 404 : $data_file!\n\n"; }
if ($contenu =~ /Transfer-Encoding: chunked/) { print "CHUNKED ! " if $v; $chunked=1; }
if ($contenu =~ /Content-Length: (\d+)/i) { $chunk_count=$1; }
if ($contenu =~ /Connection: close/i) { $check=0; }
if ($contenu =~ /Charset=(.*)/i) { $encoding=$1; }
if ($contenu =~ /^\r?$/) { $started=1; }
}
else {
if (!$chunked) {
$chunk_count-= length($contenu);
$contenu =~ s/\r//g;
$contenu =~ s/\n//g;
if ($encoding =~ /utf\-(\d+)/i) { $contenu = Unicode::String::utf8("$contenu")->latin1; }
push(@temp,$contenu);
if ($chunk_count <= 0 and $check) { close $sock; last;}
}
else {
if ($chunk_count<=0) {
$contenu =~ s/[^a-f0-9]//g;
$chunk_count = hex $contenu;
if ($chunk_count <= 0 && length($contenu)>0) { close $sock; last; }
$chunk_reprise=1;
}
else {
$chunk_count-= length($contenu);
if ($encoding =~ /utf\-(\d+)/i && $contenu) { $contenu = Unicode::String::utf8("$contenu")->latin1; }
$contenu =~ s/\r//g;
$contenu =~ s/\n//g;
if ($chunk_reprise) {
if( @temp) { $temp[$#temp].=$contenu; }
else { push(@temp,$contenu); }
$chunk_reprise=0;
}
else { push(@temp,$contenu); }
}
}
}
# $i++;
}
print FICHIER join("\n",@temp);
close FICHIER;
close $sock;
print "\tOK\n" if $v;
}
print "Fin Mise a jour\n\n" if $v;
}
sub conf()
{
my $self = shift;
my $window = Gtk2::Window->new('toplevel') ;
$window->set_title ("Affichage des flux") ;
$window->set_default_size(600 ,400 ) ;
$window->show();
my $hbox = Gtk2::VBox->new(1,0);
$window->add($hbox);
$hbox->show();
my @titres;
foreach my $elt($self->{FEEDS})
{
my %contenu = %{$elt};
foreach my $cle(keys %contenu)
{
push(@titres, Gtk2::Label->new());
$titres[$#titres]->set_markup("<b>$cle</b>");
foreach my $element(@{$self->{FEEDS}->{$cle}})
{
if (!$element) { next; }
if (!$element->{title}) {
foreach(keys (%{$element})) { print "ERROR: $_\n"; }
}
else
{
push(@titres, Gtk2::Label->new(($element->{read} ? "" : "(NEW)")."\t$element->{title}"));
# $titres[$#titres]->set_justify('left');
}
}
}
}
foreach(@titres)
{
# $_->set_justify('left');
$hbox->pack_start($_,1,0,0);
$_->show();
}
}
1;