Tfe

Ongi etorri tfe-ren webgunera...
Old stuff/Perl/svn/tfe/trunk/gtkrss/Parser.pm

(Deskargatu)
#!/usr/bin/perl -w
use strict;
use IO::Socket::INET;
use Data::Dumper;
#use threads;
use Unicode::String;

package Parser;
require Exporter;

$|=1;
our @ISA = qw(Exporter);
our @EXPORT = qw(get_files $save_old);

my $v =1;
my $save_old;

sub new
{
    my $self = {};
    $self->{FEEDS} = undef;
    $self->{FEEDS_DIR} = "feeds";
    $self->{CONFIG_FILE} = "config";    
    $self->{DATA_DIR} = "data";
    $self->{SAVE_OLD} = 20;

    $self->{UPDATE_TIMER} = undef;
    $self->{DEFIL_TIMER} = undef;    
    
    $self->{C_TEXTE} = [];
    $self->{NEWS_TITLE_REF} = [];
    $self->{NEWS_URL_REF} = undef;

    $self->{BROWSER} = "firefox";
        
    
    bless($self);
    return $self;
}

sub set_browser($)
{
    my $self = shift;
    $self->{BROWSER} = shift;
}

sub get_browser($)
{
    my $self = shift;
    return $self->{BROWSER};
}
sub set_save_old($)
{
    my $self = shift;
    $self->{SAVE_OLD} = shift;

}

sub set_defil_timer($)
{
    my $self = shift;
    $self->{DEFIL_TIMER} = shift;

}
sub set_update_timer($)
{
    my $self = shift;
    $self->{UPDATE_TIMER} = shift;

}

sub set_config_file($)
{
    my $self = shift;
    $self->{CONFIG_FILE} = shift;

}


sub set_data_dir($)
{
    my $self = shift;
    $self->{DATA_DIR} = shift;
}
sub set_feeds_dir($)
{
    my $self = shift;
    $self->{FEEDS_DIR} = shift;
}

sub get_news_title_ref()
{	
    my $self =shift;
    return $self->{NEWS_TITLE_REF};
}
sub get_news_url_ref()
{	
    my $self =shift;
    return $self->{NEWS_URL_REF};
}

sub get_config_file()
{	
    my $self =shift;
    return $self->{CONFIG_FILE};
}

sub get_update_timer()
{	
    my $self =shift;
    return $self->{UPDATE_TIMER};
}
sub get_defil_timer()
{	
    my $self =shift;
    return $self->{DEFIL_TIMER};
}
sub feeds()
{
    my $self = shift;
    return $self->{FEEDS};
}









sub read_all()
{
    my $self = shift;
    foreach my $file(keys %{$self->{FEEDS}})
    {
	for (0 .. scalar @{$self->{FEEDS}{$file}}-1)
	{
	if (${$self->{FEEDS}{$file}}[$_])
	{
	${$self->{FEEDS}{$file}}[$_]{read} = 1;
	}
	
#    	print "File : $hash_ref\n";
	}
    }
}

sub read_none()
{
    my $self = shift;
    foreach my $file(keys %{$self->{FEEDS}})
    {
	for (0 .. scalar @{$self->{FEEDS}{$file}})
	{
	if (${$self->{FEEDS}{$file}}[$_])
	{
	${$self->{FEEDS}{$file}}[$_]{read} = 0;
	}
	
#    	print "File : $hash_ref\n";
	}
    }
}




sub parse(@)
{
my $parse_ref = shift;

my @parse_data = @{$parse_ref} ;

#print "RECU  : ".$parse_ref."\n";
my @titles;

my $i=-1;
my $elt;
my $opened;
my $start=0;
my $old_m;
my $old_d=0;
my $content;
my $perdu=0;

foreach(@parse_data)
{

#    print "$_ \n\touvert: $opened\n" if $opened;
#    print "baliser pour $_ ? \n" if !$opened;
    if(/<item(?:>| .*>)/) { $start = 1;   $i++; }
    elsif (/<\/item>/) { $start=0;  }
    elsif($start)
    {
	if (!$opened and /^\s*<([\w:]+)(?: .*)?>(?:<\!\[CDATA\[)?(.+?)(\]\]>)?<\/\1>/) 
	    { 
	    my ($opened,$content) = ($1,$2);
	    $content =~ s/\]\]>$//;
#	    print "<$opened /> pour $_\n";	    
            $titles[$i]{"$opened"}=$content;  
	    $opened= "";	    
	    }
	elsif(!$opened and /^\s*<([\w\:]+)(?: [^>]+?)?>(?:<!\[CDATA\[)?(.+?)$/) 
	    {
	    ($opened,$content)=($1,$2);
#	    print "<$opened>$content pour $_\n";	    
	    push(@{$titles[$i]{"$opened"}},$content);
	    }
	elsif($opened && /(.+?)(?:\]\]>)?<\/$opened>/) {
#	    print "</$opened> pour $_\n";
	    push(@{$titles[$i]{"$opened"}},$1); 
	    $opened="";
	    }
	elsif($opened) 
	    { 	    push(@{$titles[$i]{"$opened"}},$_); 
	    }
	else 
	    { 
	    $perdu++;
	    }
    
    }   
}
#print "($perdu unused line) " if $perdu;
return @titles;
}



sub start_parse
{
    my $self = shift;
    my $data_dir = $self->{DATA_DIR};
#    print "START PARSE $data_dir !\n";
    opendir(DATA,$data_dir) or die $!;
    my @fichiers = grep { ! /^\./ } readdir(DATA);
#    print "FICHIERS : @fichiers\n";
    foreach my $file(@fichiers)
    {
	if (!-f "$data_dir/$file") { next; }
#	print "etud ede $data_dir/$file\n";
	open(INFO,"$data_dir/$file") or die $!;
	my @data = <INFO>;
	my @resultat = &parse(\@data);
	$self->{FEEDS}{"$file"} = \@resultat;	
    }
}

sub update_parse
{
    my $self = shift;
#    print "UPDATE PARSE: lecture des feeds recus ..." if $v;
    my $feeds_dir = $self->{FEEDS_DIR};
#    print "FEED DIR: $feeds_dir\n";
    opendir(DATA,$feeds_dir);
    my @fichiers = grep { ! /^\./ } readdir(DATA);
    ID:
    foreach my $file(@fichiers)
    {
#	print "Fichier: $file\n";
	my @temp_feeds;
	
	open(INFO,"$feeds_dir/$file");
	my @data = <INFO>;
	my @resultat = &parse(\@data);
	for my $i(0..$#resultat)
	{
#	print "element en cours: ".(keys %{$self->{FEEDS}{$file}[0]})."\n";
	if ($self->{FEEDS}{$file}[0]{'title'} and $resultat[$i]{'title'} eq $self->{FEEDS}{$file}[0]{'title'}) 
	{ 
	    last ; 
	}
	else { $resultat[$i]{'read'} = 0; }
	
	if (ref(${$self->{FEEDS}{"$file"}}[0]) ne "HASH") 
	{ 
	    ${$self->{FEEDS}{"$file"}}[0] = $resultat[$i]; 
	}
	else {
	push(@temp_feeds,$resultat[$i]);	 
	}
	}
	unshift(@{$self->{FEEDS}{"$file"}}, @temp_feeds);	 

    }
        foreach my $file(keys %{$self->{FEEDS}})
	{
	while(scalar @{$self->{FEEDS}{$file}} > $self->{SAVE_OLD}) { pop(@{$self->{FEEDS}{$file}}) }
	}    
    
    }


sub c_texte()
{
    my $self = shift;
    return $self->{C_TEXTE};
}

sub titles
{
    my $self = shift;
    my @titles=();
    foreach my $file(keys %{$self->{FEEDS}})
    {
	foreach my $arr(@{$self->{FEEDS}{$file}})
	{
	    push(@titles,${$arr}{'title'}) if ${$arr}{'title'};
	}    
    }
    return @titles;
}

sub generate_c_texte
{	
    my $self = shift;
    $self->{C_TEXTE} =();
    $self->{NEWS_TITLE_REF} = ();
    my $compteur  = 0;
    foreach my $file(keys %{$self->{FEEDS}})
    {
	my @file = split '',$file;
	foreach my $arr(@{$self->{FEEDS}{$file}})
	{
	    if (${$arr}{'title'} and ${$arr}{'read'} == 0) 
	    {  
		push(@{$self->{NEWS_TITLE_REF}},$compteur);
		${$self->{NEWS_URL_REF}{"$compteur"}} = ${$arr}{'link'};
#		print "URL: ${$arr}{'link'} \n";
		${$arr}{'title'} =~ s/^<\!\[CDATA\[(.*)\]\]>$/$1/g;  
		
		foreach(@file)
		{ 
		    push (@{$self->{C_TEXTE}},"<b>".$_."</b>"); 
		    $compteur++;
		}
#		print "\n";
    	        push(@{$self->{C_TEXTE}},":"," ");		
		$compteur++;
		my @special=();
		foreach(split '',${$arr}{'title'})
		{
		if ($_ eq "&" and !@special) { @special=("&"); next; } 
		elsif(@special) {  push(@special,$_); }
		if(@special and $_ eq ";") { push(@{$self->{C_TEXTE}},join("",@special)); @special=(); $compteur++;}
		elsif(!@special) {
		push(@{$self->{C_TEXTE}},$_); 
		$compteur++;
		}
		}
	    push(@{$self->{C_TEXTE}}," ","-","-"," ");

	    }	    
	}
    }
    if (!$self->{C_TEXTE}) { @{$self->{C_TEXTE}} = ("N","O","T","H","I","N","G"," ","N","E","W"," !"); }
#    print join(" ",@{$self->{C_TEXTE}});
    return $self->{C_TEXTE};
}



sub update_file($)
{
    my $self = shift;
    my $data_dir = $self->{DATA_DIR};
#    print "Execution de l update_files\n";
#    print "DATA DIR $data_dir\n";
    foreach my $file(keys %{$self->{FEEDS}})
    {
 #  	print "Mise  a jour du fichier $data_dir/$file \n";
	open(WRITE,">$data_dir/$file") or die("Error $! \n");
	my @arr = @{$self->{FEEDS}{$file}};
	if ($self->{SAVE_OLD} == 0) { $self->{SAVE_OLD} = scalar  @arr; }
	foreach my $key(@arr[0.. (scalar @arr > $self->{SAVE_OLD} ? $self->{SAVE_OLD} -1 : (scalar @arr)-1)])
	{
	    if (!%{$key})  { next; }
	    print WRITE "<item>\n";
	    foreach(keys %{$key}) 
	    {  
		if (ref(${$key}{$_}) eq "ARRAY") 
		{ 
		    print WRITE  "<$_>"; 
		    foreach my $t(@{$key}{$_}) 	   
			{ 
			    foreach my $j(@{$t}) 
			    { 
				if ($j) { print WRITE $j;   }
			    } 
			}
		    print WRITE   "</$_>\n"; 
		}
		else 
		{ 
		    print WRITE  "<$_>".${$key}{$_}."</$_>\n"; 
#		    print "<$_>${$key}{$_}</$_>\n"; 		    

	    }
	    }
	    print WRITE  "</item>\n\n\n";
	}
	close WRITE;
    }
}




sub get_files()
{
    my $self = shift;
    my $file = $self->{CONFIG_FILE};
    print "Telechargement des nouveaux flux RSS: \n"  if $v;    
#    print "FILE : $file\n";
    open(FILES,$file) or die $!;
    my @files = <FILES>;
    close FILES;
    my $counter=0;

    
    foreach my $data_file(@files)
    {

    my ($emplacement,$file) = split(' ',$data_file);
#    print "File : $file\n";
    chomp($file);
    chomp($emplacement);
#    print "Etude de $file\n";
    $counter++;
    my ($sock,$contenu);
    my ($addr,$path,$fichier) = $file =~ /^https?:\/\/(.+?)\/(.*\/)?(.+?)$/;
    $path = $path ? $path : "";
    
    if (!$addr or !$fichier) { warn("PB avec $data_file avec $addr et $fichier !\n"); next; }
    
    print "\tDownloading from $addr: " if $v;    
    $sock = IO::Socket::INET->new (
    	PeerAddr=>"$addr",
    	PeerPort=>'http(80)',
    	Proto=>'tcp'
    	) or next;
    $sock -> autoflush;    
    my $requete = "GET /$path$fichier HTTP/1.1\r\nHost: $addr\r\nUser-Agent: tfeserver Parser\r\n\r\n";
    print $sock $requete;
    open(FICHIER,">".${$self}{FEEDS_DIR}."/$emplacement") or warn "Impossible d ouvrir la sortie (".${$self}{FEEDS_DIR}."/$emplacement) en ecriture \n";
    
    my @temp = ();
    my $i=0;
    my $chunked=0;
    my $chunk_count=0;
    my $chunk_reprise=0;
    my $encoding = 0;
    my $started=0;
    my $check=1;
    while(defined($_ = <$sock>))
    {
    	$contenu = $_;
	if (!$started)
	{
	if ($contenu =~ /HTTP\/1.1 404/i) { print "Erreur 404 : $data_file!\n\n";  }
	if ($contenu =~ /Transfer-Encoding: chunked/) { print "CHUNKED ! " if $v; $chunked=1;  }
	if ($contenu =~ /Content-Length: (\d+)/i) { $chunk_count=$1; }	
	if ($contenu =~ /Connection: close/i) { $check=0; }		
	if ($contenu =~ /Charset=(.*)/i) { $encoding=$1; }	
	if ($contenu =~ /^\r?$/) { $started=1; }
	}
	else	{
	if (!$chunked)	{
        $chunk_count-= length($contenu);	
        $contenu =~ s/\r//g;
        $contenu =~ s/\n//g;	
	if ($encoding =~ /utf\-(\d+)/i)	{ $contenu = Unicode::String::utf8("$contenu")->latin1;	}	
	push(@temp,$contenu); 
	if ($chunk_count <= 0 and $check) { close $sock; last;}
	}
	else	{

	    if ($chunk_count<=0)	    {
		$contenu =~ s/[^a-f0-9]//g;	    
		$chunk_count = hex $contenu;
		if ($chunk_count <= 0 && length($contenu)>0) {  close $sock; last; }
		$chunk_reprise=1;
	    }
	    else   {
	    $chunk_count-= length($contenu);
	    if ($encoding =~ /utf\-(\d+)/i  && $contenu)	{ $contenu = Unicode::String::utf8("$contenu")->latin1; }
            $contenu =~ s/\r//g;
            $contenu =~ s/\n//g;	
	    if ($chunk_reprise) {  
		if( @temp) { $temp[$#temp].=$contenu; }
		else { push(@temp,$contenu); }
		$chunk_reprise=0; 
	    }
	    else   {  push(@temp,$contenu); }
	}
	}
	}
#	$i++;
    
    }
    print FICHIER join("\n",@temp);
    close FICHIER;    
    close $sock;
    print "\tOK\n"  if $v;


    }
    print "Fin Mise a jour\n\n" if $v;
} 


                                                                                                        




sub conf()
{
    my $self = shift;
    my  $window = Gtk2::Window->new('toplevel') ; 
    $window->set_title ("Affichage des flux") ;
    $window->set_default_size(600 ,400 ) ; 
    $window->show();

    my $hbox = Gtk2::VBox->new(1,0);
    $window->add($hbox);
    $hbox->show();
    
    my @titres;
    foreach my $elt($self->{FEEDS})
    {
	my %contenu = %{$elt};
	
    	foreach my $cle(keys  %contenu) 
	    { 
		push(@titres, Gtk2::Label->new());
		$titres[$#titres]->set_markup("<b>$cle</b>");
		foreach my $element(@{$self->{FEEDS}->{$cle}})
		{
		if (!$element) { next; }
		
		if (!$element->{title}) { 
		    foreach(keys (%{$element})) { print "ERROR: $_\n"; }
		}
		else
		{
		push(@titres, Gtk2::Label->new(($element->{read} ? "" : "(NEW)")."\t$element->{title}"));
#		$titres[$#titres]->set_justify('left');
		}
		}
	    }
    }
    foreach(@titres)
    {	
#	$_->set_justify('left');
	$hbox->pack_start($_,1,0,0);
	$_->show();
    }

    
    
}

1;