#!/usr/bin/perl
# Parser.pm (gtkrss_b): fetches RSS feeds over HTTP and parses them into items
# for the scrolling news display.
package Parser;

use strict;
use warnings;
use IO::Socket::INET;
use Data::Dumper;
use threads;

require Exporter;
our @ISA    = qw(Exporter);
our @EXPORT = qw(get_files);

my $v = 1;    # verbose flag: print progress messages when true
# Constructor: build a parser with default directories and settings.
sub new
{
    my $class = shift;
    my $self  = {
        FEEDS          => undef,     # parsed items, keyed by feed file name
        FEEDS_DIR      => "feeds",   # where freshly downloaded feeds are written
        CONFIG_FILE    => "config",  # list of "<local-name> <feed-url>" lines
        DATA_DIR       => "data",    # persistent per-feed item storage
        SAVE_OLD       => 20,        # how many items to keep per feed
        UPDATE_TIMER   => undef,
        DEFIL_TIMER    => undef,
        C_TEXTE        => [],        # ticker text, one character per element
        NEWS_TITLE_REF => [],        # indexes in C_TEXTE where titles start
        NEWS_URL_REF   => undef,     # maps those indexes to item URLs
        BROWSER        => "firefox",
    };
    bless($self, $class);
    return $self;
}
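# Trivial getters and setters for the fields initialised in new().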
sub set_browser
{
    my $self = shift;
    $self->{BROWSER} = shift;
}
sub get_browser
{
    my $self = shift;
    return $self->{BROWSER};
}
sub set_save_old
{
    my $self = shift;
    $self->{SAVE_OLD} = shift;
}
sub set_defil_timer
{
    my $self = shift;
    $self->{DEFIL_TIMER} = shift;
}
sub set_update_timer
{
    my $self = shift;
    $self->{UPDATE_TIMER} = shift;
}
sub set_config_file
{
    my $self = shift;
    $self->{CONFIG_FILE} = shift;
}
sub set_data_dir
{
    my $self = shift;
    $self->{DATA_DIR} = shift;
}
sub set_feeds_dir
{
    my $self = shift;
    $self->{FEEDS_DIR} = shift;
}
sub get_news_title_ref
{
    my $self = shift;
    return $self->{NEWS_TITLE_REF};
}
sub get_news_url_ref
{
    my $self = shift;
    return $self->{NEWS_URL_REF};
}
sub get_config_file
{
    my $self = shift;
    return $self->{CONFIG_FILE};
}
sub get_update_timer
{
    my $self = shift;
    return $self->{UPDATE_TIMER};
}
sub get_defil_timer
{
    my $self = shift;
    return $self->{DEFIL_TIMER};
}
sub feeds
{
    my $self = shift;
    return $self->{FEEDS};
}
# Mark every stored item as read.
sub read_all
{
    my $self = shift;
    foreach my $file (keys %{$self->{FEEDS}})
    {
        foreach my $item (@{$self->{FEEDS}{$file}})
        {
            $item->{read} = 1 if $item;
        }
    }
}
# Mark every stored item as unread.
sub read_none
{
    my $self = shift;
    foreach my $file (keys %{$self->{FEEDS}})
    {
        foreach my $item (@{$self->{FEEDS}{$file}})
        {
            $item->{read} = 0 if $item;
        }
    }
}
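# parse(\@lines): minimal line-oriented RSS parser. Takes a reference to the
# raw lines of a feed and returns a list of hashes, one per <item>, mapping
# each child element name (title, link, description, ...) to its content.
# Elements that span several lines are stored as an array of line fragments.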
sub parse
{
    my $parse_ref  = shift;
    my @parse_data = @{$parse_ref};
    my @titles;          # one hash per <item>, keyed by element name
    my $i      = -1;     # index of the current item
    my $opened = "";     # element currently being accumulated across lines
    my $start  = 0;      # true while inside an <item> block
    my $content;
    my $perdu  = 0;      # lines inside an item that matched nothing
    foreach (@parse_data)
    {
        if    (/<item(?:>| .*>)/) { $start = 1; $i++; }
        elsif (/<\/item>/)        { $start = 0; }
        elsif ($start)
        {
            # Element opened and closed on the same line.
            if (!$opened and /^\s*<([\w:]+)(?: .*)?>(?:<!\[CDATA\[)?(.*)<\/\1>/)
            {
                ($opened, $content) = ($1, $2);
                $content =~ s/\]\]>$//;
                $titles[$i]{$opened} = $content;
                $opened = "";
            }
            # Element opened on this line but closed on a later one.
            elsif (!$opened and /^\s*<([\w:]+)(?: [^>]+?)?>(?:<!\[CDATA\[)?(.+?)$/)
            {
                ($opened, $content) = ($1, $2);
                push(@{$titles[$i]{$opened}}, $content);
            }
            # Closing line of a multi-line element.
            elsif ($opened && /(.*?)(?:\]\]>)?<\/$opened>/)
            {
                push(@{$titles[$i]{$opened}}, $1) if defined $1 and length $1;
                $opened = "";
            }
            # Continuation line of a multi-line element.
            elsif ($opened)
            {
                push(@{$titles[$i]{$opened}}, $_);
            }
            else
            {
                $perdu++;
            }
        }
    }
    # print "($perdu unused lines) " if $perdu;
    return @titles;
}
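# start_parse(): read every file already saved in DATA_DIR and load its items
# into $self->{FEEDS}, keyed by file name.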
sub start_parse
{
    my $self     = shift;
    my $data_dir = $self->{DATA_DIR};
    opendir(my $dh, $data_dir) or die $!;
    my @fichiers = grep { !/^\./ } readdir($dh);
    closedir($dh);
    foreach my $file (@fichiers)
    {
        next if !-f "$data_dir/$file";
        open(my $info, '<', "$data_dir/$file") or die $!;
        my @data = <$info>;
        close($info);
        my @resultat = parse(\@data);
        $self->{FEEDS}{$file} = \@resultat;
    }
}
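# update_parse(): parse the freshly downloaded files in FEEDS_DIR, mark new
# items as unread, prepend them to the stored items of each feed and discard
# anything beyond the SAVE_OLD limit.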
sub update_parse
{
    my $self      = shift;
    my $feeds_dir = $self->{FEEDS_DIR};
    opendir(my $dh, $feeds_dir) or return;
    my @fichiers = grep { !/^\./ } readdir($dh);
    closedir($dh);
    foreach my $file (@fichiers)
    {
        my @temp_feeds;
        open(my $info, '<', "$feeds_dir/$file") or next;
        my @data = <$info>;
        close($info);
        my @resultat = parse(\@data);
        for my $i (0 .. $#resultat)
        {
            # Stop as soon as we reach an item we already have.
            if ($self->{FEEDS}{$file}[0]{'title'}
                and $resultat[$i]{'title'} eq $self->{FEEDS}{$file}[0]{'title'})
            {
                last;
            }
            else { $resultat[$i]{'read'} = 0; }
            if (ref(${$self->{FEEDS}{$file}}[0]) ne "HASH")
            {
                # No stored items yet for this feed: seed the list.
                ${$self->{FEEDS}{$file}}[0] = $resultat[$i];
            }
            else
            {
                push(@temp_feeds, $resultat[$i]);
            }
        }
        unshift(@{$self->{FEEDS}{$file}}, @temp_feeds);
    }
    # Drop the oldest items beyond the SAVE_OLD limit.
    foreach my $file (keys %{$self->{FEEDS}})
    {
        while (scalar @{$self->{FEEDS}{$file}} > $self->{SAVE_OLD})
        {
            pop(@{$self->{FEEDS}{$file}});
        }
    }
}
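# Accessors for the generated ticker text and for the stored item titles.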
sub c_texte
{
    my $self = shift;
    return $self->{C_TEXTE};
}
sub titles
{
    my $self   = shift;
    my @titles = ();
    foreach my $file (keys %{$self->{FEEDS}})
    {
        foreach my $arr (@{$self->{FEEDS}{$file}})
        {
            push(@titles, $arr->{'title'}) if $arr->{'title'};
        }
    }
    return @titles;
}
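# generate_c_texte(): rebuild the ticker text from all unread items. The
# result is stored in C_TEXTE as one element per displayed character (the
# feed name is wrapped in <b> tags, HTML entities stay whole); the position
# of each title and its link are recorded in NEWS_TITLE_REF / NEWS_URL_REF.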
sub generate_c_texte
{
    my $self = shift;
    $self->{C_TEXTE}        = [];
    $self->{NEWS_TITLE_REF} = [];
    my $compteur = 0;
    foreach my $file (keys %{$self->{FEEDS}})
    {
        my @file = split '', $file;
        foreach my $arr (@{$self->{FEEDS}{$file}})
        {
            if ($arr->{'title'} and $arr->{'read'} == 0)
            {
                # Remember where this title starts and which URL it points to.
                push(@{$self->{NEWS_TITLE_REF}}, $compteur);
                ${$self->{NEWS_URL_REF}{$compteur}} = $arr->{'link'};
                $arr->{'title'} =~ s/^<!\[CDATA\[(.*)\]\]>$/$1/g;
                # Feed name in bold, one character per element.
                foreach (@file)
                {
                    push(@{$self->{C_TEXTE}}, "<b>" . $_ . "</b>");
                    $compteur++;
                }
                push(@{$self->{C_TEXTE}}, ":", " ");
                $compteur++;
                # Keep HTML entities such as &amp; together as one element.
                my @special = ();
                foreach (split '', $arr->{'title'})
                {
                    if    ($_ eq "&" and !@special) { @special = ("&"); next; }
                    elsif (@special)                { push(@special, $_); }
                    if (@special and $_ eq ";")
                    {
                        push(@{$self->{C_TEXTE}}, join("", @special));
                        @special = ();
                        $compteur++;
                    }
                    elsif (!@special)
                    {
                        push(@{$self->{C_TEXTE}}, $_);
                        $compteur++;
                    }
                }
                push(@{$self->{C_TEXTE}}, " ", "-", "-", " ");
            }
        }
    }
    # Fallback text when there is nothing unread.
    if (!@{$self->{C_TEXTE}})
    {
        @{$self->{C_TEXTE}} = ("N","O","T","H","I","N","G"," ","N","E","W"," !");
    }
    return $self->{C_TEXTE};
}
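# update_file(): write the stored items of every feed back to DATA_DIR as a
# sequence of <item> blocks, keeping at most SAVE_OLD items per feed.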
sub update_file
{
    my $self     = shift;
    my $data_dir = $self->{DATA_DIR};
    foreach my $file (keys %{$self->{FEEDS}})
    {
        open(my $write, '>', "$data_dir/$file") or die("Error $! \n");
        my @arr = @{$self->{FEEDS}{$file}};
        if ($self->{SAVE_OLD} == 0) { $self->{SAVE_OLD} = scalar @arr; }
        # Write at most SAVE_OLD items back to the data file.
        foreach my $key (@arr[0 .. (scalar @arr > $self->{SAVE_OLD} ? $self->{SAVE_OLD} - 1 : scalar @arr - 1)])
        {
            print $write "<item>\n";
            foreach (keys %{$key})
            {
                if (ref($key->{$_}) eq "ARRAY")
                {
                    # Multi-line elements were stored as an array of fragments.
                    print $write "<$_>";
                    foreach my $j (@{$key->{$_}})
                    {
                        print $write $j if $j;
                    }
                    print $write "</$_>\n";
                }
                else
                {
                    print $write "<$_>" . $key->{$_} . "</$_>\n";
                }
            }
            print $write "</item>\n\n\n";
        }
        close $write;
    }
}
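# get_files(): read CONFIG_FILE ("<local-name> <feed-url>" per line) and
# download each feed with a plain HTTP GET over a TCP socket, handling
# chunked transfer encoding by hand, then save the body in FEEDS_DIR.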
sub get_files
{
    my $self = shift;
    my $file = $self->{CONFIG_FILE};
    print "Downloading new RSS feeds:\n" if $v;
    open(my $files_fh, '<', $file) or die $!;
    my @files = <$files_fh>;
    close $files_fh;
    foreach my $data_file (@files)
    {
        # Each config line is "<local-name> <feed-url>".
        my ($emplacement, $url) = split(' ', $data_file);
        next unless defined $url;    # skip blank or malformed lines
        chomp($url);
        chomp($emplacement);
        my ($sock, $contenu);
        my ($addr, $path, $fichier) = $url =~ /^https?:\/\/(.+?)\/(.*\/)?(.+?)$/;
        $path = $path ? $path : "";
        if (!$addr or !$fichier)
        {
            warn("Could not parse feed URL in config line: $data_file\n");
            next;
        }
        print "\tDownloading from $addr: " if $v;
        $sock = IO::Socket::INET->new(
            PeerAddr => $addr,
            PeerPort => 'http(80)',
            Proto    => 'tcp'
        ) or do { print "connection failed\n" if $v; next; };
        $sock->autoflush(1);
        # Plain HTTP/1.1 GET; ask the server to close the connection so the
        # read loop below terminates.
        my $requete = "GET /$path$fichier HTTP/1.1\r\nHost: $addr\r\nConnection: close\r\n\r\n";
        print $sock $requete;
        open(my $out, '>', $self->{FEEDS_DIR} . "/$emplacement")
            or do { warn "Cannot open " . $self->{FEEDS_DIR} . "/$emplacement for writing: $!\n"; close $sock; next; };
        my @temp          = ();
        my $chunked       = 0;
        my $chunk_count   = 0;
        my $chunk_reprise = 0;
        my $encoding      = 0;
        foreach (<$sock>)
        {
            $contenu = $_;
            if ($contenu =~ /^HTTP\/1.1 404/i) { print "Error 404: $data_file!\n\n"; next; }
            if ($contenu =~ /^Transfer-Encoding: chunked/) { $chunked = 1; next; }
            if ($contenu =~ /^Content-Type: /) { next; }
            if ($contenu =~ /^charset=(.*)/)  { $encoding = $1; next; }
            if (!$chunked)
            {
                $contenu =~ s/\r//g;
                $contenu =~ s/\n//g;
                push(@temp, $contenu);
            }
            else
            {
                # Chunked transfer encoding: a hexadecimal size line precedes
                # each chunk of data.
                if ($chunk_count <= 0)
                {
                    $contenu =~ s/[^a-f0-9]//g;
                    $chunk_count   = hex $contenu;
                    $chunk_reprise = 1;
                }
                else
                {
                    $chunk_count -= length($contenu);
                    $contenu =~ s/\r//g;
                    $contenu =~ s/\n//g;
                    if ($chunk_reprise)
                    {
                        # First line after a size marker continues the
                        # previously stored line.
                        $temp[$#temp] .= $contenu;
                        $chunk_reprise = 0;
                    }
                    else { push(@temp, $contenu); }
                }
            }
        }
        print "..." if $v;
        print $out join("\n", @temp);
        close $out;
        close $sock;
        print "\tOK\n" if $v;
    }
    print "Update finished\n\n" if $v;
}
1;
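# Example usage (a minimal sketch, not part of the module): it assumes a
# "config" file with lines of the form "<local-name> <feed-url>" and writable
# "feeds" and "data" directories; the GUI-side pieces (timers, browser) are
# omitted.
#
#   use Parser;
#   my $parser = Parser->new();
#   $parser->get_files();                 # download the feeds listed in config
#   $parser->start_parse();               # load items previously saved in data/
#   $parser->update_parse();              # merge the freshly downloaded items
#   my $ticker = $parser->generate_c_texte();
#   print join("", @{$ticker}), "\n";     # the scrolling text, one char/element
#   $parser->update_file();               # persist merged items back to data/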