Tfe

Ongi etorri tfe-ren webgunera...

Old stuff/Perl/irssi/grabber.pl

(Deskargatu)
#!/usr/bin/perl -w

use Irssi;
use DBI;
# File-scoped PostgreSQL handle used by the subs below to record URLs and
# downloaded images. It is opened once, when the script is loaded; loading
# aborts with the driver's own error text if the connection cannot be made.
my $bdd = DBI->connect("dbi:Pg:dbname=tfe;host=localhost","tfe","neve")
    or die "error connecting: $DBI::errstr";

# Have irssi call grabber() for every "event privmsg" signal.
Irssi::signal_add("event privmsg","grabber");

use IO::Socket::INET;

# grabber($server, $data, $from, $address)
#
# Handler for irssi's "event privmsg" signal. Looks for the first
# http/https/ftp URL in the message text and, when that URL is not yet in the
# "url" table, records it together with the protocol, sender, target, server
# tag, host parts and port. URLs whose path carries an image extension are
# additionally handed to get_files() for download.
#
#   $server  - irssi server object; only its {tag} field is read
#   $data    - raw event payload, "<target> :<text>"
#   $from    - value stored in the "pseudo" column
#   $address - unused here
#
# Returns the message text. Dies with the DBI error string if the INSERT
# cannot be prepared or executed.
sub grabber
{
    my ($server, $data, $from, $address) = @_;
    my $tag = $server->{tag};

    # Split the payload once on the first " :" so that the target is also
    # correct for queries and for channels that do not start with '#'.
    my ($channel, $message) = split(/ :/, $data, 2);
    $channel = "" unless defined $channel;
    $message = $data unless defined $message;

    # Captures, in order: whole URL, protocol, full host, main host
    # (domain + TLD), port, path/query.
    my ($url, $protocol, $fhost, $mhost, $port, $path) = $message =~
        /
            (?:\W|^)
            (
                ((?:https?)|(?:ftp))
                :\/\/
                (
                    (?:
                        [a-z\.\-\_0-9]+
                        \.
                    )?
                    (
                        [a-z\-\_0-9]+?
                        \.
                        [a-z]{2,4}
                    )
                )
                (?::(\d+))?
                (
                    (?:\/|\?)
                    [a-z\/\?\(\)\~\;0-9\.\-\_\%\&\=\#]+
                )?
            )
        /ix;
    return $message unless defined $url;

    $port = "" unless $port;
    $path = "" unless defined $path;

    # Nothing to do when the URL has already been recorded. The value is
    # bound, not interpolated, because it comes straight from chat text.
    my ($known) = $bdd->selectrow_array(
        "SELECT 1 FROM url WHERE url = ? LIMIT 1", undef, $url);
    return $message if $known;

    my $sth = $bdd->prepare("INSERT INTO url
    (protocol,pseudo,channel,server,host,vhost,url,port)
    VALUES
    (?,?,?,?,?,?,?,?)
    ") or die $bdd->errstr;
    $sth->execute($protocol,$from,$channel,$tag,$mhost,$fhost,$url,$port)
        or die $bdd->errstr;

    print "Check si on prends le fichier\n";
    # Test only the path, and require the extension to end there, so that
    # host names such as "www.icons.com" are not mistaken for images.
    if ($path =~ /\.(?:jpe?g|png|gif|tiff|ico)(?![a-z0-9])/i)
    {
        print "On recupere le fichier\n";
        get_files($url, $tag, $channel);
        print "Fin de recup\n";
    }

    return $message;
}

# get_files($file, $server, $channel)
#
# Downloads the http(s) URL $file over plain HTTP (port 80 unless the host
# part carries its own port) and stores the payload as
# <cache>/<server>/<channel>/<name>, then records the directory and file name
# in the "images" table. $channel is reduced to word characters before it is
# used as a directory name; an existing file is never overwritten, a random
# prefix is added to the name instead.
#
# Returns without saving anything when the URL cannot be split into host and
# file name, the connection fails, the response status is not 200, or the
# output file cannot be written. Dies with the DBI error string if the INSERT
# fails.
#
# The payload is written byte for byte; no charset conversion is applied,
# since the stored files are images.
sub get_files
{
    my ($file, $server, $channel) = @_;
    $channel =~ s/\W//g;
    my $emplacement = "/home/tfe/public_html/dl/images/cache";
    print "File : $file\n";
    chomp($file);
    print "Etude de $file\n";
    $counter++;    # package-level counter; it is not read anywhere in this block

    my ($addr, $path, $fichier) = $file =~ /^https?:\/\/(.+?)\/(.*\/)?(.+?)$/;
    $addr    = "" unless defined $addr;
    $path    = "" unless $path;
    $fichier = "" unless defined $fichier;

    if (!$addr or !$fichier)
    {
        warn("PB avec $file avec $addr et $fichier !\n");
        return;
    }

    # $v is a package-level verbosity flag; it is not set anywhere in this block.
    print "\tDownloading from $addr: " if $v;
    my $sock = IO::Socket::INET->new(
        PeerAddr => "$addr",
        PeerPort => 'http(80)',
        Proto    => 'tcp'
        ) or return;
    binmode($sock);
    $sock->autoflush;

    # The request uses the name exactly as it appeared in the URL.
    my $requete = "GET /$path$fichier HTTP/1.1\r\nHost: $addr\r\nUser-Agent: tfeserver Parser\r\n\r\n";
    print $sock $requete;

    my ($status, $contenu) = _read_http_response($sock);
    close $sock;

    if ($status != 200)
    {
        print "Erreur $status : $file!\n\n";
        return;
    }

    # Local file name: undo %XX escapes, then neutralise anything that could
    # leave the target directory.
    $fichier =~ s/%([a-f0-9]{2})/chr(hex($1))/gei;
    $fichier =~ s/[\/\\\x00-\x1f]/_/g;
    $fichier = "_$fichier" if $fichier =~ /^\.+$/;

    my $dossier = "$emplacement/$server/$channel";
    if (! -d "$emplacement/$server") { mkdir("$emplacement/$server", 0777); }
    if (! -d $dossier)               { mkdir($dossier, 0777); }
    while (-f "$dossier/$fichier")
    {
        $fichier = rand(9).$fichier;
    }

    my $sortie;
    if (!open($sortie, '>', "$dossier/$fichier"))
    {
        warn "Impossible d ouvrir la sortie ($emplacement/$server/$channel/$fichier en ecriture \n";
        return;
    }
    binmode($sortie);
    print {$sortie} $contenu;
    if (!close($sortie))
    {
        # Buffered write errors only show up here.
        warn "Impossible d ecrire $dossier/$fichier : $!\n";
        return;
    }
    print "\tOK\n" if $v;

    # The file name comes from the URL, so it is bound rather than interpolated.
    my $sth = $bdd->prepare("INSERT INTO images (path,fichier) VALUES (?,?)")
        or die $bdd->errstr;
    $sth->execute($dossier, $fichier) or die $sth->errstr;

    print "Fin Mise a jour\n\n" if $v;
    return;
}

# Reads one HTTP response from $sock and returns (status, body). The status is
# 0 when no status line was seen. The body is delimited by chunked transfer
# coding or Content-Length when the headers announce one, and by end of stream
# otherwise.
sub _read_http_response
{
    my ($sock) = @_;
    my $status  = 0;
    my $chunked = 0;
    my $length;    # stays undef when no Content-Length header is present

    # Header section: everything up to the first empty line.
    while (defined(my $ligne = <$sock>))
    {
        last if $ligne =~ /^\r?$/;
        if ($ligne =~ /^HTTP\/\d\.\d\s+(\d{3})/i)
        {
            $status = $1 unless $status;
        }
        elsif ($ligne =~ /^Transfer-Encoding:.*\bchunked\b/i)
        {
            print "CHUNKED ! " if $v;
            $chunked = 1;
        }
        elsif ($ligne =~ /^Content-Length:\s*(\d+)/i)
        {
            $length = $1;
        }
    }

    my $corps = "";
    if ($chunked)
    {
        while (defined(my $ligne = <$sock>))
        {
            # The CRLF that closes the previous chunk shows up as an empty line.
            next if $ligne =~ /^\r?$/;
            my ($hexa) = $ligne =~ /^\s*([0-9a-f]+)/i or last;
            my $taille = hex($hexa);
            last if $taille == 0;    # terminating zero-length chunk
            my $morceau = _read_exact($sock, $taille);
            $corps .= $morceau;
            last if length($morceau) < $taille;    # stream ended early
        }
    }
    elsif (defined $length)
    {
        $corps = _read_exact($sock, $length);
    }
    else
    {
        local $/;    # no length information: take everything until EOF
        my $reste = <$sock>;
        $corps = $reste if defined $reste;
    }

    return ($status, $corps);
}

# Reads up to $want bytes from $fh and returns them; the result is shorter
# only when the stream ends or a read error occurs first.
sub _read_exact
{
    my ($fh, $want) = @_;
    my $tampon = "";
    while (length($tampon) < $want)
    {
        my $lus = read($fh, $tampon, $want - length($tampon), length($tampon));
        last unless $lus;
    }
    return $tampon;
}