# Copyright (C) 2002,2004 Stichting LogReport Foundation logreport@logreport.org

# This file is part of Lire.

# Lire is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program (see COPYING); if not, check with
# http://www.gnu.org/copyleft/gpl.html or write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

# Author:
#   Francis J. Lacoste <flacoste@logreport.org>

package Lire::DlfAnalysers::UserSessionAnalyser;

use strict;

# Largest gap allowed between two consecutive requests of the same
# session; is_session_expired() adds it to the session's last request
# time. (Presumably seconds, i.e. DLF 'time' values are epoch seconds --
# confirm against the www schema.)
use constant SESSION_EXPIRES => 30 * 60; # 30 minutes

# Requests whose requested_page ends with one of these extensions
# (case-insensitive) are counted as requests but not as pages by
# update_session().
my $NOT_PAGE_RE = qr/\.(png|jpg|jpeg|gif|css)$/i;

use base qw/Lire::DlfAnalyser/;

use Lire::DlfQuery;

# Constructor: returns a new, empty analyser object blessed into the
# invoking class.
sub new {
    my ( $class ) = @_;

    my $self = {};
    return bless $self, $class;
}

# Returns the identifier of this analyser.
sub name {
    my $analyser_name = 'www-user_session';
    return $analyser_name;
}

# Returns the human-readable title of this analyser.
#
# The previous value ("Attack DlfAnalyser") did not describe this
# analyser, which builds www-user_session records.
sub title {
    return 'User Session DlfAnalyser';
}

# Returns the DocBook description of this analyser as a single <para>
# element.
#
# Fixes over the previous text: the stray '>' after the opening
# <structfield> tag of client_host (it rendered as a literal '>'),
# and the typos "elasped" and "User session are".
sub description {
    return '<para>This analyser creates
<structname>www-user_session</structname> DLF records. User
sessions are constructed based on the unique combination of
<structfield>client_host</structfield> and
<structfield>useragent</structfield>. Two requests from the same
unique id will be classified as two sessions if more than 30
minutes elapsed between the two requests.</para>';

}

# Returns the name of the DLF schema this analyser reads from.
sub src_schema {
    my $schema = 'www';
    return $schema;
}

# Returns the name of the DLF schema this analyser writes to.
sub dst_schema {
    my $schema = 'www-user_session';
    return $schema;
}

# Groups the source DLF records into user sessions and writes one
# session record per group.
#
# Parameters:
#   $process - analysis process object; this method calls its
#              source_filter() and dlf_store() methods and hands it to
#              end_session().
#   $config  - accepted but not used here.
#
# A session is identified by the client_host/useragent pair. A new
# session is started whenever that pair changes or whenever
# is_session_expired() reports that the current one has expired.
#
# Returns nothing.
sub analyse {
    my ( $self, $process, $config ) = @_;

    # Direct method call instead of "new Lire::DlfQuery(...)": this
    # package defines its own new(), which makes the indirect object
    # form ambiguous to the parser. The schema name comes from
    # src_schema() so it is spelled in one place only.
    my $query = Lire::DlfQuery->new( $self->src_schema() );
    foreach my $field ( qw/dlf_id time requested_page client_host useragent/ ) {
        $query->add_field( $field );
    }

    # The loop below relies on all rows of one client_host/useragent
    # pair being contiguous (presumably in ascending time order --
    # confirm against Lire::DlfQuery::set_sort_spec).
    $query->set_sort_spec( 'client_host useragent time' );
    my $filter = $process->source_filter();
    $query->set_filter_clause( $filter->sql_expr(), @{$filter->sql_params()} )
      if defined $filter;

    my $result = $query->execute( $process->dlf_store() );
    my $last_session_id = '';
    my $session;
    while ( defined( my $dlf = $result->next_row() ) ) {
        # Rows missing any of the fields used below cannot be assigned
        # to a session and are ignored.
        next unless ( defined $dlf->{'time'}
                      && defined $dlf->{'client_host'}
                      && defined $dlf->{'useragent'}
                      && defined $dlf->{'requested_page'} );

        my $session_id = $dlf->{'client_host'} . ':' . $dlf->{'useragent'};
        if ( $session_id ne $last_session_id ) {
            # NOTE(review): $dlf here belongs to a different
            # client/agent pair, so end_session() judges completion of
            # the previous session against an unrelated request time --
            # confirm this is intended.
            $self->end_session( $process, $session, $dlf )
              if defined $session;
            $session = $self->create_session( $dlf );
            $last_session_id = $session_id;
        } elsif ( $self->is_session_expired( $session, $dlf ) ) {
            $self->end_session( $process, $session, $dlf );
            $session = $self->create_session( $dlf );
        }
        $self->update_session( $session, $dlf );
    }

    # Flush the last open session. Its own end time is used as the
    # reference time.
    $self->end_session( $process, $session,
                        { 'time' => $session->{'session_end'} } )
      if defined $session;

    return;
}

# Builds a fresh session record for the request $dlf.
#
# Parameters:
#   $dlf - hash reference; its client_host, useragent and time fields
#          are read.
#
# Side effect: increments the counter kept in
# $self->{'visits'}{"<client_host>:<useragent>"}; the new counter value
# becomes the session's visit_number.
#
# Returns a hash reference with the counters at zero, every page slot
# set to 'None', 'completed' set to 'no' and an empty 'dlf_ids' list.
sub create_session {
    my ( $self, $dlf ) = @_;

    my ( $host, $agent, $start ) =
      @{$dlf}{ qw/client_host useragent time/ };

    my $visit_number = ( $self->{'visits'}{ $host . ':' . $agent } += 1 );

    my %session = (
        'session_id'    => join( ":", $start, $host, $agent ),
        'session_start' => $start,
        'req_count'     => 0,
        'page_count'    => 0,
        'visit_number'  => $visit_number,
        'completed'     => 'no',
        'dlf_ids'       => [],
    );

    # No page has been seen yet.
    foreach my $slot ( qw/first_page page_2 page_3 page_4 page_5 last_page/ ) {
        $session{$slot} = 'None';
    }

    return \%session;
}

# Maps the rank of a page inside a session to the session field that
# stores it. Only the first five pages have a dedicated field.
my %page2field = (
    1 => 'first_page',
    2 => 'page_2',
    3 => 'page_3',
    4 => 'page_4',
    5 => 'page_5',
);

# Accounts for the request $dlf in $session.
#
# Every request moves session_end to the request time, recomputes
# session_length, increments req_count and appends the request's dlf_id
# to the session's dlf_ids. Requests whose requested_page does not match
# $NOT_PAGE_RE additionally increment page_count, fill the page slot for
# that rank (if it has one) and become the session's last_page.
#
# Returns nothing.
sub update_session {
    my ( $self, $session, $dlf ) = @_;

    my $now  = $dlf->{'time'};
    my $page = $dlf->{'requested_page'};

    $session->{'session_end'}    = $now;
    $session->{'session_length'} = $now - $session->{'session_start'};
    $session->{'req_count'}++;
    push @{$session->{'dlf_ids'}}, $dlf->{'dlf_id'};

    # Images and stylesheets count as requests only.
    return if $page =~ $NOT_PAGE_RE;

    my $rank = ++$session->{'page_count'};
    my $slot = $page2field{$rank};
    if ( defined $slot ) {
        $session->{$slot} = $page;
    }
    $session->{'last_page'} = $page;

    return;
}

# Tells whether the request $dlf comes too late to belong to $session.
#
# Returns true when the request's time is strictly greater than the
# session's session_end plus SESSION_EXPIRES, false otherwise.
sub is_session_expired {
    my ( $self, $session, $dlf ) = @_;

    my $expires_at = $session->{'session_end'} + SESSION_EXPIRES();

    return $dlf->{'time'} > $expires_at;
}

# Closes $session and hands it to $process.
#
# The session's 'completed' field is switched to 'yes' when
# is_session_expired() reports the session as expired relative to $dlf;
# otherwise it is left untouched. The session is then written with
# $process->write_dlf(), together with its list of dlf_ids.
#
# Returns nothing.
sub end_session {
    my ( $self, $process, $session, $dlf ) = @_;

    if ( $self->is_session_expired( $session, $dlf ) ) {
        $session->{'completed'} = 'yes';
    }

    my $source_ids = $session->{'dlf_ids'};
    $process->write_dlf( $session, $source_ids );

    return;
}

# keep perl happy
1;
