# Copyright (C) 2001,2004 Stichting LogReport Foundation logreport@logreport.org

# This file is part of Lire.

# Lire is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program (see COPYING); if not, check with
# http://www.gnu.org/copyleft/gpl.html or write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

# Author:
#   Francis J. Lacoste <flacoste@logreport.org>

package Lire::DlfAnalysers::URLCategoriser;

use strict;

use base qw/Lire::DlfCategoriser/;

# Constructor: returns a new, empty hash-based object blessed into the
# class on which it was invoked.
sub new {
    my ($class) = @_;

    my $self = {};
    bless $self, $class;

    return $self;
}

# Returns the name of this analyser, the constant string 'www-url'.
sub name {
    return 'www-url';
}

# Returns the short title of this analyser as a constant string.
sub title {
    return 'URL DlfAnalyser';
}

# Returns the description of this analyser: a constant string holding a
# single <para> element that lists the fields extracted from
# requested_page.
sub description {
    return '<para>This categoriser extracts '
      . '<structfield>requested_page_ext</structfield>, '
      . '<structfield>requested_file</structfield> and '
      . '<structfield>directory</structfield> information from the URL '
      . 'contained in <structfield>requested_page</structfield>.</para>';
}

# Returns the name of the source schema, the constant string 'www'.
sub src_schema {
    return 'www';
}

# Returns the name of the destination schema, the constant string
# 'www-url'.
sub dst_schema {
    return 'www-url';
}

# Initialisation hook. Accepts a configuration argument but does nothing
# with it, and returns nothing (undef in scalar context, the empty list
# in list context).
sub initialise {
    my $self   = shift;
    my $config = shift;    # accepted but not used

    return;
}

# Derive the requested_file, requested_page_ext and directory fields of
# a DLF record from its requested_page field.
#
# Parameters:
#   $self - the categoriser object (not used)
#   $dlf  - hash reference holding the record; it is modified in place
#
# Fields written to $dlf:
#   requested_file     - requested_page with everything from the first
#                        '?' onwards removed
#   requested_page_ext - text after the last '.' of the final path
#                        segment; set to undef when that text is empty,
#                        left untouched when the segment has no '.'
#   directory          - requested_file with its final '/segment'
#                        removed, or '/' when the result is empty
#
# Returns nothing. A record whose requested_page is undefined is left
# untouched.
sub categorise {
    my ( $self, $dlf ) = @_;

    my $page = $dlf->{'requested_page'};
    return unless defined $page;

    # Remove query string to find real file
    my $query_start = index $page, '?';
    my $file = $query_start < 0 ? $page : substr( $page, 0, $query_start );
    $dlf->{'requested_file'} = $file;

    # Parse file extension. Only a dot located after the last '/' counts:
    # a dot inside a directory name (e.g. '/docs/v1.2/readme') must not
    # yield an "extension" such as '2/readme'.
    my $last_slash = rindex $file, '/';
    my $last_dot   = rindex $file, '.';
    if ( $last_dot > $last_slash ) {
        my $ext = substr $file, $last_dot + 1;
        $dlf->{'requested_page_ext'} = length($ext) ? $ext : undef;
    }

    # The directory is the file path minus its last segment; an empty
    # result means the file lives directly under the root.
    my $directory = $file;
    $directory =~ s|/[^/]*$||;
    $dlf->{'directory'} = $directory || '/';

    return;
}

# A module must end with a true value so that require/use succeeds.
1;
