package Lire::DataTypes;

use strict;

use base qw/ Exporter /;

use Carp;

use vars qw/ @EXPORT_OK %EXPORT_TAGS %VALIDATORS /;

BEGIN {
    @EXPORT_OK = qw/ check_xml_name check_nmtoken
                     check_bool check_int check_number
                     count2number number2count check_string eval_bool
                     is_numeric_type is_quantity_type
                     check_timestamp check_time check_date check_duration
                     duration2sec sec2duration is_numeric_type is_time_type
                     minutely_duration hourly_duration daily_duration
                     weekly_duration monthly_duration yearly_duration
                     check_ip check_port check_hostname check_url check_email
                     check_bytes size2bytes bytes2size check_filename
                     is_numeric_type
                     check_superservice check_field check_chart check_type
                     format_numeric_type /;
};

use Lire::Config;

# We don't set this at compile time, since the subs have to be defined
# first
%VALIDATORS = (
	       'xml_name'	=> \&check_xml_name,
	       'nmtoken'	=> \&check_nmtoken,

	       'bool'	=> \&check_bool,
	       'int'	=> \&check_int,
	       'number'	=> \&check_number,
	       'string'	=> \&check_string,

	       'timestamp' => \&check_timestamp,
	       'date'	=> \&check_date,
	       'time'	=> \&check_time,
	       'duration'	=> \&check_duration,

	       'ip'	=> \&check_ip,
               'port'     => \&check_port,
	       'hostname'	=> \&check_hostname,
	       'url'	=> \&check_url,
	       'email'	=> \&check_email,

	       'bytes'	=> \&check_bytes,
	       'filename'	=> \&check_filename,

	       'type'	=> \&check_type,
	       'field'	=> \&check_field,
	       'superservice' => \&check_superservice,
	       'chart'	=> \&check_chart,
	      );

#
# XML Types
#
sub check_xml_name {
    unless (defined $_[0]) {
        warn "check_xml_name called with undef arg\n";
        return undef;
    }

    return scalar $_[0] =~ /^[_a-zA-Z][-\w:.]*$/;
}

sub check_nmtoken {
    return scalar $_[0] =~ /\^[-\w:.]+$/;
}

#
# Basic Types
#
sub check_bool {
    return scalar $_[0] =~ m!^(|0|1|true|false|yes|no|t|f)$!i
}

sub eval_bool {
    my $bool = $_[0];

    if ( $bool =~ /^(1|true|yes|t)$/i) {
	return 1;
    } elsif ( $bool =~ /^(|0|false|no|n)$/i ) {
	return 0;
    } else {
	croak "invalid boolean value: $bool";
    }

    return;
}

sub check_int {
    return scalar $_[0] =~ /^[-+]?\s*\d+$/;
}

sub check_string {
    return defined $_[0];
}

sub check_number {
    return scalar $_[0] =~ /^[-+]?\s*\d+(\.\d+)?/;
}

sub count2number {
    my ($q, $unit ) = $_[0] =~ /^([.\d]+)\s*(([gG]igas?)?|([mM]egas?)?|([kK]ilos?)?)?/;
    if (! defined $unit) {
	return $q;
    } elsif ( $unit =~ /^m/i) {
	return $q * 1_000_000;
    } elsif ( $unit =~ /^g/i) {
	return $q * 1_000_000_000;
    } elsif ( $unit =~ /^k/i) {
	return $q * 1_000;
    } else {
	return $q; # Unknown units are interpreted as raw number
    }
}

sub number2count {
    my $number = $_[0];

    # Prevent warnings with n/a or other such value
    return $number unless $number =~ /^[\d.]+$/;

    my ( $div, $units );
    if ( $number >= 1_000_000_000 ) {
	$div = 1_000_000_000;
	$units = "G";
    } elsif ($number >= 1_000_000 ) {
	$div = 1_000_000;
	$units = "M";
    } elsif ( $number >= 1_000 ) {
	$div = 1_000;
	$units = "k";
    } else {
	$div = 1;
	$units = "";
    }

    my $q = $number / $div;
    my $int = int $q;
    my $rem = $q - $int;

    return $rem ? sprintf( "%.1f$units", $q ) : $int . $units;
}

#
# Time Types
#
sub check_timestamp {
    return scalar check_string();
}

sub check_time {
    return scalar check_string();
}

sub check_date {
    return scalar check_string();
}

sub check_duration {
    my $dur = $_[0];

    return 0 unless defined $dur;
    return scalar $_[0] =~ /^\s*\d+\s*
                             ((y(ears?)?     )|   # Years
                              (M(onths?)?    )|   # Months
                              (w(eeks?)?     )|   # Weeks
                              (d(ays?)?      )|   # Days
                              (h(ours?)?     )|   # Hours
                              (m(inu?t?e?s?)?)|   # Minutes
                              (s(eco?n?d?s?)?))?  # Seconds
                             \s*$/x;
}

sub minutely_duration {
    return (defined $_[0] && $_[0] =~ /^(\d+)m(?:inu?t?e?s?)?$/ ) ? $1 : 0;
}

sub hourly_duration {
    return (defined $_[0] && $_[0] =~ /^(\d+)h(?:ours?)?$/ ) ? $1 : 0;
}

sub daily_duration {
    return (defined $_[0] && $_[0] =~ /^(\d+)d(?:ays?)?$/ ) ? $1 : 0;
}

sub weekly_duration {
    return (defined $_[0] && $_[0] =~ /^(\d+)w(?:eeks?)?$/ ) ? $1 : 0;

}

sub monthly_duration {
    return (defined $_[0] && $_[0] =~ /^(\d+)M(?:onths?)?$/ ) ? $1 : 0;
}

sub yearly_duration {
    return (defined $_[0] && $_[0] =~ /^(\d+)y(?:ears?)?$/ ) ? $1 : 0;
}

sub duration2sec {
    $_[0] =~ /^
	      (?:(\d+)y(?:ears?)?    )?\s*	# Years
	      (?:(\d+)M(?:onths?)?    )?\s*	# Months
	      (?:(\d+)w(?:eeks?)?     )?\s*	# Weeks
	      (?:(\d+)d(?:ays?)?      )?\s*	# Days
	      (?:(\d+)h(?:ours?)?     )?\s*	# Hours
	      (?:(\d+)m(?:inu?t?e?s?)?)?\s*	# Minutes
	      (?:(\d+)(s(?:eco?n?d?s?)?)?)?\s*	# Seconds
	      $/x;
    my ( $years, $months,  $weeks,  $days,   $hours,  $mins, $secs ) =
       ( $1 || 0, $2 || 0, $3 || 0, $4 || 0, $5 || 0, $6 || 0, $7 || 0 );
    my $s = 0;
    $s += $secs;
    $s += $mins * 60;
    $s += $hours * 60 * 60;
    $s += $days * 60 * 60 * 24;
    $s += $weeks * 60 * 60 * 24 * 7;
    $s += $months * 60 * 60 * 24 * 30; # Months and
    $s += $years * 60 * 60 * 24 * 365; # Years are approximation

    return $s;
}

sub sec2duration {
    my $sec = $_[0];

    # Prevent warnings with n/a or other such value
    return $sec unless $sec =~ /^[\d.]+$/;

    my ( $div, $units );
    if ( $sec >= 86400 ) {
	$div = 86400;
	$units = "d";
    } elsif ($sec >= 3600 ) {
	$div = 3600;
	$units = "h";
    } elsif ( $sec >= 60 ) {
	$div = 60;
	$units = "m";
    } else {
	$div = 1;
	$units = "s";
    }

    my $q = $sec / $div;
    my $int = int $q;
    my $rem = $q - $int;

    return $rem ? sprintf( "%.1f$units", $q ) : $int . $units;
}

#
# Internet Types
#

sub check_ip {
    my @bytes = split /^\.$/, $_[0];

    return 0 if @bytes != 4;

    for my $b ( @bytes ) {
	return 0 if $b < 0 || $b > 255;
    }

    return 1;
}

sub check_port {
    return 0 unless check_number();

    # 2**16 = 65536 : 16 bit port field, cf Stevens, p4
    return 0 if $_[0] < 0 || $_[0] > 65535;
    return 1;
}

sub check_hostname {
    return scalar $_[0] =~ /^[-\w]+(\.([-\w]+))*\.?$/;
}

sub check_url {
    return scalar check_string();
}

sub check_email {
    return scalar check_string();
}

#
# Misc Types
#

sub check_bytes {
    return scalar $_[0] =~ /^[.\d]+\s*([mM](egs?)?|[kK]|[gG](igas?)?)?/;
}

sub size2bytes {
    my ($q, $unit ) = $_[0] =~ /^([.\d]+)\s*([mM](egs?)?|[kK]|[gG](igas?)?)?/;
    if (! defined $unit) {
	return $q;
    } elsif ( $unit =~ /^m/i) {
	return $q * 1024 * 1024;
    } elsif ( $unit =~ /^g/i) {
	return $q * 1024 * 1024 * 1024;
    } elsif ( $unit =~ /^k/i) {
	return $q * 1024;
    } else {
	return $q; # Unknown units are interpreted as bytes
    }
}

use constant GIG => 1024 ** 3;
use constant MEG => 1024 ** 2;
use constant K   => 1024;

sub bytes2size {
    my $bytes = $_[0];

    # Prevent warnings with n/a or other such value
    return $bytes unless $bytes =~ /^[\d.]+$/;

    my ( $div, $units );
    if ( $bytes >= GIG ) {
	$div = GIG;
	$units = "G";
    } elsif ($bytes >= MEG ) {
	$div = MEG;
	$units = "M";
    } elsif ( $bytes >= K ) {
	$div = K;
	$units = "k";
    } else {
	$div = 1;
	$units = "";
    }

    my $q = $bytes / $div;
    my $int = int $q;
    my $rem = $q - $int;

    return $rem ? sprintf( "%.1f$units", $q ) : $int . $units;
}

sub check_filename {
    return scalar check_string();
}

#
# Special Types
#
sub check_type {
    return 0 unless defined $_[0];
    return scalar  $_[0] =~ /^(id|bool|int|number|string	# Basic
                              |timestamp|time|date|duration	# Time
                              |ip|port|hostname|url|email	# Internet
                              |bytes|filename			# Misc
                              |type|field|superservice|chart	# Special
                              )$/x;
}

sub check_field {
    return scalar check_xml_name( @_ );
}

sub check_superservice {
    return scalar $_[0] =~ /^\w+$/;
}

sub check_chart {
    return scalar $_[0] =~ /^(bars|lines|pie|histogram)$/;
}

sub is_time_type {
    return scalar $_[0] =~ /^(timestamp|time|date)$/;
}

sub is_numeric_type {
    return scalar is_time_type( @_ ) || scalar is_quantity_type( @_ );
}

sub is_quantity_type {
    return scalar $_[0] =~ /^(bytes|int|number|duration)$/;
}

sub format_numeric_type {
    my ($value, $type) = @_;

    return 'NaN' unless defined $value;

    $type ||= "number";

    my $fmt_value = $value;
    if ( $type eq 'bytes' ) {
	$fmt_value = bytes2size( $value )
	  if Lire::Config->get( 'lr_scale_bytes' );
    } elsif ( $type eq 'duration' ) {
	$fmt_value = sec2duration( $value )
	  if Lire::Config->get( 'lr_scale_seconds' );
    } elsif ( Lire::Config->get( 'lr_scale_numbers' ) ) {
	$fmt_value = number2count( $value );
    }

    return $fmt_value;
}

# keep perl happy
1;

__END__

=pod

=head1 NAME

Lire::DataTypes - check wether value is of declared type

=head1 SYNOPSIS

 use Lire::DataTypes qw/ size2bytes duration2sec ... /;

 $bytes = size2bytes( $min_value );
 $sec = duration2sec( $max_value );
 ...

=head1 DESCRIPTION

Lire::DataTypes offers several routines, like check_url and check_duration, to
decide wether a value is of a Lire type.  Furthermore, routines to process
these types, like duration2sec and bytes2size are offered.  Lire types are
defined in lire-types.mod.

This module is widely used throughout other Lire:: modules.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=head1 VERSION

$Id: DataTypes.pm,v 1.43 2004/05/17 17:05:19 wsourdeau Exp $

=head1 COPYRIGHT

Copyright (C) 2001 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software 
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=cut
