#
# antirefspamfilter.rb
#
# Copyright (c) 2004 T.Shimomura <redbug@netlife.gr.jp>
#

=begin

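ver 0.9 2004/11/24
	Added an option to trust referers that match the referer replacement table (thanks to Shun-ichi TAHARA)
	Comment restrictions (NG words) can now be written as regular expressions
	Fixed a bug related to HTTP.version_1_2 that kept the filter from working
	Fixed wrong time fields in the entries written to spamips
	Other changes to make the processing more tolerant of errors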

ver 0.8 2004/11/15
	Added an option to specify a proxy server
	Fixed a bug in ver 0.6m - 0.71 that could raise an error on Ruby 1.6 series

ver 0.71 2004/11/12
	Fixed a bug in ver 0.6m and ver 0.7 where the "trusted referers" setting was no longer applied

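ver 0.7 2004/11/11
	Worked around a problem where some antennas, depending on their settings, could not pick up the update time
	Added options to restrict comments, as a countermeasure against comment spam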

ver 0.6m 2004/11/07
	Changed the design of the settings screen
	Rewrote "if not" as "unless"

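ver 0.6 2004/11/07
	Made it possible to specify an accepted link target other than the top page URL
	Split the language resources of the settings screen into separate files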

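ver 0.5 2004/10/31
	Trusted URLs can now be written as regular expressions
	Addressed (hopefully) the problem of the same URL being recorded repeatedly in safeurls and spamurls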

ver 0.4 2004/10/20
	Fixed a bug that kept the filter from working on Ruby 1.8.2 (preview2)
	Changed the connection port from a fixed 80 to uri.port (thanks to MoonWolf)

ver 0.3 2004/09/30
	Added a little processing to reduce the load

ver 0.2 2004/09/27
	The list of trusted URLs can now be changed

ver 0.1 2004/09/15
	First version

=end

require 'net/http'
require 'uri'

module TDiary
  module Filter

    class AntirefspamFilter < Filter
      # Appends +str+ as one line to a debug log file. Does nothing unless the
      # global <tt>$debug</tt> is set (this is the lowercase global, not
      # Ruby's built-in <tt>$DEBUG</tt>).
      #
      # filename:: log file name, relative to the "AntiRefSpamFilter"
      #            directory under <tt>@conf.data_path</tt>
      # str::      text to append
      #
      # The directory is created on demand: callers may log before anything
      # else has created it, and opening a file in a missing directory would
      # otherwise raise Errno::ENOENT.
      def debug_out(filename, str)
        return unless $debug

        log_dir = File.join(@conf.data_path, "AntiRefSpamFilter")
        begin
          Dir.mkdir(log_dir) unless File.directory?(log_dir)
        rescue Errno::EEXIST
          # Something else created it between the test and the mkdir.
        end

        File.open(File.join(log_dir, filename), "a") { |f| f.puts str }
      end

      # Tells whether +str+ mentions this diary.
      #
      # Returns true when +str+ contains the top page URL
      # (<tt>@conf.index_page</tt>), or when it matches the
      # 'antirefspam.myurl' setting, tried first as a plain substring and
      # then as a regular expression. Returns false otherwise.
      #
      # An 'antirefspam.myurl' value that is not a valid regular expression
      # is treated as "no match" instead of raising RegexpError.
      def check(str)
        # Does str contain the top page URL?
        index_page = @conf.index_page
        if index_page && !index_page.empty?
          return true if str.include?(index_page)
        end

        # Does str contain the explicitly accepted link target?
        myurl = @conf['antirefspam.myurl']
        return false if myurl.nil? || myurl.empty?
        return true if str.include?(myurl)

        # Not a literal hit: interpret the setting as a regular expression.
        begin
          pattern = Regexp.new(myurl.gsub("/", "\\/").gsub(":", "\\:"))
          return true if pattern =~ str
        rescue RegexpError
          # The setting is free-form user input; a broken pattern simply
          # cannot match anything.
        end

        false
      end

      # Decides whether +referer+ is a legitimate referer.
      #
      # Returns true when the referer is absent or at most one character
      # long, when it mentions this diary (see #check), when it matches a
      # trusted URL or (optionally) the referer replacement table, or when
      # the referring page was fetched and found to mention this diary.
      # Returns false when the referer is recorded as spam, cannot be parsed,
      # cannot be fetched, or when another check of the same host is already
      # in progress.
      #
      # Side effects: creates the "AntiRefSpamFilter" work directory under
      # <tt>@conf.data_path</tt>, appends to the spamurls / spamips /
      # safeurls lists in it, and uses a file named after the referer's host
      # as a lock while the referring page is being checked.
      def referer_filter(referer)
        # No referer at all.
        return true unless referer
        # Some antennas send a referer of one character or less; accept
        # those.
        return true if referer.size <= 1

        @work_path = File.join(@conf.data_path, "AntiRefSpamFilter")
        @spamurl_list = File.join(@work_path, "spamurls")  # referers judged to be referer spam
        @spamip_list  = File.join(@work_path, "spamips")   # IP addresses that sent referer spam
        @safeurl_list = File.join(@work_path, "safeurls")  # referers judged to be legitimate

        # Links coming from the diary itself are always trusted.
        return true if check(referer)

        # Does the referer match one of the trusted URLs?
        if (trustedurls = @conf['antirefspam.trustedurl'])
          trustedurls.to_s.each_line do |line|
            trusted = line.sub(/\r?\n/, '')
            next if trusted =~ /\A(\#|\s*)\z/

            # First try the trusted URL as a plain substring of the referer.
            if referer.include?(trusted)
              debug_out("trusted1", trusted + " --- " + referer)
              return true
            end

            # No literal hit: treat the trusted URL as a regular expression.
            begin
              pattern = Regexp.new(trusted.gsub("/", "\\/").gsub(":", "\\:"))
              if referer =~ pattern
                debug_out("trusted2", trusted + " --- " + referer)
                return true
              end
            rescue
              debug_out("error_config", trusted)
            end
          end
        end

        # Optionally trust anything that matches the referer replacement table.
        if @conf['antirefspam.checkreftable'].to_s == 'true'
          @conf.referer_table.each do |url, name|
            begin
              if /#{url}/i =~ referer
                debug_out("trusted3", url + " --- " + referer)
                return true
              end
            rescue
              debug_out("error_config", url)
            end
          end
        end

        # Make sure the work directory and the list files exist.
        Dir.mkdir(@work_path) unless File.exist?(@work_path)
        [@spamurl_list, @safeurl_list].each do |list|
          File.open(list, "a").close unless File.exist?(list)
        end

        # A referer that is not a parseable URL, or that has no host, cannot
        # be verified, so it is rejected instead of raising.
        begin
          uri = URI.parse(referer)
        rescue URI::InvalidURIError
          return false
        end
        return false if uri.host.nil? || uri.host.empty?

        # A file named after the target host serves as the lock.
        lock_path = File.join(@work_path, uri.host)
        locked = false
        begin
          File.open(lock_path, File::RDONLY | File::CREAT | File::EXCL).close
          locked = true

          # Referers already recorded as spam are rejected.
          spamurls = IO.readlines(@spamurl_list).map { |url| url.chomp }
          return false if spamurls.include?(referer)

          # Referers already recorded as safe are accepted.
          safeurls = IO.readlines(@safeurl_list).map { |url| url.chomp }
          return true if safeurls.include?(referer)

          # Fetch the HTML of the referring page.
          Net::HTTP.version_1_2   # kept from the original; apparently a compatibility incantation
          # An unset proxy setting (nil) means "no proxy", same as "".
          proxy_server = @conf['antirefspam.proxy_server'].to_s
          proxy_port = nil
          if proxy_server.empty?
            proxy_server = nil
          else
            proxy_port = @conf['antirefspam.proxy_port']
          end

          body = ""
          begin
            Net::HTTP::Proxy(proxy_server, proxy_port).start(uri.host, uri.port) do |http|
              path = uri.path == "" ? "/" : uri.request_uri
              response, = http.get(path)
              body = response.body
            end

            # A page that does not mention this diary is considered spam.
            unless check(body)
              File.open(@spamurl_list, "a+") { |f| f.puts referer }
              File.open(@spamip_list, "a+") do |f|
                f.puts [@cgi.remote_addr, Time.now.utc.strftime("%Y/%m/%d %H:%M:%S UTC")].join("\t")
              end
              return false
            end

            File.open(@safeurl_list, "a+") { |f| f.puts referer }
          rescue
            # On error the referer is rejected this time but not recorded in
            # @spamurl_list.
            return false
          end
        rescue StandardError, Timeout::Error
          # The lock could not be taken (or something else failed): do not
          # count the referer this time. Timeout::Error is named explicitly
          # for Rubies where it is not a StandardError; the deprecated
          # top-level TimeoutError alias is avoided.
          return false
        ensure
          # Only remove a lock this call created. Deleting unconditionally
          # would remove another request's lock, or any existing file that
          # happens to be named like the host (e.g. "spamurls").
          if locked
            begin
              File.delete(lock_path)
            rescue SystemCallError
            end
          end
        end

        true
      end

      # Decides whether a comment is acceptable.
      #
      # diary::   unused here
      # comment:: object whose +body+ is the comment text
      #
      # Returns false when any enabled restriction is violated, true
      # otherwise. The restrictions are:
      # * 'antirefspam.comment_kanaonly' is 'true' and the body contains no
      #   hiragana or katakana,
      # * 'antirefspam.comment_maxsize' is positive and the body is longer
      #   than that,
      # * the body contains, or matches as a regular expression, any line of
      #   'antirefspam.comment_ngwords'.
      def comment_filter( diary, comment )
        body = comment.body

        # Reject comments without any Japanese kana. The ranges are written
        # with \u escapes so the source stays ASCII-only.
        # NOTE(review): assumes comment bodies are UTF-8 strings -- confirm.
        if @conf['antirefspam.comment_kanaonly'].to_s == 'true'
          return false unless body =~ /[\u3041-\u3096\u30A1-\u30FA]/
        end

        # Reject comments longer than the configured maximum.
        maxsize = @conf['antirefspam.comment_maxsize'].to_i
        return false if maxsize > 0 && body.size > maxsize

        # Reject comments containing any NG word.
        ngwords = @conf['antirefspam.comment_ngwords']
        return true if ngwords.nil?

        lowered_body = body.downcase
        ngwords.to_s.each_line do |line|
          ngword = line.sub(/\r?\n/, '')
          # A blank line would match every comment (include?("") is always
          # true), so it is skipped.
          next if ngword.strip.empty?

          return false if lowered_body.include?(ngword.downcase)

          # Not contained literally: treat the NG word as a regular
          # expression.
          begin
            return false if body =~ Regexp.new( ngword, Regexp::MULTILINE )
          rescue
            debug_out("error_config2", ngword)
          end
        end

        true
      end
    end
  end
end
