メジア〜ン

id:yshl:20060823:1156341989 の続き
はてなにアクセスする回数が減るだろうと思って、item の数が上限に達している場合の再取得を、2×2個に等分する方法から、メジアンっぽい何かを基準に 4つに分ける方法に変えてみた。

#!/usr/bin/ruby -Ku
require 'net/http'
require 'rexml/document'
require 'cgi'
require 'time'

# Account ID interpolated into the feed request path ("/<ID>/rss?...").
$name = 'sample' # put the appropriate ID here
# NOTE(review): per the Ruby docs this opts in to the net/http "1.2" API and is
# presumably a no-op on current Rubies — confirm before removing.
Net::HTTP.version_1_2
# Mix REXML into the top level so its classes (e.g. Document) can be named unqualified.
include REXML

# Read one child element from every item and convert its text to a Float.
#
# items - collection of objects exposing `elements[name]` whose result has `text`
# str   - name of the child element to read (e.g. "geo:lat")
#
# Returns an Array of Floats in the same order as items. Text that does not
# start with a number converts to 0.0 (String#to_f semantics).
def toarray(items, str)
    items.map { |entry| entry.elements[str].text.to_f }
end

# Median of a list of numbers, always returned as a Float.
#
# array - non-empty Array of numerics; it is not modified.
#
# With an odd number of values the middle value of the sorted list is returned;
# with an even number, the mean of the two middle values.
#
# Raises ArgumentError when array is empty, since no median exists.
def median(array)
    raise ArgumentError, "median of an empty array is undefined" if array.empty?

    sorted = array.sort
    mid = sorted.size / 2
    if sorted.size.odd?
        sorted[mid].to_f
    else
        # For an even count the two central values sit at mid - 1 and mid.
        0.5 * (sorted[mid - 1] + sorted[mid])
    end
end

# Fetch the feed items inside a bounding box, splitting the box into four
# parts around the median position whenever the reply looks capped.
#
# http       - object whose get(path) returns a response exposing #body
# maxX, minX - longitude bounds sent as the maxX/minX query parameters
# maxY, minY - latitude bounds sent as the maxY/minY query parameters
#
# Prints one "o" per request as a progress mark.
#
# Returns an Array of the "rss/channel/item" elements found. When the box can
# no longer be split (see below) the possibly capped reply is returned as is.
def itemlist(http,maxX,minX,maxY,minY)
    response = http.get("/#{$name}/rss?maxX=#{maxX.to_f}&maxY=#{maxY.to_f}&minX=#{minX.to_f}&minY=#{minY.to_f}")
    print "o"
    itemarray = Document.new(response.body).elements.to_a("rss/channel/item")
    # 19 or more items is treated as "the reply was capped" — presumably the
    # feed's item limit; TODO confirm the exact value.
    return itemarray if itemarray.size < 19

    latmedian = median(toarray(itemarray,"geo:lat"))
    longmedian = median(toarray(itemarray,"geo:long"))

    # When both medians lie on an edge of the box, one of the four sub-boxes is
    # identical to this box; requesting it would repeat this very query and
    # recurse without end (e.g. 19+ clips sharing one position). Stop here.
    long_on_edge = longmedian == minX || longmedian == maxX
    lat_on_edge = latmedian == minY || latmedian == maxY
    return itemarray if long_on_edge && lat_on_edge

    upper_right = itemlist(http,maxX,longmedian,maxY,latmedian)
    lower_right = itemlist(http,maxX,longmedian,latmedian,minY)
    upper_left = itemlist(http,longmedian,minX,maxY,latmedian)
    lower_left = itemlist(http,longmedian,minX,latmedian,minY)

    # An item lying on a shared boundary can be reported by several sub-boxes.
    # Elements parsed from different responses are distinct objects, so they
    # are de-duplicated on their serialized text, keeping first occurrences.
    (upper_right + lower_right + upper_left + lower_left).uniq { |item| item.to_s }
end

# Crawl the whole globe (longitude -180..180, latitude -90..90) over a single
# HTTP connection and store the items in $data, ordered by pubDate ascending.
Net::HTTP.start("map.hatena.ne.jp",80){|http|
    items = itemlist(http,180,-180,90,-90)
    # sort_by parses each pubDate once instead of twice per comparison.
    $data = items.sort_by {|item|
        Time.parse(item.elements["pubDate"].text)
    }
}

# Write the collected items in $data to standard output as an HTML page:
# a clip count followed by a definition list with one entry per item.

# Text of the named child element, or nil when the element is absent or empty.
child_text = lambda { |item, name|
    child = item.elements[name]
    child && child.text
}
# The feed is external input, so everything interpolated into the markup is
# HTML-escaped; nil becomes an empty string instead of raising.
escape = lambda { |value| CGI::escapeHTML(value.to_s) }

puts <<END
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>List of Hatena Map</title>
</head>
<body>
END
puts "<p>#{$data.size} clips</p>"
puts "<dl>"
$data.each {|item|
    link = child_text.call(item, "link")
    title = child_text.call(item, "title")
    # link ends up inside an attribute value, so it must be escaped as well.
    print "<dt><a href=\"#{escape.call(link)}\">#{escape.call(title)}</a></dt>"
    lat = child_text.call(item, "geo:lat")
    long = child_text.call(item, "geo:long")
    print "<dd>lat=#{escape.call(lat)} long=#{escape.call(long)}</dd>"
    cats = item.elements.to_a("category")
    unless cats.empty?
        print "<dd>"
        cats.each do |cat|
            # An empty <category/> has nil text; treat it as an empty tag name.
            tag = cat.text.to_s
            print "<a href=\"http://map.hatena.ne.jp/t/#{CGI::escape(tag)}\">#{escape.call(tag)}</a>,"
        end
        print "</dd>"
    end
    # The description is optional: skipped when the element is missing or empty.
    description = child_text.call(item, "description")
    print "<dd>#{escape.call(description)}</dd>" if description
    print "\n"
}
puts "</dl>"
puts "</body></html>"