はてブのプライベート率

30分プログラム、その50。『「信用して下さい」系メディアと「検証して下さい」系メディア - アンカテ』のように、はてブのプライベート率を調べてみよう。

$ python private.py http://www.100shiki.com/archives/2007/06/ipmyipneighbors.html
URL: http://www.100shiki.com/archives/2007/06/ipmyipneighbors.html
Public:  198
Private: 70
Rate:    26.1%

$ python2.5 private.py http://phpspot.org/blog/archives/2007/06/macos_xuijavasc.html
URL: http://phpspot.org/blog/archives/2007/06/macos_xuijavasc.html
Public:  27
Private: 19
Rate:    41.3%
import sys
import urllib
from HTMLParser import *
 
class HatenaEntryParser(HTMLParser):
    """Collect the public and private bookmark counts from an HTML page.

    After the page has been passed to ``feed()``, ``public`` and ``private``
    hold the integers found as the text directly following a start tag whose
    ``class`` attribute is exactly ``public-count`` / ``private-count``.
    Both attributes start at 0, so they are always readable even when the
    page contains no such element.
    """

    # Maps the ``class`` attribute of a start tag to the name of the counter
    # attribute that the text following that tag should be stored in.
    _COUNT_CLASSES = {
        'public-count': 'public',
        'private-count': 'private',
    }

    def __init__(self):
        # Explicit base-class call (not super()) so this keeps working where
        # HTMLParser is an old-style class, as in Python 2.
        HTMLParser.__init__(self)
        # None means "the current text is not a counter"; otherwise it is
        # 'public' or 'private'.
        self.data_mode = None
        self.public = 0
        self.private = 0

    def find(self, list, key):
        """Return the first non-None value stored under ``key``.

        ``list`` is a sequence of ``(name, value)`` pairs, such as the
        ``attrs`` argument of ``handle_starttag``.  Returns None when no
        pair named ``key`` carries a value.  The parameter name shadows the
        builtin; it is kept so existing callers are unaffected.
        """
        for pair in list:
            if pair[0] == key and pair[1] is not None:
                return pair[1]
        return None

    def handle_starttag(self, tag, attrs):
        """Select which counter, if any, the upcoming text belongs to."""
        class_name = self.find(attrs, 'class')
        # Any tag without one of the known classes switches collection off.
        self.data_mode = self._COUNT_CLASSES.get(class_name)

    def handle_data(self, data):
        """Store ``data`` as an integer in the currently selected counter.

        Text outside a counter element and whitespace-only text are ignored.
        Raises ValueError if the counter text is not an integer.
        """
        if self.data_mode not in ('public', 'private'):
            return
        text = data.strip()
        if not text:
            # Whitespace between tags is not a count; int('') would raise.
            return
        setattr(self, self.data_mode, int(text))

    def handle_endtag(self, tag):
        """Stop collecting: text after any end tag is not a counter."""
        self.data_mode = None
 
# Take the target URLs from the command line, or one per line from stdin.
if len(sys.argv) > 1:
    urls = sys.argv[1:]
else:
    urls = sys.stdin
for url in urls:
    # Lines read from stdin keep their trailing newline; strip it so it ends
    # up in neither the request URL nor the report, and skip blank lines.
    url = url.strip()
    if not url:
        continue
    response = urllib.urlopen('http://b.hatena.ne.jp/entry/%s' % url)
    try:
        parser = HatenaEntryParser()
        parser.feed(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()
    # A page lacking one of the count elements is reported as 0 for it
    # instead of failing on a missing attribute.
    public = getattr(parser, 'public', 0)
    private = getattr(parser, 'private', 0)
    total = public + private
    # Single-argument print(...) keeps the same output under the Python 2
    # print statement this script is written for.
    print("URL: %s" % url)
    print("Public:  %s" % public)
    print("Private: %s" % private)
    if total:
        print("Rate:    %.1f%%" % (float(private) / total * 100))
    else:
        # No bookmarks at all: the rate is undefined, so avoid dividing by 0.
        print("Rate:    n/a")
    print("")