はてブのプライベート率
30分プログラム、その50。アンカテの記事「『信用して下さい』系メディアと『検証して下さい』系メディア」にならって、はてブ(はてなブックマーク)のプライベート率、つまりブックマーク全体に占める非公開ブックマークの割合を調べてみよう。
$ python private.py http://www.100shiki.com/archives/2007/06/ipmyipneighbors.html
URL: http://www.100shiki.com/archives/2007/06/ipmyipneighbors.html
Public: 198
Private: 70
Rate: 26.1%

$ python2.5 private.py http://phpspot.org/blog/archives/2007/06/macos_xuijavasc.html
URL: http://phpspot.org/blog/archives/2007/06/macos_xuijavasc.html
Public: 27
Private: 19
Rate: 41.3%
"""Report the share of private bookmarks on Hatena Bookmark entry pages.

Usage:
    python private.py URL [URL ...]
    python private.py < urls.txt      # one URL per line

For every URL the Hatena Bookmark entry page is fetched, the public and
private bookmark counts are scraped from it, and the percentage of private
bookmarks is printed.
"""
import sys

try:  # Python 3
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2
    from HTMLParser import HTMLParser
    from urllib import urlopen

# Entry page for a bookmarked URL; the target URL is appended verbatim.
ENTRY_URL = 'http://b.hatena.ne.jp/entry/%s'


class HatenaEntryParser(HTMLParser):
    """Scrape the public/private bookmark counts from an entry page.

    After ``feed()`` the counts are available as ``self.public`` and
    ``self.private``.  Both start at 0, so a page that contains no count
    elements yields zeros instead of an ``AttributeError``.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        # Which counter the next text node belongs to:
        # 'public', 'private', or None when outside a count element.
        self.data_mode = None
        self.public = 0
        self.private = 0

    def find(self, list, key):
        """Return the first non-None value stored under *key* in *list*.

        *list* is a sequence of ``(name, value)`` pairs as passed to
        ``handle_starttag``.  Returns None when no pair matches.  The
        parameter name is kept (although it shadows the builtin) so that
        existing keyword callers continue to work.
        """
        for name, value in list:
            if name == key and value is not None:
                return value
        return None

    def handle_starttag(self, tag, attrs):
        """Enter 'public'/'private' mode when a count element opens."""
        class_name = self.find(attrs, 'class')
        if class_name == 'private-count':
            self.data_mode = 'private'
        elif class_name == 'public-count':
            self.data_mode = 'public'
        else:
            self.data_mode = None

    def handle_data(self, data):
        """Store the text of a count element as an integer."""
        if self.data_mode not in ('public', 'private'):
            return
        try:
            count = int(data)
        except ValueError:
            # Non-numeric text inside a count element: keep the old value
            # rather than aborting the whole run.
            return
        if self.data_mode == 'public':
            self.public = count
        else:
            self.private = count

    def handle_endtag(self, tag):
        """Leave count mode at any closing tag."""
        self.data_mode = None


def private_rate(public, private):
    """Return the percentage of private bookmarks (0.0 when there are none)."""
    total = public + private
    if total == 0:
        return 0.0
    return float(private) / total * 100


def fetch_counts(url):
    """Fetch the entry page for *url* and return ``(public, private)``."""
    response = urlopen(ENTRY_URL % url)
    try:
        body = response.read()
    finally:
        response.close()
    # Python 3 returns bytes, which HTMLParser.feed() does not accept.
    if not isinstance(body, str):
        body = body.decode('utf-8', 'replace')
    parser = HatenaEntryParser()
    parser.feed(body)
    return parser.public, parser.private


def main(argv=None):
    """Print the bookmark statistics for every URL given.

    URLs come from *argv* (default ``sys.argv``) or, when no arguments are
    given, from standard input, one per line.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) > 1:
        urls = argv[1:]
    else:
        urls = sys.stdin
    for raw_url in urls:
        # Lines read from stdin still carry their newline; blank lines are
        # skipped instead of being sent as empty requests.
        url = raw_url.strip()
        if not url:
            continue
        public, private = fetch_counts(url)
        print("URL: %s" % url)
        print("Public: %s" % public)
        print("Private: %s" % private)
        print("Rate: %.1f%%" % private_rate(public, private))
        print("")


if __name__ == '__main__':
    main()