"""Train a Bayes guesser on issue-tracker texts and print its guesses.

Loads a pickled mapping of issues from ``IssueTrackerTexts2.pickle``.  Each
issue is read as a dict with ``'sections'``, ``'description'`` and
``'title'`` entries.  Roughly one issue in ten is held out at random as a
test sample; the rest train the guesser once per section.  For each held-out
issue the script prints its actual sections followed by the five
highest-scoring guesses.
"""
from operator import itemgetter
from random import randint

try:
    import cPickle
except ImportError:  # cPickle was merged into pickle on Python 3
    import pickle as cPickle

from reverend.thomas import Bayes

# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted source.
with open('IssueTrackerTexts2.pickle', 'rb') as pickle_file:
    data = cPickle.load(pickle_file)

guesser = Bayes()


def result_sorter(seq):
    """Sort *seq* in place, ascending by the second element of each item."""
    seq.sort(key=itemgetter(1))


test_samples = []
for item in data.values():
    sections = item['sections']
    # 'General' is dropped before training or testing.  The removal is done
    # in place, so held-out items also lose it from their 'sections' list.
    if 'General' in sections:
        sections.remove('General')
    if not sections:
        continue

    # Hold out about one item in ten for evaluation instead of training.
    if randint(1, 10) == 1:
        test_samples.append(item)
        continue

    for section in sections:
        guesser.train(section, item['description'])
        guesser.train(section, item['title'])

for test_sample in test_samples:
    text = test_sample['description']
    sections = test_sample['sections']
    print(sections)

    # Sort by score (highest first) *before* truncating, so the top five are
    # correct regardless of the order in which guess() returns its results.
    result = sorted(guesser.guess(text), key=itemgetter(1), reverse=True)[:5]
    for name, num in result:
        print("%.5f\t%s" % (num, name))
    print("")