Package nltk_lite :: Package contrib :: Package classify
[hide private]
[frames | no frames]

Source Code for Package nltk_lite.contrib.classify

  1  # Natural Language Toolkit: Classifiers 
  2  # 
  3  # Copyright (C) 2001-2007 University of Pennsylvania 
  4  # Author: Sam Huston <shuston@csse.unimelb.edu.au> 
  5  #         Steven Bird <sb@csse.unimelb.edu.au> 
  6  # URL: <http://nltk.sf.net> 
  7  # For license information, see LICENSE.TXT 
  8  # 
  9   
 10  """ 
 11  """ 
 12   
 13  from operator import itemgetter 
 14   
class ClassifyI:
    """
    Interface that every classifier in this package is expected to provide.

    All methods raise C{NotImplementedError}; concrete classifiers override
    them (or inherit the generic versions from L{AbstractClassify}, which
    derive everything except training from C{get_class_dict}).
    """

    def train(self, gold_standard):
        """
        Train the classifier.

        @param gold_standard: maps class name to representative samples
        @return: nothing if successful
        """
        raise NotImplementedError()

    def get_class(self, tokens):
        """
        @param tokens: sample to be classified
        @return: only the most probable class name
        """
        raise NotImplementedError()

    def get_class_list(self, tokens):
        """
        @param tokens: sample to be classified
        @return: a list of all classes in order of most likely to least
            likely class
        """
        raise NotImplementedError()

    def get_class_probs(self, tokens):
        """
        @param tokens: sample to be classified
        @return: DictionaryProbDist of class name and probability
            see nltk_lite.probability.py
        """
        raise NotImplementedError()

    def get_class_tuples(self, tokens):
        """
        @param tokens: sample to be classified
        @return: a list of (class name, probability) tuples ordered from
            most likely to least likely class
        """
        raise NotImplementedError()

    def get_class_dict(self, tokens):
        """
        @param tokens: sample to be classified
        @return: dictionary of class names to probability
        """
        # Declared here because AbstractClassify builds get_class_tuples and
        # get_class_probs on top of self.get_class_dict().
        raise NotImplementedError()
52 53 54
class AbstractClassify(ClassifyI):
    """
    Generic implementations of the query half of L{ClassifyI}.

    Every method here is derived from C{self.get_class_dict(text)}, which
    this class does not define: a subclass must supply it and return a
    mapping from class name to a numeric score for the sample.
    """

    def classes(self):
        """
        @return: the set of known classes
        """
        # self._classes is never assigned in this class; presumably a
        # subclass sets it (e.g. while training) -- confirm against the
        # concrete classifiers.
        return self._classes

    def get_class(self, text):
        """
        @param text: sample to be classified
        @return: most probable class
        @raise IndexError: if get_class_tuples() yields no entries
        """
        # The tuple list is sorted best-first, so the head is the winner.
        (cls, prob) = self.get_class_tuples(text)[0]
        return cls

    def get_class_list(self, text):
        """
        @param text: sample to be classified
        @return: ordered list of classification results (class names only,
            in the order produced by get_class_tuples())
        """
        tuplelist = self.get_class_tuples(text)
        return [cls for (cls, prob) in tuplelist]

    def get_class_tuples(self, text):
        """
        @param text: sample to be classified
        @return: a list of (class, score) tuples ordered by descending score
        """
        tmp = self.get_class_dict(text)
        # Sort on the score (item 1 of each pair), highest first.
        return sorted([(cls, tmp[cls]) for cls in tmp],
                      key=itemgetter(1), reverse=True)

    def get_class_probs(self, text):
        """
        @param text: sample to be classified
        @return: a normalised probability dictionary
            see probability.py
        """
        # NOTE(review): DictionaryProbDist is not imported in the visible
        # part of this module; presumably it reaches the module namespace
        # through the wildcard imports at the bottom of the file -- confirm,
        # otherwise this call raises NameError.
        return DictionaryProbDist(self.get_class_dict(text), normalize=True)
96 97 98 99 ##////////////////////////////////////////////////////// 100 ## Helper Functions 101 ##////////////////////////////////////////////////////// 102 103
def classifier_accuracy(classifier, gold):
    """
    Measure how many gold-standard samples a classifier labels correctly.

    Each sample in C{gold} is passed to C{classifier.get_class()} and the
    answer is compared with the class name it is stored under.

    @param classifier: object providing C{get_class(sample)}
    @param gold: maps class name to the sample expected to receive that class
    @return: fraction of entries classified correctly, as a float in
        [0.0, 1.0]; 0.0 when C{gold} is empty
    """
    # Nothing to score: avoid dividing by zero below.
    if not gold:
        return 0.0

    correct = sum(1 for cls in gold
                  if classifier.get_class(gold[cls]) == cls)
    # float() keeps the division true under Python 2 integer semantics.
    return float(correct) / len(gold)
111 112 113 from cosine import * 114 from naivebayes import * 115 from spearman import * 116