Package nltk_lite :: Package contrib :: Package classifier :: Module cfile
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier.cfile

 1  # Natural Language Toolkit - File 
 2  #  Understands operations on files and the various input file extensions 
 3  # 
 4  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
 5  # 
 6  # URL: <http://nltk.sf.net> 
 7  # This software is distributed under GPL, for license information see LICENSE.TXT 
 8   
 9  from nltk_lite.contrib.classifier.exceptions import filenotfounderror as fnf, invaliddataerror as inv 
10  import os, os.path 
11   
12  DOT = '.' 
13   
class File:
    """A file on disk addressed by a base path plus an extension.

    The full path is built once, in the constructor, as
    ``path + DOT + extension`` and stored in ``self.path``.
    """

    def __init__(self, path, extension):
        """Store the full path ``path + DOT + extension``.

        path      -- file path without its extension
        extension -- extension to append, without the leading dot
        """
        self.path = path + DOT + extension

    def for_each_line(self, method):
        """Apply ``method`` to every meaningful line and collect the results.

        Each line is first passed through ``filter_comments``; lines that
        are empty after filtering are skipped.

        Returns a list with the return value of ``method`` for every
        remaining line, in file order.

        Raises fnf.FileNotFoundError if ``self.path`` is not an existing
        regular file.
        """
        self.__check_for_existence()
        fil = open(self.path, 'r')
        returned = []
        try:
            for line in fil:
                filtered = filter_comments(line)
                if len(filtered) == 0:
                    continue
                returned.append(method(filtered))
        finally:
            # Close the handle even when ``method`` raises, so a failing
            # callback does not leak the open file.
            fil.close()
        return returned

    def __check_for_existence(self):
        """Raise fnf.FileNotFoundError unless ``self.path`` is a regular file."""
        if not os.path.isfile(self.path):
            raise fnf.FileNotFoundError(self.path)

    def create(self, overwrite = False):
        """Create an empty file at ``self.path``.

        overwrite -- when false (the default), anything already present at
                     the path is an error; when true, an existing regular
                     file is removed and recreated empty.

        Raises inv.InvalidDataError if something exists at the path and
        ``overwrite`` is false, or if the path is not a regular file (for
        example a directory) and ``overwrite`` is true.
        """
        if not overwrite and os.path.exists(self.path):
            raise inv.InvalidDataError('File or Directory exists at ' + self.path + ' and overwrite is set to false.')
        if os.path.exists(self.path):
            if os.path.isfile(self.path):
                os.remove(self.path)
            else:
                raise inv.InvalidDataError('Cannot overwrite directory ' + self.path + '.')
        # Opening in write mode and closing immediately leaves an empty file.
        fil = open(self.path, 'w')
        fil.close()

    def write(self, lines):
        """Replace the file's contents with ``lines``, one per line.

        lines -- iterable of strings; a newline is written after each one.

        The file must already exist (see ``create``); otherwise
        fnf.FileNotFoundError is raised.
        """
        self.__check_for_existence()
        fil = open(self.path, 'w')
        try:
            for line in lines:
                fil.write(line)
                fil.write('\n')
        finally:
            # Close (and flush) the handle even if a write fails part-way.
            fil.close()
52
def filter_comments(line):
    """Return ``line`` without its trailing comment and surrounding whitespace.

    Everything from the first '|' onwards is treated as a comment and
    dropped; a line without '|' is returned whole. In both cases leading
    and trailing whitespace is stripped from the result.
    """
    # Splitting once on '|' keeps only the text before the first marker;
    # when there is no marker the single piece is the whole line.
    content = line.split('|', 1)[0]
    return content.strip()
58
def name_extension(file_name):
    """Split ``file_name`` at its last DOT into ``[name, extension]``.

    Returns a two-element list. When ``file_name`` contains no DOT the
    extension is the empty string and the name is ``file_name`` unchanged.
    """
    # Splitting once from the right separates at the last DOT only.
    pieces = file_name.rsplit(DOT, 1)
    if len(pieces) == 1:
        return [file_name, '']
    return [pieces[0], pieces[1]]
64