root/Cheetah/FileUtils.py

Revision f17b49bd2a9cb5c693518283252cdbca4d04136b, 11.0 kB (checked in by Jason Michalski <armooo@armooo.net>, 2 years ago)

Lets try the import again

  • Property mode set to 100644
Line 
1 #!/usr/bin/env python
2 # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $
3 """File utilities for Python:
4
5 Meta-Data
6 ================================================================================
7 Author: Tavis Rudd <tavis@damnsimple.com>
8 License: This software is released for unlimited distribution under the
9          terms of the MIT license.  See the LICENSE file.
10 Version: $Revision: 1.12 $
11 Start Date: 2001/09/26
12 Last Revision Date: $Date: 2005/11/02 22:26:07 $
13 """
14 __author__ = "Tavis Rudd <tavis@damnsimple.com>"
15 __revision__ = "$Revision: 1.12 $"[11:-2]
16
17
18 from glob import glob
19 import os
20 from os import listdir
21 import os.path
22 import re
23 from types import StringType
24 from tempfile import mktemp
25
26 def _escapeRegexChars(txt,
27                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
28     return escapeRE.sub(r'\\\1' , txt)
29
def findFiles(*args, **kw):
    """Return the list of files collected by a FileFinder.

    Every positional and keyword argument is handed to the FileFinder
    constructor unchanged; see that class for what they mean."""

    finder = FileFinder(*args, **kw)
    return finder.files()
37            
def replaceStrInFiles(files, theStr, repl):

    """Replace all instances of 'theStr' with 'repl' for each file in the 'files'
    list. Returns a dictionary with data about the matches found.

    This is like string.replace() on a multi-file basis: 'theStr' is matched
    literally (its regex metacharacters are escaped) and 'repl' is inserted
    literally, so text in 'repl' that looks like a regex group reference is
    written to the files as-is rather than being interpreted.

    'repl' may also be a function; it is then called with each match object
    and must return the replacement text.

    This function is a wrapper around the FindAndReplace class. See its
    docstring for more details."""

    pattern = _escapeRegexChars(theStr)

    if callable(repl):
        subber = repl
    else:
        # FindAndReplace treats a replacement *string* as a regex template.
        # The escaped pattern has no groups, so any group reference in it
        # would fail at substitution time.  Wrapping the text in a function
        # makes it a plain literal.
        def subber(match):
            return repl

    return FindAndReplace(files, pattern, subber).results()
50
def replaceRegexInFiles(files, pattern, repl):

    """Run a regex substitution over every file named in 'files'.

    Each match of the regex 'pattern' is replaced with 'repl', in the manner
    of re.sub applied to many files. The return value is the results
    dictionary of the FindAndReplace instance that does the work; see that
    class's docstring for more details."""

    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()
62
63
64 ##################################################
65 ## CLASSES
66
class FileFinder:

    """Walks a directory tree and collects every path in it that matches one
    of the given glob patterns.

    The whole walk is performed by the constructor; call 'files()' afterwards
    to get the accumulated list."""

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS','.svn'),
                 ignoreDirs=(),
                 ):
        """Store the search settings and immediately walk 'rootPath'.

        'globPatterns' are applied inside every visited directory.
        Sub-directories whose base name is in 'ignoreBasenames', or whose
        joined path is in 'ignoreDirs', are not descended into."""

        self._files = []
        self._ignoreDirs = ignoreDirs
        self._ignoreBasenames = ignoreBasenames
        self._globPatterns = globPatterns
        self._rootPath = rootPath

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):

        """Visit 'dir' and every accepted sub-directory beneath it, calling
        processDir() on each one.

        Directories waiting to be visited are kept on a stack, so the most
        recently discovered directory is processed next."""

        stack = [dir]
        while stack:
            current = stack.pop()
            self.processDir(current)

            # queue the sub-directories that pass the filter
            for entry in listdir(current):
                candidate = join(current, entry)
                if not isdir(candidate):
                    continue
                if self.filterDir(entry, candidate):
                    stack.append(candidate)

    def filterDir(self, baseName, fullPath):

        """Hook deciding whether a sub-directory is walked: returns False for
        ignored base names and ignored paths, True otherwise."""

        return (baseName not in self._ignoreBasenames
                and fullPath not in self._ignoreDirs)

    def processDir(self, dir, glob=glob):
        """Add the paths in 'dir' matching each glob pattern to the results."""
        for pattern in self._globPatterns:
            matches = glob(os.path.join(dir, pattern))
            self._files += matches

    def files(self):
        """Return the list of paths collected by the walk."""
        return self._files
127
128 class _GenSubberFunc:
129
130     """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
131     groups, etc.) into a function that can be used to do the substitutions in
132     the FindAndReplace class."""
133    
134     backrefRE = re.compile(r'\\([1-9][0-9]*)')
135     groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_]*)>')
136    
137     def __init__(self, replaceStr):
138         self._src = replaceStr
139         self._pos = 0
140         self._codeChunks = []
141         self.parse()
142
143     def src(self):
144         return self._src
145        
146     def pos(self):
147         return self._pos
148    
149     def setPos(self, pos):
150         self._pos = pos
151
152     def atEnd(self):
153         return self._pos >= len(self._src)
154
155     def advance(self, offset=1):
156         self._pos += offset
157
158     def readTo(self, to, start=None):
159         if start == None:
160             start = self._pos
161         self._pos = to
162         if self.atEnd():
163             return self._src[start:]
164         else:
165             return self._src[start:to]
166
167     ## match and get methods
168         
169     def matchBackref(self):
170         return self.backrefRE.match(self.src(), self.pos())
171
172     def getBackref(self):
173         m = self.matchBackref()
174         self.setPos(m.end())
175         return m.group(1)
176        
177     def matchGroup(self):
178         return self.groupRE.match(self.src(), self.pos())
179
180     def getGroup(self):
181         m = self.matchGroup()
182         self.setPos(m.end())
183         return m.group(1)
184
185     ## main parse loop and the eat methods
186     
187     def parse(self):
188         while not self.atEnd():
189             if self.matchBackref():
190                 self.eatBackref()
191             elif self.matchGroup():
192                 self.eatGroup()
193             else:
194                 self.eatStrConst()
195                
196     def eatStrConst(self):
197         startPos = self.pos()
198         while not self.atEnd():
199             if self.matchBackref() or self.matchGroup():
200                 break
201             else:
202                 self.advance()
203         strConst = self.readTo(self.pos(), start=startPos)
204         self.addChunk(repr(strConst))
205    
206     def eatBackref(self):
207         self.addChunk( 'm.group(' + self.getBackref() + ')' )
208
209     def eatGroup(self):
210         self.addChunk( 'm.group("' + self.getGroup() + '")' )
211    
212     def addChunk(self, chunk):
213         self._codeChunks.append(chunk)
214
215     ## code wrapping methods
216
217     def codeBody(self):
218         return ', '.join(self._codeChunks)
219
220     def code(self):
221         return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())
222    
223     def subberFunc(self):
224         exec self.code()
225         return subber
226
227
class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list. This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or
    a regex object as generated by the re module. 'replacement' can be any
    string that would work with patternOrRE.sub(replacement, fileContents),
    or a function that takes a match object and returns the replacement text.

    All the work is done by the constructor: entries of 'files' that are not
    regular files are skipped, and each file containing a match is rewritten
    in place.  When 'recordResults' is true, 'results()' returns a dictionary
    keyed by file name whose values hold a 'count' and a list of 'matches'
    (each with 'contents', 'start' and 'end').
    """

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):

        # 'str' is the type that types.StringType names, so these checks
        # accept the same inputs as before without needing that import.
        if isinstance(patternOrRE, str):
            self._regex = re.compile(patternOrRE)
        else:
            self._regex = patternOrRE
        if isinstance(replacement, str):
            self._subber = _GenSubberFunc(replacement).subberFunc()
        else:
            self._subber = replacement

        self._pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults

        ## see if we should use pgrep to do the file matching
        self._usePgrep = self._pgrepUnderstands(self._pattern)

        self._run()

    def results(self):
        """Return the dictionary of recorded matches, keyed by file name."""
        return self._results

    def _pgrep(self, *args):
        """Run the 'pgrep' program with 'args' and return its
        (stdout, stderr) text, or None if the program cannot be started.

        The arguments are passed as a list, without a shell, so the pattern
        and file names reach the program exactly as given."""
        import subprocess
        try:
            proc = subprocess.Popen(['pgrep'] + list(args),
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            return None
        # communicate() also closes stdin, so the child can never block on it
        return proc.communicate()

    def _pgrepUnderstands(self, pattern):
        """Return True if a usable 'pgrep' exists and accepts 'pattern'."""
        from tempfile import mkstemp

        usage = self._pgrep()
        if usage is None or not usage[1].startswith('Usage:'):
            return False

        ## now check to make sure pgrep understands the pattern
        # mkstemp creates the file itself, unlike the race-prone mktemp
        fd, tmpFile = mkstemp()
        try:
            handle = os.fdopen(fd, 'w')
            try:
                handle.write('#')
            finally:
                handle.close()
            check = self._pgrep(pattern, tmpFile)
            # it didn't print an error msg so we're ok
            return check is not None and not check[1]
        finally:
            os.remove(tmpFile)

    def _readFile(self, path):
        """Return the contents of 'path', closing the file afterwards."""
        handle = open(path)
        try:
            return handle.read()
        finally:
            handle.close()

    def _writeFile(self, path, contents):
        """Overwrite 'path' with 'contents', closing the file afterwards."""
        handle = open(path, 'w')
        try:
            handle.write(contents)
        finally:
            handle.close()

    def _run(self):
        """Search every regular file and rewrite the ones that match."""
        regex = self._regex
        subber = self._subDispatcher
        pattern = self._pattern
        for path in self._files:
            if not os.path.isfile(path):
                continue # skip dirs etc.

            self._currFile = path
            orig = None     # contents, once they have been read
            if self._usePgrep:
                output = self._pgrep(pattern, path)
                # any output on stdout means pgrep found the pattern
                found = bool(output and output[0])
            else:
                orig = self._readFile(path)
                found = regex.search(orig) is not None
            if found:
                if orig is None:
                    orig = self._readFile(path)
                new = regex.sub(subber, orig)
                self._writeFile(path, new)

    def _subDispatcher(self, match):
        """Record 'match' against the current file (when recording is on) and
        return the replacement text produced by the subber."""
        if self._recordResults:
            if self._currFile not in self._results:
                res = self._results[self._currFile] = {}
                res['count'] = 0
                res['matches'] = []
            else:
                res = self._results[self._currFile]
            res['count'] += 1
            res['matches'].append({'contents':match.group(),
                                   'start':match.start(),
                                   'end':match.end(),
                                   }
                                   )
        return self._subber(match)
313
314
class SourceFileStats:

    """Counts the code, blank and comment lines of a list of source files.

    Every file is read by the constructor.  'rawStats()' returns the per-file
    counts keyed by file name, and 'summary()' returns the totals over all
    files.  A comment line is one whose first non-whitespace character is
    '#'; a blank line is empty or contains only whitespace; every other line
    counts as code.
    """

    _fileStats = None

    # Both patterns are applied with match(), i.e. anchored at the start of
    # the line.  The leading whitespace must be optional and unbounded so that
    # column-0 comments, indented comments and empty lines are all recognised.
    _commentLineRe = re.compile(r'\s*#.*$')
    _blankLineRe = re.compile(r'\s*$')

    def __init__(self, files):
        """Collect the statistics for each file name in 'files'."""
        self._fileStats = stats = {}
        for file in files:
            stats[file] = self.getFileStats(file)

    def rawStats(self):
        """Return the dictionary mapping file name to its stats dictionary."""
        return self._fileStats

    def summary(self):
        """Return a stats dictionary holding the totals over all files."""
        codeLines = 0
        blankLines = 0
        commentLines = 0
        totalLines = 0
        for fileStats in self.rawStats().values():
            codeLines += fileStats['codeLines']
            blankLines += fileStats['blankLines']
            commentLines += fileStats['commentLines']
            totalLines += fileStats['totalLines']

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }
        return stats

    def printStats(self):
        """Placeholder; currently does nothing."""
        pass

    def getFileStats(self, fileName):
        """Read 'fileName' and return a dictionary with its 'codeLines',
        'blankLines', 'commentLines' and 'totalLines' counts."""
        codeLines = 0
        blankLines = 0
        commentLines = 0
        commentLineRe = self._commentLineRe
        blankLineRe = self._blankLineRe

        handle = open(fileName)
        try:
            lines = handle.read().splitlines()
        finally:
            handle.close()
        totalLines = len(lines)

        for line in lines:
            if commentLineRe.match(line):
                commentLines += 1
            elif blankLineRe.match(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }

        return stats
Note: See TracBrowser for help on using the browser.