?? contribulyze.py
字號:
#!/usr/bin/env python
# See usage() for details, or run with --help option.
#
#          .-------------------------------------------------.
#          |  "An ad hoc format deserves an ad hoc parser."  |
#          `-------------------------------------------------'
#
# Some Subversion project log messages include parseable data to help
# track who's contributing what.  The exact syntax is described in
# hacking.html#crediting, but here's an example, indented by three
# spaces, i.e., the "Patch by:" starts at the beginning of a line:
#
#    Patch by: David Anderson <david.anderson@calixo.net>
#              <justin@erenkrantz.com>
#              me
#    (I wrote the regression tests.)
#    Found by: Phineas T. Phinder <phtph@ph1nderz.com>
#    Suggested by: Snosbig Q. Ptermione <sqptermione@example.com>
#    Review by: Justin Erenkrantz <justin@erenkrantz.com>
#               rooneg
#    (They caught an off-by-one error in the main loop.)
#
# This is a pathological example, but it shows all the things we might
# need to parse.  We need to:
#
#   - Detect the officially-approved "WORD by: " fields.
#   - Grab every name (one per line) in each field.
#   - Handle names in various formats, unifying where possible.
#   - Expand "me" to the committer name for this revision.
#   - Associate a parenthetical aside following a field with that field.
#
# NOTES: You might be wondering, why not take 'svn log --xml' input?
# Well, that would be the Right Thing to do, but in practice this was
# a lot easier to whip up for straight 'svn log' output.  I'd have no
# objection to it being rewritten to take XML input.

import os
import sys
import re
import shutil
import getopt

# quote() lives in urllib.parse on Python 3 and in urllib on Python 2;
# try the modern location first and fall back to the legacy one.
try:
    from urllib.parse import quote as url_encode
except ImportError:
    from urllib import quote as url_encode

# The script used to define True/False by hand for very old interpreters.
# That shim is gone: assigning to True or False is a compile-time
# SyntaxError on Python 3, and both names are builtins on every
# interpreter that can still run this file.

# Warnings and errors start with these strings.
# The message prefixes below are typically followed by a colon and a
# space, as in "%s: " ==> "WARNING: ".
warning_prefix = 'WARNING'
error_prefix = 'ERROR'


def complain(msg, fatal=False):
    """Print MSG to stderr as a warning, or if FATAL is true, print it
    as an error and exit with status 1."""
    if fatal:
        prefix = error_prefix
    else:
        prefix = warning_prefix
    sys.stderr.write('%s: %s\n' % (prefix, msg))
    if fatal:
        sys.exit(1)


def html_spam_guard(addr):
    """Return a spam-protected version of email ADDR that renders the
    same in HTML as the original address.

    Every character is emitted as a numeric character reference wrapped
    in its own <span> element."""
    return "".join("<span>&#%d;</span>" % ord(ch) for ch in addr)


def escape_html(str):
    """Return an HTML-escaped version of STR.

    '&' is replaced first so that the entities produced for '<' and '>'
    are not escaped a second time."""
    return (str.replace('&', '&amp;')
               .replace('<', '&lt;')
               .replace('>', '&gt;'))


# Matches an "@"-containing address between *escaped* angle brackets, as
# it appears in text that has already been through escape_html().  The
# [^&] classes keep the match from running across another entity.
_spam_guard_in_html_block_re = re.compile(r'&lt;([^&]*@[^&]*)&gt;')


def _spam_guard_in_html_block_func(m):
    """Return the spam-guarded replacement for regex match M."""
    return "&lt;%s&gt;" % html_spam_guard(m.group(1))


def spam_guard_in_html_block(str):
    """Take a block of HTML data, and run html_spam_guard() on every
    address found between escaped angle brackets.  Return the new text."""
    return _spam_guard_in_html_block_re.sub(_spam_guard_in_html_block_func,
                                            str)


def html_header(title):
    """Return the HTML file header as a string.  TITLE is used for both
    the <title> and the <h1>, and is expected to already be HTML-escaped
    if needed."""
    return ''.join([
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n',
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n',
        '<html><head>\n',
        '<meta http-equiv="Content-Type"',
        ' content="text/html; charset=UTF-8" />\n',
        '<title>%s</title>\n' % title,
        '</head>\n\n',
        # "color" is the CSS property for text colour; the former
        # "text-color" is not a CSS property.
        '<body style="color: black; background-color: white">\n\n',
        '<h1 style="text-align: center">%s</h1>\n\n' % title,
        '<hr />\n\n',
    ])


def html_footer():
    """Return the closing tags that match html_header()."""
    return '\n</body>\n</html>\n'


class Contributor:
    # Map contributor names to contributor instances, so that there
    # exists exactly one instance associated with a given name.
    # Fold names with email addresses.  That is, if we see someone
    # listed first with just an email address, but later with a real
    # name and that same email address together, we create only one
    # instance, and store it under both the email and the real name.
    all_contributors = {}

    # Class-wide counter from which each new instance takes its own
    # unique_hash_value.  See __hash__() for why this is necessary.
    hash_value = 1

    def __init__(self, username, real_name, email):
        """Instantiate a contributor.  Don't use this to generate a
        Contributor for an external caller, though, use .get() instead."""
        self.real_name = real_name
        self.username = username
        self.email = email
        self.is_committer = False       # Assume not until hear otherwise.
        self.is_full_committer = False  # Assume not until hear otherwise.
        # Map verbs (e.g., "Patch", "Suggested", "Review") to lists of
        # LogMessage objects.  For example, the log messages stored under
        # "Patch" represent all the revisions for which this contributor
        # contributed a patch.
        self.activities = {}
        # Sigh.  Take the next value of the shared counter as this
        # instance's permanent hash.
        self.unique_hash_value = Contributor.hash_value
        Contributor.hash_value += 1

    def add_activity(self, field_name, log):
        """Record that this contributor was active in FIELD_NAME in LOG.
        A LOG already recorded under FIELD_NAME is not added twice."""
        logs = self.activities.setdefault(field_name, [])
        if log not in logs:
            logs.append(log)

    def get(username, real_name, email):
        """If this contributor is already registered, just return it;
        otherwise, register it then return it.  Hint: use parse() to
        generate the arguments."""
        c = None
        for key in (username, real_name, email):
            if key and key in Contributor.all_contributors:
                c = Contributor.all_contributors[key]
                break
        # If we didn't get a Contributor, create one now.
        if c is None:
            c = Contributor(username, real_name, email)
        # If we know identifying information that the Contributor lacks,
        # then give it to the Contributor now.  Every identifier we were
        # handed is (re)registered so later lookups by it succeed.
        if username:
            if not c.username:
                c.username = username
            Contributor.all_contributors[username] = c
        if real_name:
            if not c.real_name:
                c.real_name = real_name
            Contributor.all_contributors[real_name] = c
        if email:
            if not c.email:
                c.email = email
            Contributor.all_contributors[email] = c
        # This Contributor has never been in better shape; return it.
        return c
    get = staticmethod(get)

    def score(self):
        """Return a contribution score for this contributor."""
        # Right now we count a patch as 2, anything else as 1.
        total = 0
        for activity, logs in self.activities.items():
            if activity == 'Patch':
                total += len(logs) * 2
            else:
                total += len(logs)
        return total

    def score_str(self):
        """Return a contribution score HTML string for this contributor,
        e.g. "2 patches, 1 non-patch".  Empty when there is no activity."""
        patch_score = 0
        other_score = 0
        for activity, logs in self.activities.items():
            if activity == 'Patch':
                patch_score += len(logs)
            else:
                other_score += len(logs)

        parts = []
        if patch_score == 1:
            parts.append("1 patch")
        elif patch_score:
            parts.append("%d patches" % patch_score)
        if other_score == 1:
            parts.append("1 non-patch")
        elif other_score:
            parts.append("%d non-patches" % other_score)
        return ", ".join(parts)

    def _sort_key(self):
        """Return the tuple that defines this contributor's sort order:
        non-full-committers before full committers, then higher scores
        first, then big_name() ascending."""
        return (bool(self.is_full_committer), -self.score(), self.big_name())

    def __cmp__(self, other):
        """Three-way comparison used by Python 2; written without the
        cmp() builtin, which Python 3 removed."""
        mine = self._sort_key()
        theirs = other._sort_key()
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, other):
        """Ordering hook used by sort()/sorted() on Python 3, which
        ignores __cmp__."""
        return self._sort_key() < other._sort_key()

    def __hash__(self):
        """See LogMessage.__hash__() for why this exists."""
        # Return the per-instance value captured in __init__.  The class
        # attribute hash_value is a counter that keeps growing, so using
        # it would change this object's hash whenever another Contributor
        # is created.
        return self.unique_hash_value

    def parse(name):
        """Parse NAME, which can be

           - A committer username, or
           - A space-separated real name, or
           - A space-separated real name followed by an email address in
             angle brackets, or
           - Just an email address in angle brackets.

        Return a tuple of (committer_username, real_name, email_address)
        any of which can be None if not available in NAME.  An empty or
        all-whitespace NAME yields (None, None, None)."""
        username = None
        real_name = None
        email = None
        name_components = name.split()
        if not name_components:
            # Nothing to parse; indexing below would raise IndexError.
            return username, real_name, email
        last = name_components[-1]
        bracketed = last[0] == '<' and last[-1] == '>'
        if len(name_components) == 1:
            if bracketed:
                email = last[1:-1]
            elif '@' in last:
                email = last
            else:
                username = last
        elif bracketed:
            real_name = ' '.join(name_components[:-1])
            email = last[1:-1]
        else:
            real_name = ' '.join(name_components)
        return username, real_name, email
    parse = staticmethod(parse)

    def canonical_name(self):
        """Return a canonical name for this contributor.  The canonical
        name may or may not be based on the contributor's actual email
        address.

        The canonical name will not contain filename-unsafe characters.

        This method is guaranteed to return the same canonical name every
        time only if no further contributions are recorded from this
        contributor after the first call.  This is because a contribution
        may bring a new form of the contributor's name, one which affects
        the algorithm used to construct canonical names."""
        retval = None
        if self.username:
            retval = self.username
        elif self.email:
            # Take some rudimentary steps to shorten the email address, to
            # make it more manageable.  If this is ever discovered to result
            # in collisions, we can always just use to the full address.
            try:
                at_posn = self.email.index('@')
                first_dot_after_at = self.email.index('.', at_posn)
                retval = self.email[0:first_dot_after_at]
            except ValueError:
                # No '@', or no '.' after it: keep the whole address.
                retval = self.email
        elif self.real_name:
            # Last resort: construct canonical name based on real name.
            retval = ''.join(self.real_name.lower().split(' '))
        if retval is None:
            # Fatal: complain() exits the program here.
            complain('Unable to construct a canonical name for Contributor.',
                     True)
        return url_encode(retval, safe="!#$&'()+,;<=>@[]^`{}~")

    def big_name(self, html=False):
        """Return as complete a name as possible for this contributor.
        With HTML true, the real name is HTML-escaped and a bracketed
        email address is spam-guarded."""
        name_bits = []
        if self.real_name:
            if html:
                name_bits.append(escape_html(self.real_name))
            else:
                name_bits.append(self.real_name)
        if self.email:
            if not self.real_name and not self.username:
                name_bits.append(self.email)
            elif html:
                # Escaped brackets, so the browser shows "<addr>" instead
                # of treating it as markup.
                name_bits.append("&lt;%s&gt;" % html_spam_guard(self.email))
            else:
                name_bits.append("<%s>" % self.email)
        if self.username:
            if not self.real_name and not self.email:
                name_bits.append(self.username)
            else:
                name_bits.append("(%s)" % self.username)
        return " ".join(name_bits)

    def __str__(self):
        """Return a plain-text summary: the big name, the canonical name,
        and the revisions recorded under each activity."""
        s = 'CONTRIBUTOR: '
        s += self.big_name()
        s += "\ncanonical name: '%s'" % self.canonical_name()
        if len(self.activities) > 0:
            s += '\n '
        for activity in self.activities.keys():
            val = self.activities[activity]
            s += '[%s:' % activity
            for log in val:
                s += ' %s' % log.revision
            s += ']'
        return s

    def html_out(self, revision_url_pattern, filename):
        """Create an HTML file named FILENAME, showing all the revisions in
        which this contributor was active."""
        # NOTE(review): only the leading part of this method is present in
        # the reviewed text.  Its statements are kept as they were, apart
        # from the sort below (dict.keys() has no .sort() on Python 3).
        out = open(filename, 'w')
        out.write(html_header(self.big_name(html=True)))
        unique_logs = {}

        sorted_activities = sorted(self.activities.keys())

        out.write('<div class="h2" id="activities" title="activities">\n\n')
        out.write('<table border="1">\n')
        out.write('<tr>\n')
        for activity in sorted_activities:
            out.write('<td>%s</td>\n\n' % activity)
        out.write('</tr>\n')
        out.write('<tr>\n')
        for activity in sorted_activities:
            out.write('<td>\n')
            first_activity = True
            for log in self.activities[activity]:
                s = ',\n'
                if first_activity:
                    s = ''
                    first_activity = False
                out.write('%s<a href="#%s">%s</a>' % (s, log.revision, log.revision))
                unique_logs[log] = True
            out.write('</td>\n')
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -