#!/usr/bin/env python
# Originally modified from A. Israel's script seen at
# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
r"""Generate ceph release notes from the merge commits of a git range.

To run this script first install the dependencies

  virtualenv v
  source v/bin/activate
  pip install githubpy GitPython requests

Generate a github access token; this is needed as the anonymous access
to Github's API will easily hit the limit even with a single invocation.
For details see:
https://help.github.com/articles/creating-an-access-token-for-command-line-use/

Next either set the github token as an env variable
`GITHUB_ACCESS_TOKEN` or alternatively invoke the script with
`--token` switch.

Example:

  ceph-release-notes -r tags/v0.87..origin/giant \
      $(git rev-parse --show-toplevel)

"""

from __future__ import print_function

import argparse
import os
import re
import sys

import github
import requests
from git import Repo

fixes_re = re.compile(r"Fixes\:? #(\d+)")
reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
# labels is the list of relevant labels defined for github.com/ceph/ceph
labels = ['bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
          'tools']
merge_re = re.compile(r"Merge pull request #(\d+).*")
# prefixes is the list of commit description prefixes we recognize
prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
signed_off_re = re.compile("Signed-off-by: (.+) <")
# Same trailer as signed_off_re but tolerant of surrounding whitespace; used
# to pull author names out of whole commit messages.
signed_off_author_re = re.compile(r"Signed-off-by:\s*(.*?)\s*<")
tracker_re = re.compile(r"https?://tracker\.ceph\.com/issues/(\d+)")
rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
tracker_uri = "http://tracker.ceph.com/issues/{0}.json"

# "<component>: <rest>" at the very start of a title.
component_re = re.compile('(' + '|'.join(prefixes) + ')(:.*)')
# Backport PR titles additionally carry a "<release>: " prefix.
strict_title_re = re.compile(
    r'^(?:hammer|infernalis|jewel|kraken):\s+(' +
    '|'.join(prefixes) +
    ')(:.*)'
)
# reStructuredText hyperlink to a tracker issue.
issue_link = '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
# PRs numbered below this are assumed to come from ceph/ceph-qa-suite.git.
MIN_CEPH_PR_NUMBER = 1311
# Seconds to wait for the tracker before giving up on a request.
TRACKER_TIMEOUT = 30


def _native(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 ``unicode`` is encoded to UTF-8 bytes; on Python 3 ``bytes``
    are decoded and ``str`` is returned untouched. This keeps string
    concatenation and ``str.format`` working on both major versions.
    """
    if isinstance(text, str):
        return text
    if isinstance(text, bytes):
        return text.decode("utf-8")
    return text.encode("utf-8")


def _tracker_url(issue):
    """Return the human-facing tracker URL for issue number *issue*."""
    return "http://tracker.ceph.com/issues/" + issue


def get_original_issue(issue, verbose):
    """Resolve a Backport-tracker issue to the issue it is related to.

    The tracker is queried for *issue* including its relations. If the issue
    lives in the "Backport" tracker and has exactly one ``copied_to``
    relation, the ``issue_id`` of that relation is returned; in every other
    case *issue* itself is returned.

    :param issue: tracker issue number, as a string
    :param verbose: when true, print why the issue was or was not mapped
    :returns: an issue number, as a string
    :raises requests.RequestException: if the tracker cannot be queried
    """
    response = requests.get(
        tracker_uri.format(issue),
        params={"include": "relations"},
        timeout=TRACKER_TIMEOUT,
    )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError.
    response.raise_for_status()
    info = response.json()["issue"]
    url = _tracker_url(issue)

    # looking up for the original issue only makes sense
    # when dealing with an issue in the Backport tracker
    tracker_name = info["tracker"]["name"]
    if tracker_name != "Backport":
        if verbose:
            print(url + " is from the tracker " + tracker_name +
                  ", do not look for the original issue")
        return issue

    # if a Backport issue does not have a relation, keep it
    if "relations" not in info:
        if verbose:
            print(url + " has no relations, do not look for the original"
                  " issue")
        return issue

    copied_to = [
        str(relation['issue_id'])
        for relation in info["relations"]
        if relation["relation_type"] == "copied_to"
    ]
    if not copied_to:
        if verbose:
            print(url + " has no copied_to relations; do not look for the" +
                  " original issue")
        return issue
    if len(copied_to) > 1:
        # Ambiguous: cannot tell which relation is the original.
        if verbose:
            print("ERROR: " + url + " has more than one Copied To relation")
        return issue
    if verbose:
        print(url + " is the backport of " + _tracker_url(copied_to[0]))
    return copied_to[0]


def split_component(title, gh, number):
    """Return *title* prefixed with a component name.

    A title that already starts with one of ``prefixes`` followed by a colon
    is returned trimmed to that match. Otherwise the labels of github issue
    *number* are fetched: a 'documentation' label yields a ``doc: `` prefix,
    any labels listed in ``labels`` are joined with commas and used as the
    prefix, and ``UNKNOWN: `` is used when nothing matches.

    :param title: PR title
    :param gh: github API client (as created by ``github.GitHub``)
    :param number: PR / issue number to look the labels up for
    """
    match = component_re.match(title)
    if match:
        return match.group(1) + match.group(2)

    issue = gh.repos("ceph")("ceph").issues(number).get()
    issue_labels = {it['name'] for it in issue['labels']}
    if 'documentation' in issue_labels:
        return 'doc: ' + title
    # ``labels`` is a list, so filter it rather than calling set methods on
    # it; this also keeps the components in their declared order.
    matching = [label for label in labels if label in issue_labels]
    if matching:
        return ",".join(matching) + ': ' + title
    return 'UNKNOWN: ' + title


def _title_message(commit, pr, strict):
    """Derive the release-note title and optional message for a merge.

    The body of the merge commit (everything after its first line) is
    inspected, ignoring blank lines and lines matching ``reviewed_by_re``:

    * no remaining line, strict mode, or a first line equal to the PR
      title: the PR title is used and there is no message;
    * exactly one remaining line: it replaces the PR title;
    * several lines: the PR title is kept and the lines become the message.

    :returns: a ``(title, message)`` tuple; *message* may be ``None``
    """
    title = pr['title']
    if strict:
        return (title, None)

    lines = []
    for line in commit.message.split('\n')[1:]:
        if reviewed_by_re.match(line):
            continue
        line = line.strip()
        if line:
            lines.append(line)

    if not lines or lines[0] == title.strip():
        return (title, None)
    if len(lines) == 1:
        # assume that a single line means the intention is to
        # re-write the PR title
        return (lines[0], None)
    message = " " + "\n ".join(lines)
    return (title, message)


def _authors_and_issues(repo, merge_commit):
    """Scan the commits brought in by *merge_commit*.

    :returns: ``(authors, issues)`` where *authors* lists the distinct
        Signed-off-by names in first-seen order and *issues* lists every
        issue number referenced by a commit message (may hold duplicates)
    """
    authors = []
    issues = []
    merged_range = "{sha1}^1..{sha1}^2".format(sha1=merge_commit.hexsha)
    for c in repo.iter_commits(merged_range):
        for author in signed_off_author_re.findall(c.message):
            if author not in authors:
                authors.append(author)
        issues.extend(fixes_re.findall(c.message))
        issues.extend(tracker_re.findall(c.message))
    return authors, issues


def _format_issue_refs(issues, plaintext):
    """Render *issues* as a comma-separated list ending in ``", "``."""
    if plaintext:
        refs = ['#' + str(issue) for issue in issues]
    else:
        refs = [issue_link.format(issue=issue) for issue in issues]
    return ", ".join(refs) + ", "


def make_release_notes(gh, repo, ref, plaintext, verbose, strict, use_tags):
    """Print one release-note entry per merged pull request in *ref*.

    :param gh: github API client (as created by ``github.GitHub``)
    :param repo: ``git.Repo`` of a ceph checkout
    :param ref: git revision range whose merge commits are examined
    :param plaintext: print plain text instead of reStructuredText links
    :param verbose: print details while resolving backport issues
    :param strict: require an associated issue and a
        ``<release>: <component>: ...`` title, and map backport issues to
        their originals
    :param use_tags: derive the component from github labels when the
        title carries no recognized prefix
    """
    issue2prs = {}
    pr2issues = {}
    pr2info = {}

    for commit in repo.iter_commits(ref, merges=True):
        merge = merge_re.match(commit.summary)
        if not merge:
            continue
        number = merge.group(1)
        print("Considering PR#" + number)
        # do not pick up ceph/ceph-qa-suite.git PRs
        if int(number) < MIN_CEPH_PR_NUMBER:
            print("Ignoring low-numbered PR, probably picked up from"
                  " ceph/ceph-qa-suite.git")
            continue
        pr = gh.repos("ceph")("ceph").pulls(number).get()
        (title, message) = _title_message(commit, pr, strict)

        issues = []
        if pr['body']:
            issues = (fixes_re.findall(pr['body']) +
                      tracker_re.findall(pr['body']))
        authors, commit_issues = _authors_and_issues(repo, commit)
        issues.extend(commit_issues)
        if authors:
            author = ", ".join(authors)
        else:
            # No Signed-off-by trailer anywhere: fall back to the author of
            # the tip of the merged branch.
            author = commit.parents[-1].author.name

        if strict and not issues:
            print("ERROR: https://github.com/ceph/ceph/pull/" +
                  str(number) + " has no associated issue")
            continue

        if strict:
            match = strict_title_re.match(title)
            if not match:
                print("ERROR: https://github.com/ceph/ceph/pull/" +
                      str(number) + " title " + _native(title) +
                      " does not match " + strict_title_re.pattern)
            else:
                title = match.group(1) + match.group(2)
        if use_tags:
            title = split_component(title, gh, number)

        # trim surrounding whitespace, punctuation and backslashes
        title = title.strip(' \t\n\r\f\v\\.,;:-=')
        # escape asterisks, which are used by reStructuredText for inline
        # emphasis
        title = title.replace('*', '\\*')
        # and escape the underscores for noting a link
        title = rst_link_re.sub(r'\1\_\2', title)
        pr2info[number] = (author, title, message)

        for issue in set(issues):
            if strict:
                issue = get_original_issue(issue, verbose)
            issue2prs.setdefault(issue, set()).add(number)
            pr2issues.setdefault(number, set()).add(issue)
        # progress indicator
        sys.stdout.write('.')

    print(" done collecting merges.")

    if strict:
        for (issue, prs) in issue2prs.items():
            if len(prs) > 1:
                print(">>>>>>> " + str(len(prs)) + " pr for issue " +
                      issue + " " + str(prs))

    # Entries are listed alphabetically by title.
    for (pr, (author, title, message)) in sorted(
        pr2info.items(), key=lambda item: item[1][1]
    ):
        if pr in pr2issues:
            # Numeric order keeps the output stable between runs.
            issues = _format_issue_refs(
                sorted(pr2issues[pr], key=int), plaintext
            )
        else:
            issues = ''
        if plaintext:
            print("* {title} ({issues}{author})".format(
                title=_native(title),
                issues=issues,
                author=_native(author)
            ))
        else:
            print(
                (
                    "* {title} ({issues}`pr#{pr} <"
                    "https://github.com/ceph/ceph/pull/{pr}"
                    ">`_, {author})"
                ).format(
                    title=_native(title),
                    issues=issues,
                    author=_native(author),
                    pr=pr
                )
            )
        if message:
            print(_native(message))


def _str2bool(value):
    """Interpret a command-line string as a boolean.

    Empty, "0", "false", "no" and "off" (case-insensitive) are false; any
    other value is true.
    """
    return value.strip().lower() not in ('', '0', 'false', 'no', 'off')


def main():
    """Parse the command line and print the release notes."""
    desc = '''
Make ceph release notes for a given revision. Eg usage:

$ ceph-release-notes -r tags/v0.87..origin/giant \\
    $(git rev-parse --show-toplevel)

It is recommended to set the github env. token in order to avoid
hitting the api rate limits.
'''

    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument("--rev", "-r",
                        help="git revision range for creating release notes")
    parser.add_argument("--text", "-t",
                        action='store_true', default=None,
                        help="output plain text only, no links")
    parser.add_argument("--verbose", "-v",
                        action='store_true', default=None,
                        help="verbose")
    parser.add_argument("--strict",
                        action='store_true', default=None,
                        help="strict, recommended only for backport releases")
    parser.add_argument("repo", metavar="repo",
                        help="path to ceph git repo")
    parser.add_argument(
        "--token",
        default=os.getenv("GITHUB_ACCESS_TOKEN"),
        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
    )
    # The value is optional: a bare ``--use-tags`` enables the feature, and
    # an explicit value (as older invocations had to pass) is still accepted.
    parser.add_argument("--use-tags", nargs='?', const=True, default=False,
                        type=_str2bool,
                        help="Use github tags to guess the component\n"
                             "(optionally --use-tags=true|false)")

    args = parser.parse_args()
    gh = github.GitHub(access_token=args.token)

    make_release_notes(
        gh,
        Repo(args.repo),
        args.rev,
        args.text,
        args.verbose,
        args.strict,
        args.use_tags
    )


if __name__ == "__main__":
    main()