src/ceph/src/script/ceph-release-notes

   1 #!/usr/bin/env python
   2 # Originally modified from A. Israel's script seen at
   3 # https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
   4 """To run this script first install the dependencies
   5
   6
   7   virtualenv v
   8   source v/bin/activate
   9   pip install githubpy GitPython requests
  10
  11 Generate a github access token; this is needed as the anonymous access
  12 to Github's API will easily hit the limit even with a single invocation.
  13 For details see:
  14 https://help.github.com/articles/creating-an-access-token-for-command-line-use/
  15
  16 Next either set the github token as an env variable
  17 `GITHUB_ACCESS_TOKEN` or alternatively invoke the script with
  18 `--token` switch.
  19
  20 Example:
  21
  22   ceph-release-notes -r tags/v0.87..origin/giant \
  23       $(git rev-parse --show-toplevel)
  24
  25 """
  26
  27 from __future__ import print_function
  28 import argparse
  29 import github
  30 import os
  31 import re
  32 import sys
  33 import requests
  34
  35 from git import Repo
  36
  37
  38 fixes_re = re.compile(r"Fixes\:? #(\d+)")
  39 reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
  40 # labels is the list of relevant labels defined for github.com/ceph/ceph
  41 labels = ['bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
  42           'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
  43           'tools']
  44 merge_re = re.compile("Merge pull request #(\d+).*")
  45 # prefixes is the list of commit description prefixes we recognize
  46 prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
  47             'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
  48             'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
  49             'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
  50 signed_off_re = re.compile("Signed-off-by: (.+) <")
  51 tracker_re = re.compile("http://tracker.ceph.com/issues/(\d+)")
  52 rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
  53 tracker_uri = "http://tracker.ceph.com/issues/{0}.json"
  54
  55
  56 def get_original_issue(issue, verbose):
  57     r = requests.get(tracker_uri.format(issue),
  58                      params={"include": "relations"}).json()
  59
  60     # looking up for the original issue only makes sense
  61     # when dealing with an issue in the Backport tracker
  62     if r["issue"]["tracker"]["name"] != "Backport":
  63         if verbose:
  64             print ("http://tracker.ceph.com/issues/" + issue +
  65                    " is from the tracker " + r["issue"]["tracker"]["name"] +
  66                    ", do not look for the original issue")
  67         return issue
  68
  69     # if a Backport issue does not have a relation, keep it
  70     if "relations" not in r["issue"]:
  71         if verbose:
  72             print ("http://tracker.ceph.com/issues/" + issue +
  73                    " has no relations, do not look for the original issue")
  74         return issue
  75
  76     copied_to = [
  77                     str(i['issue_id']) for i in r["issue"]["relations"]
  78                     if i["relation_type"] == "copied_to"
  79                 ]
  80     if copied_to:
  81         if len(copied_to) > 1:
  82             if verbose:
  83                 print ("ERROR: http://tracker.ceph.com/issues/" + issue +
  84                        " has more than one Copied To relation")
  85             return issue
  86         if verbose:
  87             print ("http://tracker.ceph.com/issues/" + issue +
  88                    " is the backport of http://tracker.ceph.com/issues/" +
  89                    copied_to[0])
  90         return copied_to[0]
  91     else:
  92         if verbose:
  93             print ("http://tracker.ceph.com/issues/" + issue +
  94                    " has no copied_to relations; do not look for the" +
  95                    " original issue")
  96         return issue
  97
  98
  99 def split_component(title, gh, number):
 100     title_re = '(' + '|'.join(prefixes) + ')(:.*)'
 101     match = re.match(title_re, title)
 102     if match:
 103         return match.group(1)+match.group(2)
 104     else:
 105         issue = gh.repos("ceph")("ceph").issues(number).get()
 106         issue_labels = {it['name'] for it in issue['labels']}
 107         if 'documentation' in issue_labels:
 108             return 'doc: ' + title
 109         item = labels.intersection(issue_labels)
 110         if item:
 111             return ",".join(item) + ': ' + title
 112         else:
 113             return 'UNKNOWN: ' + title
 114
 115 def _title_message(commit, pr, strict):
 116     title = pr['title']
 117     message_lines = commit.message.split('\n')
 118     if strict or len(message_lines) < 1:
 119         return (title, None)
 120     lines = []
 121     for line in message_lines[1:]:
 122         if reviewed_by_re.match(line):
 123             continue
 124         line = line.strip()
 125         if line:
 126             lines.append(line)
 127     if len(lines) == 0:
 128         return (title, None)
 129     duplicates_pr_title = lines[0] == pr['title'].strip()
 130     if duplicates_pr_title:
 131         return (title, None)
 132     assert len(lines) > 0, "missing message content"
 133     if len(lines) == 1:
 134         # assume that a single line means the intention is to
 135         # re-write the PR title
 136         return (lines[0], None)
 137     message = "    " + "\n    ".join(lines)
 138     return (title, message)
 139
 140 def make_release_notes(gh, repo, ref, plaintext, verbose, strict, use_tags):
 141
 142     issue2prs = {}
 143     pr2issues = {}
 144     pr2info = {}
 145
 146     for commit in repo.iter_commits(ref, merges=True):
 147         merge = merge_re.match(commit.summary)
 148         if not merge:
 149             continue
 150         number = merge.group(1)
 151         print ("Considering PR#" + number)
 152         # do not pick up ceph/ceph-qa-suite.git PRs
 153         if int(number) < 1311:
 154             print ("Ignoring low-numbered PR, probably picked up from"
 155                    " ceph/ceph-qa-suite.git")
 156             continue
 157         pr = gh.repos("ceph")("ceph").pulls(number).get()
 158         (title, message) = _title_message(commit, pr, strict)
 159         issues = []
 160         if pr['body']:
 161             issues = fixes_re.findall(pr['body']) + tracker_re.findall(
 162                 pr['body']
 163             )
 164
 165         authors = {}
 166         for c in repo.iter_commits(
 167                      "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
 168                  ):
 169             for author in re.findall(
 170                               "Signed-off-by:\s*(.*?)\s*<", c.message
 171                           ):
 172                 authors[author] = 1
 173             issues.extend(fixes_re.findall(c.message) +
 174                           tracker_re.findall(c.message))
 175         if authors:
 176             author = ", ".join(authors.keys())
 177         else:
 178             author = commit.parents[-1].author.name
 179
 180         if strict and not issues:
 181             print ("ERROR: https://github.com/ceph/ceph/pull/" +
 182                    str(number) + " has no associated issue")
 183             continue
 184
 185         if strict:
 186             title_re = (
 187                 '^(?:hammer|infernalis|jewel|kraken):\s+(' +
 188                 '|'.join(prefixes) +
 189                 ')(:.*)'
 190             )
 191             match = re.match(title_re, title)
 192             if not match:
 193                 print ("ERROR: https://github.com/ceph/ceph/pull/" +
 194                        str(number) + " title " + title.encode("utf-8") +
 195                        " does not match " + title_re)
 196             else:
 197                 title = match.group(1) + match.group(2)
 198         if use_tags:
 199             title = split_component(title, gh, number)
 200
 201         title = title.strip(' \t\n\r\f\v\.\,\;\:\-\=')
 202         # escape asterisks, which is used by reStructuredTextrst for inline
 203         # emphasis
 204         title = title.replace('*', '\*')
 205         # and escape the underscores for noting a link
 206         title = rst_link_re.sub(r'\1\_\2', title)
 207         pr2info[number] = (author, title, message)
 208
 209         for issue in set(issues):
 210             if strict:
 211                 issue = get_original_issue(issue, verbose)
 212             issue2prs.setdefault(issue, set([])).add(number)
 213             pr2issues.setdefault(number, set([])).add(issue)
 214         sys.stdout.write('.')
 215
 216     print (" done collecting merges.")
 217
 218     if strict:
 219         for (issue, prs) in issue2prs.items():
 220             if len(prs) > 1:
 221                 print (">>>>>>> " + str(len(prs)) + " pr for issue " +
 222                        issue + " " + str(prs))
 223
 224     for (pr, (author, title, message)) in sorted(
 225         pr2info.items(), key=lambda title: title[1][1]
 226     ):
 227         if pr in pr2issues:
 228             if plaintext:
 229                 issues = map(lambda issue: '#' + str(issue), pr2issues[pr])
 230             else:
 231                 issues = map(lambda issue: (
 232                     '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
 233                     ).format(issue=issue), pr2issues[pr]
 234                 )
 235             issues = ", ".join(issues) + ", "
 236         else:
 237             issues = ''
 238         if plaintext:
 239             print ("* {title} ({issues}{author})".format(
 240                     title=title.encode("utf-8"),
 241                     issues=issues,
 242                     author=author.encode("utf-8")
 243                 )
 244             )
 245         else:
 246             print (
 247                 (
 248                     "* {title} ({issues}`pr#{pr} <"
 249                     "https://github.com/ceph/ceph/pull/{pr}"
 250                     ">`_, {author})"
 251                 ).format(
 252                     title=title.encode("utf-8"),
 253                     issues=issues,
 254                     author=author.encode("utf-8"), pr=pr
 255                 )
 256             )
 257         if message:
 258             print (message)
 259
 260
 261 if __name__ == "__main__":
 262     desc = '''
 263     Make ceph release notes for a given revision. Eg usage:
 264
 265     $ ceph-release-notes -r tags/v0.87..origin/giant \
 266         $(git rev-parse --show-toplevel)
 267
 268     It is recommended to set the github env. token in order to avoid
 269     hitting the api rate limits.
 270     '''
 271
 272     parser = argparse.ArgumentParser(
 273         description=desc,
 274         formatter_class=argparse.RawTextHelpFormatter
 275     )
 276
 277     parser.add_argument("--rev", "-r",
 278                         help="git revision range for creating release notes")
 279     parser.add_argument("--text", "-t",
 280                         action='store_true', default=None,
 281                         help="output plain text only, no links")
 282     parser.add_argument("--verbose", "-v",
 283                         action='store_true', default=None,
 284                         help="verbose")
 285     parser.add_argument("--strict",
 286                         action='store_true', default=None,
 287                         help="strict, recommended only for backport releases")
 288     parser.add_argument("repo", metavar="repo",
 289                         help="path to ceph git repo")
 290     parser.add_argument(
 291         "--token",
 292         default=os.getenv("GITHUB_ACCESS_TOKEN"),
 293         help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
 294     )
 295     parser.add_argument("--use-tags", default=False,
 296                         help="Use github tags to guess the component")
 297
 298     args = parser.parse_args()
 299     gh = github.GitHub(
 300         access_token=args.token)
 301
 302     make_release_notes(
 303         gh,
 304         Repo(args.repo),
 305         args.rev,
 306         args.text,
 307         args.verbose,
 308         args.strict,
 309         args.use_tags
 310     )