initial code repo
[stor4nfv.git] / src / ceph / src / script / ceph-release-notes
diff --git a/src/ceph/src/script/ceph-release-notes b/src/ceph/src/script/ceph-release-notes
new file mode 100755 (executable)
index 0000000..3a3da4e
--- /dev/null
@@ -0,0 +1,310 @@
+#!/usr/bin/env python
+# Originally modified from A. Israel's script seen at
+# https://gist.github.com/aisrael/b2b78d9dfdd176a232b9
+"""To run this script first install the dependencies
+
+
+  virtualenv v
+  source v/bin/activate
+  pip install githubpy GitPython requests
+
+Generate a github access token; this is needed as the anonymous access
+to Github's API will easily hit the limit even with a single invocation.
+For details see:
+https://help.github.com/articles/creating-an-access-token-for-command-line-use/
+
+Next either set the github token as an env variable
+`GITHUB_ACCESS_TOKEN` or alternatively invoke the script with
+`--token` switch.
+
+Example:
+
+  ceph-release-notes -r tags/v0.87..origin/giant \
+      $(git rev-parse --show-toplevel)
+
+"""
+
+from __future__ import print_function
+import argparse
+import github
+import os
+import re
+import sys
+import requests
+
+from git import Repo
+
+
+fixes_re = re.compile(r"Fixes\:? #(\d+)")
+reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)
+# labels is the list of relevant labels defined for github.com/ceph/ceph
+labels = ['bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
+          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
+          'tools']
+merge_re = re.compile("Merge pull request #(\d+).*")
+# prefixes is the list of commit description prefixes we recognize
+prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
+            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
+            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
+            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']
+signed_off_re = re.compile("Signed-off-by: (.+) <")
+tracker_re = re.compile("http://tracker.ceph.com/issues/(\d+)")
+rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")
+tracker_uri = "http://tracker.ceph.com/issues/{0}.json"
+
+
+def get_original_issue(issue, verbose):
+    r = requests.get(tracker_uri.format(issue),
+                     params={"include": "relations"}).json()
+
+    # looking up for the original issue only makes sense
+    # when dealing with an issue in the Backport tracker
+    if r["issue"]["tracker"]["name"] != "Backport":
+        if verbose:
+            print ("http://tracker.ceph.com/issues/" + issue +
+                   " is from the tracker " + r["issue"]["tracker"]["name"] +
+                   ", do not look for the original issue")
+        return issue
+
+    # if a Backport issue does not have a relation, keep it
+    if "relations" not in r["issue"]:
+        if verbose:
+            print ("http://tracker.ceph.com/issues/" + issue +
+                   " has no relations, do not look for the original issue")
+        return issue
+
+    copied_to = [
+                    str(i['issue_id']) for i in r["issue"]["relations"]
+                    if i["relation_type"] == "copied_to"
+                ]
+    if copied_to:
+        if len(copied_to) > 1:
+            if verbose:
+                print ("ERROR: http://tracker.ceph.com/issues/" + issue +
+                       " has more than one Copied To relation")
+            return issue
+        if verbose:
+            print ("http://tracker.ceph.com/issues/" + issue +
+                   " is the backport of http://tracker.ceph.com/issues/" +
+                   copied_to[0])
+        return copied_to[0]
+    else:
+        if verbose:
+            print ("http://tracker.ceph.com/issues/" + issue +
+                   " has no copied_to relations; do not look for the" +
+                   " original issue")
+        return issue
+
+
+def split_component(title, gh, number):
+    title_re = '(' + '|'.join(prefixes) + ')(:.*)'
+    match = re.match(title_re, title)
+    if match:
+        return match.group(1)+match.group(2)
+    else:
+        issue = gh.repos("ceph")("ceph").issues(number).get()
+        issue_labels = {it['name'] for it in issue['labels']}
+        if 'documentation' in issue_labels:
+            return 'doc: ' + title
+        item = labels.intersection(issue_labels)
+        if item:
+            return ",".join(item) + ': ' + title
+        else:
+            return 'UNKNOWN: ' + title
+
+def _title_message(commit, pr, strict):
+    title = pr['title']
+    message_lines = commit.message.split('\n')
+    if strict or len(message_lines) < 1:
+        return (title, None)
+    lines = []
+    for line in message_lines[1:]:
+        if reviewed_by_re.match(line):
+            continue
+        line = line.strip()
+        if line:
+            lines.append(line)
+    if len(lines) == 0:
+        return (title, None)
+    duplicates_pr_title = lines[0] == pr['title'].strip()
+    if duplicates_pr_title:
+        return (title, None)
+    assert len(lines) > 0, "missing message content"
+    if len(lines) == 1:
+        # assume that a single line means the intention is to
+        # re-write the PR title
+        return (lines[0], None)
+    message = "    " + "\n    ".join(lines)
+    return (title, message)
+
+def make_release_notes(gh, repo, ref, plaintext, verbose, strict, use_tags):
+
+    issue2prs = {}
+    pr2issues = {}
+    pr2info = {}
+
+    for commit in repo.iter_commits(ref, merges=True):
+        merge = merge_re.match(commit.summary)
+        if not merge:
+            continue
+        number = merge.group(1)
+        print ("Considering PR#" + number)
+        # do not pick up ceph/ceph-qa-suite.git PRs
+        if int(number) < 1311:
+            print ("Ignoring low-numbered PR, probably picked up from"
+                   " ceph/ceph-qa-suite.git")
+            continue
+        pr = gh.repos("ceph")("ceph").pulls(number).get()
+        (title, message) = _title_message(commit, pr, strict)
+        issues = []
+        if pr['body']:
+            issues = fixes_re.findall(pr['body']) + tracker_re.findall(
+                pr['body']
+            )
+
+        authors = {}
+        for c in repo.iter_commits(
+                     "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
+                 ):
+            for author in re.findall(
+                              "Signed-off-by:\s*(.*?)\s*<", c.message
+                          ):
+                authors[author] = 1
+            issues.extend(fixes_re.findall(c.message) +
+                          tracker_re.findall(c.message))
+        if authors:
+            author = ", ".join(authors.keys())
+        else:
+            author = commit.parents[-1].author.name
+
+        if strict and not issues:
+            print ("ERROR: https://github.com/ceph/ceph/pull/" +
+                   str(number) + " has no associated issue")
+            continue
+
+        if strict:
+            title_re = (
+                '^(?:hammer|infernalis|jewel|kraken):\s+(' +
+                '|'.join(prefixes) +
+                ')(:.*)'
+            )
+            match = re.match(title_re, title)
+            if not match:
+                print ("ERROR: https://github.com/ceph/ceph/pull/" +
+                       str(number) + " title " + title.encode("utf-8") +
+                       " does not match " + title_re)
+            else:
+                title = match.group(1) + match.group(2)
+        if use_tags:
+            title = split_component(title, gh, number)
+
+        title = title.strip(' \t\n\r\f\v\.\,\;\:\-\=')
+        # escape asterisks, which is used by reStructuredTextrst for inline
+        # emphasis
+        title = title.replace('*', '\*')
+        # and escape the underscores for noting a link
+        title = rst_link_re.sub(r'\1\_\2', title)
+        pr2info[number] = (author, title, message)
+
+        for issue in set(issues):
+            if strict:
+                issue = get_original_issue(issue, verbose)
+            issue2prs.setdefault(issue, set([])).add(number)
+            pr2issues.setdefault(number, set([])).add(issue)
+        sys.stdout.write('.')
+
+    print (" done collecting merges.")
+
+    if strict:
+        for (issue, prs) in issue2prs.items():
+            if len(prs) > 1:
+                print (">>>>>>> " + str(len(prs)) + " pr for issue " +
+                       issue + " " + str(prs))
+
+    for (pr, (author, title, message)) in sorted(
+        pr2info.items(), key=lambda title: title[1][1]
+    ):
+        if pr in pr2issues:
+            if plaintext:
+                issues = map(lambda issue: '#' + str(issue), pr2issues[pr])
+            else:
+                issues = map(lambda issue: (
+                    '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
+                    ).format(issue=issue), pr2issues[pr]
+                )
+            issues = ", ".join(issues) + ", "
+        else:
+            issues = ''
+        if plaintext:
+            print ("* {title} ({issues}{author})".format(
+                    title=title.encode("utf-8"),
+                    issues=issues,
+                    author=author.encode("utf-8")
+                )
+            )
+        else:
+            print (
+                (
+                    "* {title} ({issues}`pr#{pr} <"
+                    "https://github.com/ceph/ceph/pull/{pr}"
+                    ">`_, {author})"
+                ).format(
+                    title=title.encode("utf-8"),
+                    issues=issues,
+                    author=author.encode("utf-8"), pr=pr
+                )
+            )
+        if message:
+            print (message)
+
+
+if __name__ == "__main__":
+    desc = '''
+    Make ceph release notes for a given revision. Eg usage:
+
+    $ ceph-release-notes -r tags/v0.87..origin/giant \
+        $(git rev-parse --show-toplevel)
+
+    It is recommended to set the github env. token in order to avoid
+    hitting the api rate limits.
+    '''
+
+    parser = argparse.ArgumentParser(
+        description=desc,
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+
+    parser.add_argument("--rev", "-r",
+                        help="git revision range for creating release notes")
+    parser.add_argument("--text", "-t",
+                        action='store_true', default=None,
+                        help="output plain text only, no links")
+    parser.add_argument("--verbose", "-v",
+                        action='store_true', default=None,
+                        help="verbose")
+    parser.add_argument("--strict",
+                        action='store_true', default=None,
+                        help="strict, recommended only for backport releases")
+    parser.add_argument("repo", metavar="repo",
+                        help="path to ceph git repo")
+    parser.add_argument(
+        "--token",
+        default=os.getenv("GITHUB_ACCESS_TOKEN"),
+        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
+    )
+    parser.add_argument("--use-tags", default=False,
+                        help="Use github tags to guess the component")
+
+    args = parser.parse_args()
+    gh = github.GitHub(
+        access_token=args.token)
+
+    make_release_notes(
+        gh,
+        Repo(args.repo),
+        args.rev,
+        args.text,
+        args.verbose,
+        args.strict,
+        args.use_tags
+    )