#!/usr/bin/python

"""
Given some markdown text of the general kind we use in Arti changelogs,
look for reference-style links to MRs, issues, and commits, and generate
the appropriate https URLs for them.

Takes input either from a file, or from stdin.

Example:
  ./gen_md_links < new_changelog
"""

import re
import subprocess

LINK_PATTERN = re.compile(r'''
  \[([^\]]*)\] # Match [, then some non-] characters, then ]...
  (?!\()       # But only when not followed by (.
''', re.X)


def links(s):
    """Extract unresolved markdown links from a string.

       This is a naive algorithm, and could eventually be replaced
       with a real markdown parsing library.  But for our changelogs,
       it should be fine.

       >>> list(links("Hello [world]. This [is a link]"))
       ['world', 'is a link']
       >>> list(links("This [link](is resolved)."))
       []
    """

    for m in LINK_PATTERN.finditer(s):
        yield m.group(1)


def is_commit(s):
    """Return true if `s` looks like a git commit.

       >>> is_commit("a3bcD445")
       True
       >>> is_commit("xyzzy123")
       False
    """

    if len(s) >= 6:
        try:
            int(s, 16)
            return True
        except ValueError:
            pass
    return False


def lookup_git_commit(short):
    """Expand a git commit from its short version.

       >>> lookup_git_commit("214c251e41")
       '214c251e41a7583397cc5939b9447b89752ee323'
       >>> lookup_git_commit("00000000000000")
       Traceback (most recent call last):
           ...
       ValueError: Unrecognized git commit 00000000000000
    """

    p = subprocess.Popen(["git", "rev-parse", short], stdout=subprocess.PIPE,
                         stderr=subprocess.DEVNULL)
    p.wait()
    if p.returncode != 0:
        raise ValueError(f"Unrecognized git commit {short}")
    return p.stdout.read().strip().decode("ascii")


class LinkType:
    MergeRequest = 1
    Issue = 2
    Commit = 3
    Other = 4


class Link:
    def __init__(self, s):
        self._s = s
        if s.startswith("!") and s[1:].isdecimal():
            self._type = LinkType.MergeRequest
            self._id = int(s[1:])
        elif s.startswith("#") and s[1:].isdecimal():
            self._type = LinkType.Issue
            self._id = int(s[1:])
        elif is_commit(s):
            self._type = LinkType.Commit
            self._id = s.lower()
        else:
            self._type = LinkType.Other
            self._id = s

    def sort_key(self):
        return (self._type, self._id)

    def link(self):
        if self._type == LinkType.MergeRequest:
            return f"https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/{self._id}"
        elif self._type == LinkType.Issue:
            return f"https://gitlab.torproject.org/tpo/core/arti/-/issues/{self._id}"
        elif self._type == LinkType.Commit:
            full_id = lookup_git_commit(self._id)
            return f"https://gitlab.torproject.org/tpo/core/arti/-/commit/{full_id}"
        elif self._type == LinkType.Other:
            return ""

    def text(self):
        return "[{}]: {}\n".format(self._s, self.link())


def process(s):
    """Given a string with a bunch of markdown links in the style we use
       in our changelog, generate the following material to insert in
       the changelog.

       >>> print(process("Hello [#123] [!456]"), end="")
       [!456]: https://gitlab.torproject.org/tpo/core/arti/-/merge_requests/456
       [#123]: https://gitlab.torproject.org/tpo/core/arti/-/issues/123
    """

    items = sorted((Link(lnk) for lnk in set(links(s))), key=Link.sort_key)
    return "".join(lnk.text() for lnk in items)


if __name__ == '__main__':
    import sys
    if len(sys.argv) == 1 or sys.argv[1] == "-":
        in_file = sys.stdin
    else:
        in_file = open(sys.argv[1], 'r')

    text = in_file.read()
    print(process(text))
