xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision c093b3ec6d35e1fe023174ed7f6ca6b90690d526)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25# Copyright 2024 Bill Sommerfeld
26#
27
28from __future__ import print_function
29
30import getopt
31import io
32import os
33import re
34import subprocess
35import sys
36import tempfile
37
38if sys.version_info[0] < 3:
39    from cStringIO import StringIO
40else:
41    from io import StringIO
42
43#
44# Adjust the load path based on our location and the version of python into
45# which it is being loaded.  This assumes the normal onbld directory
46# structure, where we are in bin/ and the modules are in
47# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
48#
49sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
50                                "python%d.%d" % sys.version_info[:2]))
51
52#
53# Add the relative path to usr/src/tools to the load path, such that when run
54# from the source tree we use the modules also within the source tree.
55#
56sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
57
58from onbld.Scm import Ignore
59from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
60from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
61from onbld.Checks import ShellLint, PkgFmt
62
class GitError(Exception):
    """Error raised by git() when a git command cannot be run or fails."""
65
def git(command):
    """Run a git command and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string, which is
    split on whitespace; "git" itself is prepended.  The output is decoded
    as UTF-8 (undecodable bytes are replaced) and every returned line keeps
    its line terminator.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits non-zero (the message is then whatever git
    wrote to its standard error)."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # Make sure the temporary file is closed on every path out of here.
    try:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains stderr while waiting for git to exit, so a
        # large amount of error output cannot fill the pipe and block git
        # forever; it also closes the pipe when done.
        _, errout = p.communicate()
        if p.returncode != 0:
            # Bytes on Python 3; already a native str on Python 2.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
    finally:
        tmpfile.close()
96
def git_root():
    """Return the absolute path of the top-level directory of the current
    git workspace, as reported by 'git rev-parse --show-toplevel'."""

    output = git('rev-parse --show-toplevel')
    return os.path.abspath(output[0].strip())
104
def git_branch():
    """Return the name of the current git branch.

    The current branch is the entry which 'git branch' marks with a leading
    '*'; the second whitespace-separated field of that entry is returned.
    Returns None if no entry is marked, or if the marked entry ends with
    '(no branch)'."""

    for entry in git('branch'):
        if entry[0] != '*':
            continue
        # NOTE(review): git() returns lines with their line terminator still
        # attached, so this suffix test looks like it can never succeed --
        # confirm before relying on the None return for a detached HEAD.
        if entry.endswith('(no branch)'):
            return None
        return entry.split()[1]

    return None
115
def git_parent_branch(branch):
    """Return the name of the branch which 'branch' tracks.

    Returns None when 'branch' is empty or None.  Otherwise the local
    branches are listed along with their upstreams; the upstream recorded
    for 'branch' is returned, or 'origin/master' if it has none.  Exits
    with status 1 if git lists no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref", "--format=%(refname:short) %(upstream:short)",
               "refs/heads/"])

    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for ref in refs:
        # Git 1.7 leaves a trailing ' ' after a branch which tracks nothing,
        # so a space alone does not mean an upstream is present.
        if ' ' not in ref or ref.endswith(' \n'):
            continue
        local, remote = ref.split()
        if local == branch:
            return remote

    return 'origin/master'
139
def slices(strlist, sep):
    """Yield (start, end) index pairs, one per run of entries in 'strlist'
    terminated by an entry equal to 'sep'.

    'end' is the index of the separator itself (exclusive bound), so
    strlist[start:end] is the run without its separator.  Entries after the
    last separator, if any, form a final run ending at len(strlist)."""
    start = 0
    total = len(strlist)

    for idx, entry in enumerate(strlist):
        if entry != sep:
            continue
        yield (start, idx)
        start = idx + 1

    # Anything left over was not followed by a separator.
    if start < total:
        yield (start, total)
150
def git_comments(parent):
    """Return the checkin comments of every commit between 'parent' and the
    current head, as a list (one entry per commit) of lists of stripped
    lines.

    Exits with status 1 if 'git log' produces no output."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)

    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    # Each commit's message is terminated by a line holding only ':SEP:'.
    comments = []
    for (start, end) in slices(log, ':SEP:\n'):
        comments.append([line.strip() for line in log[start:end]])
    return comments
163
def git_file_list(parent, paths=None):
    """Return the sorted list of files which have ever changed on this
    branch, i.e. in any commit between 'parent' and the current head.

    'paths', if given, is a sequence of paths to which the listing is
    limited; None or an empty sequence means no limit.

    Exits with status 1 if git reports no files.

    NB: This includes files which no longer exist, or no longer actually
    differ."""

    # Build the command as a list, not a string which git() would split
    # on whitespace, so that paths containing blanks survive intact.
    command = ["log", "--name-only", "--pretty=format:", "%s.." % parent]
    command.extend(path for path in (paths or []) if path)

    p = git(command)

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    # The empty format leaves blank lines between commits; drop them.
    names = set(line.strip() for line in p)
    names.discard('')

    return sorted(names)
184
def not_check(root, cmd):
    """Build and return the exclusion predicate for the check named 'cmd':
    a function which returns True for a file which that check should skip.

    Exclusions are read from whichever of <root>/.git/info/<cmd>.NOT and
    <root>/exception_lists/<cmd> exist."""

    candidates = (os.path.join(root, ".git/info", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd))
    ignorefiles = [path for path in candidates if os.path.exists(path)]
    return Ignore.ignore(root, ignorefiles)
193
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is a predicate on the relative path which decides which kind
    of file is of interest; it defaults to os.path.isfile.

    The returned function is a generator taking an optional 'select'
    predicate; only paths for which it returns a true value are produced."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for abspath in git_file_list(parent, paths):
            path = os.path.relpath(os.path.join(root, abspath), '.')
            try:
                # Pass the arguments as a list: a string would be split on
                # whitespace by git(), so a file with a blank in its name
                # would make git fail and be silently skipped below.
                res = git(["diff", "%s" % parent, "HEAD", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually, this
                # means that git returned non-zero because the file doesn't
                # exist, but it could also fail if git can't create a new file
                # or it can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue
            # An empty diff means the file no longer differs from 'parent'.
            if (res and filter(path) and
                select(path) and not exclude(abspath)):
                yield path
    return ret
222
def gen_links(root, parent, paths, exclude):
    """Return a function producing the names, relative to the current
    directory, of symbolic links changed on this branch (limited to 'paths'
    if requested), leaving out those for which exclude returns a true
    value."""

    def is_link(path):
        return os.path.islink(path)

    return gen_files(root, parent, paths, exclude, is_link)
229
def comchk(root, parent, flist, output):
    """Check the checkin comments of every commit since 'parent'.

    Complaints are written to 'output'; when more than one commit is being
    checked, those of each failing commit are preceded by the first line of
    its comment.  Returns the bitwise OR of the per-commit results, so zero
    means that every commit passed.  'root' and 'flist' are unused."""
    output.write("Comments:\n")

    commits = git_comments(parent)
    several = len(commits) > 1
    bugs = {}

    status = 0
    for commit in commits:
        buf = StringIO()
        rv = Comments.comchk(commit, check_db=True, output=buf, bugs=bugs)
        status |= rv

        if rv == 0:
            continue

        if several:
            output.write('\n%s\n' % commit[0])
        output.write(buf.getvalue())

    return status
252
def mapfilechk(root, parent, flist, output):
    """Run the mapfile comment check over every changed file which looks
    like a mapfile, writing complaints to 'output'.

    Returns the bitwise OR of the per-file results."""

    # We are interested in examining any file that has the following
    # in its final path segment:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # We don't want to match unless these things occur in final path segment
    # because directory names with these strings don't indicate a mapfile.
    # We also ignore files with suffixes that tell us that the files
    # are not mapfiles.
    MapfileRE = re.compile(r'.*((mapfile[^/]*)|(/map\.+[^/]*)|(\.map))$',
        re.IGNORECASE)
    NotMapSuffixRE = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    def is_mapfile(path):
        return MapfileRE.match(path) and not NotMapSuffixRE.match(path)

    output.write("Mapfile comments:\n")

    status = 0
    for path in flist(is_mapfile):
        with io.open(path, encoding='utf-8', errors='replace') as stream:
            status |= Mapfile.mapfilechk(stream, output=output)
    return status
276
def copyright(root, parent, flist, output):
    """Run the copyright check over every changed file, writing complaints
    to 'output'.  Returns the bitwise OR of the per-file results."""
    output.write("Copyrights:\n")

    status = 0
    for path in flist():
        with io.open(path, encoding='utf-8', errors='replace') as stream:
            status |= Copyright.copyright(stream, output=output)
    return status
284
def hdrchk(root, parent, flist, output):
    """Run the (lenient) header format check over every changed file whose
    name ends in '.h', writing complaints to 'output'.  Returns the bitwise
    OR of the per-file results."""
    output.write("Header format:\n")

    def is_header(path):
        return path.endswith('.h')

    status = 0
    for path in flist(is_header):
        with io.open(path, encoding='utf-8', errors='replace') as stream:
            status |= HdrChk.hdrchk(stream, lenient=True, output=output)
    return status
292
def cstyle(root, parent, flist, output):
    """Run the C style check (picky, with POSIX type and continuation line
    checks enabled) over every changed file whose name ends in '.c' or '.h',
    writing complaints to 'output'.  Returns the bitwise OR of the per-file
    results."""
    output.write("C style:\n")

    def is_c_source(path):
        return path.endswith(('.c', '.h'))

    status = 0
    for path in flist(is_c_source):
        with io.open(path, mode='rb') as stream:
            status |= CStyle.cstyle(stream, output=output, picky=True,
                                    check_posix_types=True,
                                    check_continuation=True)
    return status
302
def jstyle(root, parent, flist, output):
    """Run the (picky) Java style check over every changed file whose name
    ends in '.java', writing complaints to 'output'.  Returns the bitwise OR
    of the per-file results."""
    output.write("Java style:\n")

    def is_java(path):
        return path.endswith('.java')

    status = 0
    for path in flist(is_java):
        with io.open(path, mode='rb') as stream:
            status |= JStyle.jstyle(stream, output=output, picky=True)
    return status
310
def manlint(root, parent, flist, output):
    """Run the (picky) manual page lint and the spelling check over every
    changed file whose name ends in a manual section suffix (a '.', a digit
    and optional letters), writing complaints to 'output'.

    Both checks are handed the same open file.  Returns the bitwise OR of
    all results."""
    output.write("Man page format/spelling:\n")

    ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    def is_manpage(path):
        return ManfileRE.match(path)

    status = 0
    for path in flist(is_manpage):
        with io.open(path, mode='rb') as stream:
            status |= ManLint.manlint(stream, output=output, picky=True)
            status |= SpellCheck.spellcheck(stream, output=output)
    return status
320
def shelllint(root, parent, flist, output):
    """Run the shell lint check over every changed shell script, writing
    complaints to 'output'.  Returns the bitwise OR of the per-file results.

    A file counts as a shell script if its extension is '.sh' or '.ksh', or
    if it has no extension and its first line is a '#' line naming sh or
    ksh."""
    output.write("Shell lint:\n")

    def isshell(x):
        ext = os.path.splitext(x)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        # No extension: decide from the interpreter line, if there is one.
        with io.open(x, mode='r', errors='ignore') as fh:
            first = fh.readline()
        return re.match(r'^#.*\bk?sh\b', first) is not None

    status = 0
    for path in flist(isshell):
        with io.open(path, mode='rb') as stream:
            status |= ShellLint.lint(stream, output=output)

    return status
340
def pkgfmt(root, parent, flist, output):
    """Run the package manifest format check over every changed file whose
    name ends in '.p5m', writing complaints to 'output'.  Returns the
    bitwise OR of the per-file results."""
    output.write("Package manifests:\n")

    def is_manifest(path):
        return path.endswith('.p5m')

    status = 0
    for path in flist(is_manifest):
        with io.open(path, mode='rb') as stream:
            status |= PkgFmt.check(stream, output=output)

    return status
350
def keywords(root, parent, flist, output):
    """Run the SCCS keyword check over every changed file, writing
    complaints to 'output'.  Returns the bitwise OR of the per-file
    results."""
    output.write("SCCS Keywords:\n")

    status = 0
    for path in flist():
        with io.open(path, encoding='utf-8', errors='replace') as stream:
            status |= Keywords.keywords(stream, output=output)
    return status
358
def wscheck(root, parent, flist, output):
    """Run the white space check over every changed file, writing complaints
    to 'output'.  Returns the bitwise OR of the per-file results."""
    output.write("white space nits:\n")

    status = 0
    for path in flist():
        with io.open(path, encoding='utf-8', errors='replace') as stream:
            status |= WsCheck.wscheck(stream, output=output)
    return status
366
def symlinks(root, parent, flist, output):
    """List every name produced by 'flist' on 'output', one per line and
    indented by two spaces.  Returns 1 if anything was listed, else 0."""
    output.write("Symbolic links:\n")

    found = 0
    for link in flist():
        output.write("  " + link + "\n")
        found = 1
    return found
374
def iswinreserved(name):
    """Return True if 'name' is one of the device names reserved on Windows
    (con, prn, aux, nul, com0-com9, lpt0-lpt9), either alone or followed by
    a '.' and anything else.  The comparison ignores case."""
    reserved = (
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5',
        'com6', 'com7', 'com8', 'com9', 'com0',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
        'lpt6', 'lpt7', 'lpt8', 'lpt9', 'lpt0')

    # Only the part before the first '.' decides; no reserved name has one.
    stem = name.lower().partition('.')[0]
    return stem in reserved
387
def haswinspecial(name):
    """Return True if 'name' contains any of the characters < > : " \\ | ? *
    and False otherwise."""
    specials = '<>:"\\|?*'
    return any(ch in specials for ch in name)
394
def winnames(root, parent, flist, output):
    """Report on 'output' every changed file whose name contains a character
    haswinspecial() rejects, or one of whose '/'-separated components
    iswinreserved() rejects.  Each file is reported at most once.

    Returns 1 if anything was reported, else 0."""
    output.write("Illegal filenames (Windows):\n")

    status = 0
    for path in flist():
        if haswinspecial(path):
            output.write("  "+path+": invalid character in name\n")
            status = 1
        elif any(iswinreserved(part) for part in path.split('/')):
            output.write("  "+path+": reserved file name\n")
            status = 1

    return status
412
def run_checks(root, parent, cmds, scmds, paths='', opts=None):
    """Run the checks given in 'cmds' and 'scmds', expected to have
    well-known signatures, and report results for any which fail.

    The checks in 'cmds' are handed a producer of changed files, those in
    'scmds' a producer of changed symbolic links; both are limited to
    'paths' if it is not empty.  The output of a check is printed only if
    that check fails.  'opts' is accepted but not used.

    Return failure (non-zero) if any of them did.

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    def report(cmd, flist):
        """Run one check, printing what it wrote only if it failed."""
        s = StringIO()
        result = cmd(root, parent, flist, output=s)
        if result != 0:
            print(s.getvalue())
        return result

    ret = 0

    for cmd in cmds:
        exclude = not_check(root, cmd.__name__)
        ret |= report(cmd, gen_files(root, parent, paths, exclude))

    for cmd in scmds:
        exclude = not_check(root, cmd.__name__)
        ret |= report(cmd, gen_links(root, parent, paths, exclude))

    return ret
447
def nits(root, parent, paths):
    """Run every per-file check, plus the symbolic link check, over the
    files changed since 'parent', limited to 'paths' if it is not empty.
    The checkin comment check is not run."""
    file_checks = [
        copyright,
        cstyle,
        hdrchk,
        jstyle,
        keywords,
        manlint,
        mapfilechk,
        shelllint,
        pkgfmt,
        winnames,
        wscheck,
    ]
    link_checks = [symlinks]

    run_checks(root, parent, file_checks, link_checks, paths)
462
def pbchk(root, parent, paths):
    """Run the checkin comment check, every per-file check and the symbolic
    link check over everything changed since 'parent'.

    'paths' is accepted but not used: the whole workspace is checked."""
    file_checks = [
        comchk,
        copyright,
        cstyle,
        hdrchk,
        jstyle,
        keywords,
        manlint,
        mapfilechk,
        shelllint,
        pkgfmt,
        winnames,
        wscheck,
    ]
    link_checks = [symlinks]

    run_checks(root, parent, file_checks, link_checks)
478
def main(cmd, args):
    """Parse the command line and run the requested checks.

    'cmd' is the name the program was invoked under and 'args' its
    arguments.  Options:

        -c check    run only the named check ('pbchk', 'nits', or the name
                    of a single check function)
        -p branch   compare against 'branch' instead of the branch tracked
                    by the current one ('-b' is accepted as an alias)

    Remaining arguments are paths to which the checks are limited.  With no
    -c, 'pbchk' is run when invoked as git-pbchk and 'nits' otherwise.

    Exits with status 1 on a usage error."""
    parent_branch = None
    checkname = None

    try:
        opts, args = getopt.getopt(args, 'b:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-c check] [-p branch] [path...]\n" % cmd)
        sys.exit(1)

    for opt, arg in opts:
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        if opt == '-p' or opt == '-b':
            parent_branch = arg
        elif opt == '-c':
            checkname = arg

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if checkname is None:
        if cmd == 'git-pbchk':
            checkname = 'pbchk'
        else:
            checkname = 'nits'

    if checkname == 'pbchk':
        if args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        pbchk(git_root(), parent_branch, None)
    elif checkname == 'nits':
        nits(git_root(), parent_branch, args)
    else:
        # NOTE(review): this eval()s a command-line argument.  It is meant
        # to name one of the check functions in this file, but any
        # expression will be evaluated; consider an explicit table.
        check = eval(checkname)

        # run_checks() takes the file checks, then the symbolic link checks,
        # and only then the paths; the paths must not be passed in the
        # place of the link checks.
        if checkname == 'symlinks':
            run_checks(git_root(), parent_branch, [], [check], args)
        else:
            run_checks(git_root(), parent_branch, [check], [], args)
515
if __name__ == '__main__':
    # The name we were invoked under selects the default set of checks.
    progname = os.path.basename(sys.argv[0])
    try:
        main(progname, sys.argv[1:])
    except GitError as err:
        sys.stderr.write("failed to run git:\n %s\n" % str(err))
        sys.exit(1)
522