# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Top-level presubmit script for the Git repo backing chromium.org.

See https://www.chromium.org/developers/how-tos/depottools/presubmit-scripts
for more details about the presubmit API built into depot_tools.
"""

import re
from typing import Iterator, List, NamedTuple, Tuple
import urllib.parse

PRESUBMIT_VERSION = '2.0.0'

# This line is 'magic' in that git-cl looks for it to decide whether to
# use Python3 instead of Python2 when running the code in this file.
USE_PYTHON3 = True


def get_lob_extensions(input_api) -> Iterator[str]:
    """Get the list of known LOB extensions."""
    path = input_api.os_path.join(input_api.PresubmitLocalPath(), 'site',
                                  '.gitignore')
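    # The extensions live between the '# start_lob_ignore' and
    # '# end_lob_ignore' markers in that file, e.g. (hypothetical excerpt):
    #   # start_lob_ignore
    #   *.png
    #   *.tar.gz
    #   # end_lob_ignore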
    # Wrap the lines in an iterator so the nested loop below resumes where
    # the outer loop stopped instead of restarting at the top of the file.
    ilines = iter(input_api.ReadFile(path).splitlines())
    for line in ilines:
        if line != '# start_lob_ignore':
            continue

        for line in ilines:
            if line == '# end_lob_ignore':
                return

            if line and not line.startswith('#'):
                assert line.startswith('*')
                yield line[1:]


def CheckChangeHasOnlyOneEol(input_api, output_api):
    """Check that files end with exactly one newline."""
    return input_api.canned_checks.CheckChangeHasOnlyOneEol(
        input_api,
        output_api,
        source_file_filter=lambda x: x.LocalPath().endswith(
            '.md') or x.LocalPath().endswith('.py'))


def CheckPatchFormatted(input_api, output_api):
    """Check formatting of files."""
    return input_api.canned_checks.CheckPatchFormatted(input_api, output_api)


def CheckChangeHasDescription(input_api, output_api):
    """Check that the change has a non-empty description."""
    return input_api.canned_checks.CheckChangeHasDescription(
        input_api, output_api)


def CheckForLobs(input_api, output_api):
    """Check that large binary files (LOBs) are not committed directly."""
    output_status = []
    lob_extensions = list(get_lob_extensions(input_api))
    for file in input_api.change.AffectedFiles():
        # Multi-part extensions like '.tar.gz' rule out a simple dict lookup
        # keyed on the final extension, so test each known extension in turn.
        for ext in lob_extensions:
            if str(file).endswith(ext) and file.Action() != 'D':
                error_msg = (
                    'The file \'{file_name}\' is a binary that has not been '
                    'uploaded to GCS. Please run:\n\tscripts/upload_lobs.py '
                    '"{file_name}"\nand commit {file_name}.sha1 instead\n'
                    'Run:\n\tgit rm --cached "{file_name}"\n'
                    'to remove the lob from git'.format(
                        file_name=file.LocalPath()))

                error = output_api.PresubmitError(error_msg)
                output_status.append(error)
                break

    return output_status


class _MdLink(NamedTuple):
    """Link found in markdown."""

    # The file the link was found in.
    file: str

    # Whether the file is newly added in this change.
    new: bool

    # The visible link text.
    text: str

    # The actual link.
    uri: str

    # Whether the link supports local/relative paths like /dir/foo.md.
    relative_ok: bool

    # What line was the link found on?
    line_num: int


# Mapping of preferred host names. If we find people using <key>, we'll
# make them use <value> instead.
_MD_HOST_ALIASES = {
    # keep-sorted start
    'b': 'issuetracker.google.com',
    'chromium.org': 'www.chromium.org',
    'dev.chromium.org': 'www.chromium.org',
    'goto': 'go',
    'goto.google.com': 'go',
    'www.youtube.com': 'youtube.com',
    # keep-sorted end
}

# These hosts should always use https://
# This isn't an exhaustive list, just hosts we commonly refer to.
# TODO(vapier): Require https:// on all hosts by default, and require any
# actual http:// hosts be enumerated below. This requires a large cleanup
# of existing docs first.
_MD_HTTPS_HOSTS = {
    # keep-sorted start
    'crbug.com',
    'crrev.com',
    'en.wikipedia.org',
    'github.com',
    'google.com',
    'issuetracker.google.com',
    'www.chromium.org',
    'www.google.com',
    'www.w3.org',
    'youtu.be',
    'youtube.com',
    # keep-sorted end
}

# These hosts should always use http://
_MD_HTTP_HOSTS = {
    # keep-sorted start
    'g',
    'go',
    # keep-sorted end
}


def _extract_markdown_links(results, input_api, output_api,
                            affected_file) -> List[_MdLink]:
    """Walk `affected_file` and extract links.

    This parser is not complete relative to the spec our website supports, but
    it covers pretty much all the actual usage we have.

    We have a variety of styles:
      [text](link)
      [anchor]: link
      [anchor]: link "extra text"
      [^footnote]
      [^footnote]: content
      <link>
      link
    """
    # Extract only the content that links are actually rendered in, i.e.
    # filter out the code blocks whose contents are not treated as markdown.
    file = affected_file.LocalPath()
    new_file = affected_file.Action() == 'A'
    line_gen = enumerate(affected_file.NewContents(), start=1)
    filtered_lines = []
    for i, line in line_gen:
        # Ignore ``` blocks because the contents are not markdown, and they
        # might use code that matches the link syntax (e.g. regexes).
        # TODO: <pre><code> blocks.
        # TODO: `...` inline text -- but outside of [text].
        # TODO: 4 space automatic indented blocks.
        sline = line.strip()
        # Handle things like:
        # ```
        # ```language
        # ```this is triple ` quotes inline```
        if sline.startswith('```') and (len(sline) == 3
                                        or not sline.endswith('```')):
            while True:
                # Add stub lines to keep line numbers accurate.
                filtered_lines.append('')
                try:
                    _, line = next(line_gen)
                except StopIteration:
                    results.append(
                        output_api.PresubmitError(
                            f'{file}:{i}: Missing closing ``` block'))
                    line = '```'
                if line.lstrip().startswith('```'):
                    break

        filtered_lines.append(line)

    content = '\n'.join(filtered_lines)

    def _match_to_details(m) -> Tuple[str, str, int]:
        """Find line number for match object."""
        text = ' '.join(m.group(1).strip().split())
        link = m.group(2)
        # This is not efficient, but seems to be simple & fast enough to not
        # warrant an intelligent algorithm.
        linenum = content[0:m.start()].count('\n') + 1
        return (text, link, linenum)

    ret = []

    # [text](link)
    # While [text] may span multiple lines, (link) may not.
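    # e.g. a hypothetical '[Chromium](https://www.chromium.org/)' yields
    # text='Chromium' and uri='https://www.chromium.org/'.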
    for m in re.finditer(r'\[([^]]+)\]\(([^)\s]+)\)', content, flags=re.M):
        text, link, linenum = _match_to_details(m)
        ret += [_MdLink(file, new_file, text, link, True, linenum)]

    # [anchor]: link
    for m in re.finditer(r'^\[([^]]+)\]:(?: *)(\S+)', content, flags=re.M):
        text, link, linenum = _match_to_details(m)
        # [^footnote]: link
        if not text.startswith('^'):
            ret += [_MdLink(file, new_file, text, link, True, linenum)]

    # <link>
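    # The capture group is nested twice so _match_to_details() can read both
    # group(1) and group(2); for this style, text and uri are the same string.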
    for m in re.finditer(r'<((https?://[^>]+))>', content, flags=re.M):
        text, link, linenum = _match_to_details(m)
        ret += [_MdLink(file, new_file, text, link, False, linenum)]

    return ret


def CheckLinks(input_api, output_api):
    """Check links used in markdown."""
    # Build up the files to analyze.
    affected_files = input_api.AffectedFiles(
        file_filter=lambda x: x.LocalPath().endswith('.md'))

    results = []

    # Extract the links from the files.
    links = []
    for affected_file in affected_files:
        links += _extract_markdown_links(results, input_api, output_api,
                                         affected_file)

    # Check links.

    def _create_result(link, msg, want_uri) -> None:
        want_link = urllib.parse.urlunparse(want_uri)
        results.append(
            output_api.PresubmitError(f'{link.file}:{link.line_num}: {msg}',
                                      long_text=f'- {link.uri}\n+ {want_link}'))

    for link in links:
        o = urllib.parse.urlparse(link.uri)

        # Check bad http:// usage.
        if o.scheme == 'http' and o.netloc in _MD_HTTPS_HOSTS:
            _create_result(link, 'Always use https:// with this host',
                           o._replace(scheme='https'))

        # Check bad https:// usage.
        if o.scheme == 'https' and o.netloc in _MD_HTTP_HOSTS:
            _create_result(link, 'Always use http:// with this host',
                           o._replace(scheme='http'))

        # Check host aliases.
        for oldhost, newhost in _MD_HOST_ALIASES.items():
            if o.netloc == oldhost:
                _create_result(link, f'Use {newhost} in links',
                               o._replace(netloc=newhost))

        # Have people use relative /foo/bar links instead of
        # https://www.chromium.org/foo/bar so we can check target links, and
        # so navigating via the sandbox website works correctly.
        if (link.relative_ok and o.netloc == 'www.chromium.org'
                and link.file.startswith('site/')):
            _create_result(
                link, 'Use local paths instead of www.chromium.org in links',
                o._replace(scheme='', netloc='', path=o.path or '/'))

        # People shouldn't link to chromium.org markdown pages via gitiles.
        if o.netloc == 'chromium.googlesource.com' and o.path.startswith(
                '/website/+/HEAD/site/'):
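            # len('/website/+/HEAD/site') is 20 characters, so slicing keeps
            # the leading '/' of the remaining path.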
            path = o.path[20:]
            if path.endswith('/index.md'):
                path = path[:-9]
            _create_result(
                link, 'Use local paths instead of chromium.googlesource.com',
                o._replace(scheme='', netloc='', path=path))

        # Check relative links for generated docs (under site/).
        if o.scheme == o.netloc == '' and link.file.startswith('site/'):
            # Relative links to markdown files don't work in generated pages.
            if (o.path.startswith('/')
                    or o.path.startswith('.')) and o.path.endswith('.md'):
                _create_result(
                    link, 'Do not link directly to markdown files',
                    o._replace(path='/'.join(o.path.split('/')[:-1])))

            # The /site/ prefix is removed in generated content, but works when
            # viewing under gitiles, so sometimes people test the wrong page.
            if o.path.startswith('/site/'):
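                # o.path[5:] drops the '/site' prefix but keeps the '/' that
                # follows, e.g. a hypothetical '/site/foo' becomes '/foo'.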
                _create_result(link, 'Omit the /site/ prefix in local paths',
                               o._replace(path=o.path[5:]))

        # Verify local paths exist.
        if o.path:
            # Anchor absolute paths under site/, otherwise it's relative to
            # the document.
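            # e.g. a hypothetical '/foo/bar' maps to 'site/foo/bar', while a
            # relative 'sub/page' is joined onto the directory of link.file.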
            local_path = input_api.os_path.join(
                'site' if o.path.startswith('/') else
                input_api.os_path.dirname(link.file),
                urllib.parse.unquote(o.path.lstrip('/')))

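            # '/system/errors/NodeNotFound' is a placeholder left over from
            # the old Google Sites pages for links whose target no longer
            # exists.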
            if o.path == '/system/errors/NodeNotFound' and not link.new:
                results.append(
                    output_api.PresubmitPromptWarning(
                        f'{link.file}:{link.line_num}: '
                        f'Missing link: {o.path}'))

            # Links can point to:
            # * Directory (with implicit /index.md).
            #   /foo/bar points to /foo/bar/index.md
            # * LOB file (e.g. images).
            #   /foo/bar.png has /foo/bar.png.sha1
            # * Raw file (e.g. html); does not support .md files.
            #   /foo/bar.html
            # Links can also point to specific md files, but we don't use
            # that style, so don't support it. We prefer to use dirs and
            # index.md files exclusively.
            elif not (input_api.os_path.exists(
                    input_api.os_path.join(local_path, 'index.md'))
                      or input_api.os_path.exists(local_path + '.sha1')
                      or input_api.os_path.isfile(local_path)):
                # TODO(vapier): Make these fatal.
                if '.' not in local_path and not link.new:
                    results.append(
                        output_api.PresubmitPromptWarning(
                            f'{link.file}:{link.line_num}: '
                            f'Missing link: {o.path}'))
                else:
                    _create_result(link, 'Link appears to be broken',
                                   o._replace(path='???', fragment=''))

    return results