⬅ mkdocs_gallery/utils.py source

1 # Authors: Sylvain MARIE <sylvain.marie@se.com>
2 # + All contributors to <https://github.com/smarie/mkdocs-gallery>
3 #
4 # Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io>
5 # License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE>
6 """
7 Utilities
8 =========
9  
10 Miscellaneous utilities.
11 """
12  
from __future__ import absolute_import, division, print_function

import asyncio
import concurrent.futures
import hashlib
import os
import re
import subprocess
from pathlib import Path
from shutil import copyfile, move
from typing import Optional, Tuple

from . import mkdocs_compatibility
from .errors import ExtensionError
26  
27 logger = mkdocs_compatibility.getLogger("mkdocs-gallery")
28  
29  
def _get_image():
    """Import and return the ``Image`` module used for image manipulation.

    ``PIL.Image`` (pillow) is tried first; if that import fails, the
    top-level ``Image`` module is tried instead.

    Returns
    -------
    Image : module
        The imported ``Image`` module.

    Raises
    ------
    ExtensionError
        If neither import succeeds. The message embeds the error raised by
        the ``from PIL import Image`` attempt.
    """
    try:
        from PIL import Image
    except ImportError as pil_error:
        # Keep the pillow failure around: it is the one reported to the user.
        try:
            import Image
        except ImportError:
            message = (
                "Could not import pillow, which is required to rescale images (e.g., for thumbnails): %s"
                % (pil_error,)
            )
            raise ExtensionError(message)
    return Image
41  
42  
def rescale_image(in_file: Path, out_file: Path, max_width, max_height):
    """Rescale an image, keeping its aspect ratio, centered in a fixed-size box.

    The input image is scaled by the largest factor for which it still fits
    in a ``max_width`` x ``max_height`` box. It is then pasted in the middle
    of a fully transparent RGBA canvas of exactly that size, and the canvas
    is saved to `out_file`.

    If `in_file` and `out_file` designate the same path, the image can only
    be scaled down: when the scale factor is >= 1 nothing is written.

    Parameters
    ----------
    in_file : Path
        The image to read.

    out_file : Path
        Where to write the rescaled image.

    max_width : int
        Width of the output canvas, in pixels.

    max_height : int
        Height of the output canvas, in pixels.
    """
    # local import to avoid testing dependency on PIL:
    Image = _get_image()

    # Open through a context manager so that the underlying file is released
    # on every path (including the early return below) and before `out_file`,
    # which may be the very same path, is written.
    with Image.open(in_file) as img:
        # XXX someday we should just try img.thumbnail((max_width, max_height)) ...
        width_in, height_in = img.size
        scale_w = max_width / float(width_in)
        scale_h = max_height / float(height_in)

        # Fit to the width if the resulting height stays in the box,
        # otherwise fit to the height.
        scale = scale_w if height_in * scale_w <= max_height else scale_h

        if scale >= 1.0 and in_file.absolute().as_posix() == out_file.absolute().as_posix():
            # do not proceed: the image can only be scaled down.
            return

        width_sc = int(round(scale * width_in))
        height_sc = int(round(scale * height_in))

        # resize the image using resize; if using .thumbnail and the image is
        # already smaller than max_width, max_height, then this won't scale up
        # at all (maybe could be an option someday...)
        # The resized copy is a new image, so it remains usable once the
        # source file has been closed.
        scaled = img.resize((width_sc, height_sc), Image.BICUBIC)

    # insert centered
    thumb = Image.new("RGBA", (max_width, max_height), (255, 255, 255, 0))
    pos_insert = ((max_width - width_sc) // 2, (max_height - height_sc) // 2)
    thumb.paste(scaled, pos_insert)

    try:
        thumb.save(out_file)
    except IOError:
        # try again, without the alpha channel (e.g., for JPEG)
        thumb.convert("RGB").save(out_file)
85  
86  
def optipng(file: Path, args=()):
    """Optimize a PNG in place by running the ``optipng`` executable on it.

    This is a best-effort operation: if the ``optipng`` executable cannot be
    started, or if it exits with a non-zero status, the error is ignored and
    the function simply returns.

    Parameters
    ----------
    file : Path
        The file to optimize. Its suffix must be '.png'.
    args : tuple
        Extra command-line arguments inserted before the file name, such as
        ``['-o7']`` (the level CPython used, see
        https://github.com/python/cpython/pull/8032). No extra argument is
        passed by default.

    Raises
    ------
    ValueError
        If the suffix of `file` is not '.png'.
    """
    if file.suffix != ".png":
        raise ValueError(f"File extension is not .png: {file}")

    fname = file.as_posix()
    try:
        # The output is discarded rather than piped: nothing reads the pipes
        # here, and an unread pipe can block the child once its buffer fills.
        subprocess.check_call(
            ["optipng"] + list(args) + [fname],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, IOError):  # IOError covers FileNotFoundError
        # Deliberately ignored: optimization is optional.
        pass
113  
114  
def _has_optipng():
    """Check whether the ``optipng`` executable can be run.

    Returns
    -------
    available : bool
        ``True`` if ``optipng --version`` ran successfully, ``False`` if the
        executable could not be started.

    Raises
    ------
    subprocess.CalledProcessError
        If ``optipng --version`` starts but exits with a non-zero status.
    """
    try:
        # The output is discarded rather than piped, since nothing reads it.
        subprocess.check_call(
            ["optipng", "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except IOError:  # FileNotFoundError
        return False
    return True
122  
123  
def replace_ext(file: Path, new_ext: str, expected_ext: str = None) -> Path:
    """Return `file` with its extension swapped for `new_ext`.

    When `expected_ext` is given, the current extension of `file` is first
    checked against it.

    Parameters
    ----------
    file : Path
        The file path.

    new_ext : str
        The new extension, e.g. '.ipynb'

    expected_ext : str
        The extension `file` is expected to currently have. No check is
        performed when this is None.

    Returns
    -------
    new_file : Path
        The same path with the new extension.

    Raises
    ------
    ValueError
        If `expected_ext` is provided and differs from the suffix of `file`.
    """
    if expected_ext is not None:
        actual_ext = file.suffix
        if actual_ext != expected_ext:
            raise ValueError(f"Unrecognized file extension, expected {expected_ext}, got {actual_ext}")

    return file.with_suffix(new_ext)
149  
150  
def get_md5sum(src_file: Path, mode="b"):
    """Return the md5 hex digest of the contents of a file.

    Parameters
    ----------
    src_file : Path
        The file to hash.
    mode : 't' or 'b'
        File mode to open the file with. In text mode, universal line endings
        are used to ensure consistency in hashes between platforms, and the
        text is encoded back to bytes before hashing.

    Returns
    -------
    md5 : str
        The hexadecimal md5 digest.
    """
    text_mode = mode == "t"
    # "surrogateescape" lets undecodable bytes survive the decode/encode round trip.
    errors = "surrogateescape" if text_mode else None

    with open(str(src_file), "r" + mode, errors=errors) as stream:
        payload = stream.read()

    if text_mode:
        payload = payload.encode(errors=errors)

    return hashlib.md5(payload).hexdigest()
168  
169  
def _get_old_file(new_file: Path) -> Path:
    """Return the path `new_file` stands for, i.e. the same path without its trailing ``.new``."""
    assert new_file.name.endswith(".new")  # noqa
    # The stem is the file name without its last suffix, here ".new".
    old_name = new_file.stem
    return new_file.with_name(old_name)
174  
175  
def _have_same_md5(file_a, file_b, mode: str = "b") -> bool:
    """Tell whether `file_a` and `file_b` have identical md5 digests, both computed using `mode`."""
    md5_a = get_md5sum(file_a, mode)
    md5_b = get_md5sum(file_b, mode)
    return md5_a == md5_b
179  
180  
def _smart_move_md5(src_file: Path, dst_file: Path, md5_mode: str = "b"):
    """Move `src_file` onto `dst_file`, leaving `dst_file` untouched when its md5 is unchanged.

    In both cases `src_file` no longer exists afterwards: it is either moved
    to `dst_file`, or deleted when `dst_file` already has the same md5.

    Parameters
    ----------
    src_file : Path
        The source file path (absolute).

    dst_file : Path
        The destination file path (absolute, different from `src_file`).

    md5_mode : str
        A string representing the md5 computation mode, 'b' or 't'

    Returns
    -------
    dst_file : Path
        The destination file path.
    """
    assert src_file.is_absolute() and dst_file.is_absolute()  # noqa
    assert src_file != dst_file  # noqa

    already_identical = dst_file.exists() and _have_same_md5(dst_file, src_file, mode=md5_mode)

    if not already_identical:
        # Destination is missing or different: perform the actual move
        move(str(src_file), dst_file)
        assert dst_file.exists()  # noqa
        return dst_file

    # Destination already holds the same contents: only drop the source
    os.remove(src_file)
    return dst_file
207  
208  
def _new_file(file: Path) -> Path:
    """Return `file` with an extra ``.new`` extension appended after its current suffix."""
    extended_suffix = file.suffix + ".new"
    return file.with_suffix(extended_suffix)
212  
213  
def _replace_by_new_if_needed(file_new: Path, md5_mode: str = "b"):
    """Substitute `file_new` (suffix .new) for the old file (same path without that suffix).

    If the new file is identical to the old one, the old one will not be touched.

    Parameters
    ----------
    file_new : Path
        The new file, ending with .new suffix.

    md5_mode : str
        A string representing the md5 computation mode, 'b' or 't'
    """
    old_file = _get_old_file(file_new)
    _smart_move_md5(src_file=file_new, dst_file=old_file, md5_mode=md5_mode)
228  
229  
def _smart_copy_md5(
    src_file: Path, dst_file: Path, src_md5: Optional[str] = None, md5_mode: str = "b"
) -> Optional[str]:
    """Copy `src_file` to `dst_file`, overwriting `dst_file` only if md5 has changed.

    The md5 of the source is only computed when it is needed, that is when
    `dst_file` already exists and `src_md5` was not provided.

    Parameters
    ----------
    src_file : Path
        The source file path (absolute).

    dst_file : Path
        The destination file path (absolute, different from `src_file`).

    src_md5 : str
        If the source md5 was already computed, users may provide it here to avoid computing it again.

    md5_mode : str
        A string representing the md5 computation mode, 'b' or 't'

    Returns
    -------
    md5 : str
        The md5 of the source file, if it has been provided or computed in the process, or None.
    """
    assert src_file.is_absolute() and dst_file.is_absolute()  # noqa
    assert src_file != dst_file  # noqa

    if dst_file.exists():
        if src_md5 is None:
            src_md5 = get_md5sum(src_file, mode=md5_mode)

        dst_md5 = get_md5sum(dst_file, mode=md5_mode)
        if src_md5 == dst_md5:
            # Shortcut: nothing to do
            return src_md5

    # Proceed to the copy operation
    copyfile(src_file, dst_file)
    assert dst_file.exists()  # noqa

    return src_md5
269  
270  
271 # def check_md5sum_changed(src_file: Path, src_md5: str = None, md5_mode='b') -> Tuple[bool, str]:
272 # """Checks whether src_file has the same md5 hash as the one on disk or not
273 #
274 # Legacy name: md5sum_is_current
275 #
276 # Parameters
277 # ----------
278 # src_file : Path
279 # The file to check
280 #
281 # md5_mode : str
282 # The md5 computation mode
283 #
284 # Returns
285 # -------
286 # md5_has_changed : bool
287 # A boolean indicating if src_file has changed with respect
288 #
289 # actual_md5 : str
290 # The actual md5 of src_file
291 # """
292 #
293 # # Compute the md5 of the src_file
294 # actual_md5 = get_md5sum(src_file, mode=mode)
295 #
296 # # Grab the already computed md5 if it exists, and compare
297 # src_md5_file = src_file.with_name(src_file.name + '.md5')
298 # if src_md5_file.exists():
299 # ref_md5 = src_md5_file.read_text()
300 # md5_has_changed = (actual_md5 != ref_md5)
301 # else:
302 # md5_has_changed = True
303 #
304 # return md5_has_changed, actual_md5
305  
306  
class Bunch(dict):
    """Dictionary-like object that exposes its keys as attributes.

    Items and attributes share the same storage, so ``bunch.key`` and
    ``bunch["key"]`` read and write the same value.
    """

    def __init__(self, **kwargs):  # noqa: D102
        super().__init__(kwargs)
        # Use the dict itself as the instance namespace.
        self.__dict__ = self
313  
314  
def _has_pypandoc():
    """Check if the pypandoc package and its pandoc backend are available.

    Returns
    -------
    available : bool or None
        ``True`` when pypandoc could be imported and queried, ``None`` otherwise.

    version : object or None
        The value returned by ``pypandoc.get_pandoc_version()``, or ``None``
        when pypandoc is not usable.
    """
    try:
        import pypandoc  # noqa

        # Import error raised only when function called
        pandoc_version = pypandoc.get_pandoc_version()
    except (ImportError, OSError):
        return None, None

    return True, pandoc_version
326  
327  
def matches_filepath_pattern(filepath: Path, pattern: str) -> bool:
    """
    Tell whether a regular expression is found in a file path.

    Parameters
    ----------
    filepath
        The filepath to check; it is converted to a string before searching.

    pattern
        The regular expression to search for (anywhere in the path).

    Returns
    -------
    rc
        A boolean indicating whether the pattern has been found in the filepath
    """

    assert isinstance(filepath, Path)  # noqa

    return re.search(pattern, str(filepath)) is not None
351  
352  
def is_relative_to(parentpath: Path, subpath: Path) -> bool:
    """
    Tell whether `subpath` can be expressed relative to `parentpath`.

    Parameters
    ----------
    parentpath
        The (potential) parent path

    subpath
        The (potential) subpath

    Returns
    -------
    rc
        A boolean indicating whether subpath is relative to parentpath

    Raises
    ------
    TypeError
        If either argument is not a `pathlib.Path`.
    """

    if not isinstance(parentpath, Path) or not isinstance(subpath, Path):
        raise TypeError("Arguments must both be pathlib objects")

    try:
        subpath.relative_to(parentpath)
    except ValueError:
        # relative_to() refuses paths that do not start with parentpath
        return False

    return True
380  
381  
def _run_in_new_loop(coro):
    """Run `coro` to completion on a fresh event loop, which is closed afterwards."""
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(coro)
    finally:
        loop.close()


def run_async(coro):
    """Run a coroutine to completion from synchronous code and return its result.

    When no event loop is running in the current thread, the coroutine is run
    on a new event loop that is closed afterwards.

    When an event loop is already running in the current thread, it cannot be
    re-entered or closed from here. The coroutine is then run on a new event
    loop in a worker thread, and the current thread blocks until it is done.
    Consequently the coroutine must not wait on work that only the blocked
    loop can perform.

    Parameters
    ----------
    coro : coroutine
        The coroutine object to run.

    Returns
    -------
    result
        Whatever `coro` returns. Exceptions raised by `coro` propagate.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if not loop_is_running:
        return _run_in_new_loop(coro)

    # A running loop rejects run_until_complete() and must not be closed by
    # us, so delegate to a thread that owns its own loop.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(_run_in_new_loop, coro).result()