Coverage for src/mkdocs_gallery/utils.py: 77%

117 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-15 17:10 +0000

1# Authors: Sylvain MARIE <sylvain.marie@se.com> 

2# + All contributors to <https://github.com/smarie/mkdocs-gallery> 

3# 

4# Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io> 

5# License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE> 

6""" 

7Utilities 

8========= 

9 

10Miscellaneous utilities. 

11""" 

12 

from __future__ import absolute_import, division, print_function

import hashlib
import os
import re
import subprocess
from pathlib import Path
from shutil import copyfile, move
from typing import Optional, Tuple

from . import mkdocs_compatibility
from .errors import ExtensionError

25 

26logger = mkdocs_compatibility.getLogger("mkdocs-gallery") 

27 

28 

def _get_image():
    """Import and return the PIL ``Image`` module.

    The modern ``from PIL import Image`` form is tried first. If it fails,
    the legacy top-level ``import Image`` is attempted as a fallback.

    Returns
    -------
    Image : module
        The imported ``Image`` module.

    Raises
    ------
    ExtensionError
        If neither import succeeds. The error raised by the modern import is
        embedded in the message and chained as the explicit cause.
    """
    try:
        from PIL import Image
    except ImportError as exc:  # capture the error for the modern way
        try:
            import Image
        except ImportError:
            # Chain from the *modern* import failure: it is the one users can
            # act on (installing pillow), not the legacy fallback failure.
            raise ExtensionError(
                "Could not import pillow, which is required to rescale images (e.g., for thumbnails): %s" % (exc,)
            ) from exc
    return Image

40 

41 

def rescale_image(in_file: Path, out_file: Path, max_width, max_height):
    """Rescale an image into a ``max_width`` x ``max_height`` box and save it.

    The image keeps its aspect ratio and is pasted, centered, onto a fully
    transparent white RGBA canvas of exactly ``(max_width, max_height)``.
    If `in_file` and `out_file` designate the same path, the image can only
    be scaled down: when the computed scale is >= 1 nothing is written.

    Parameters
    ----------
    in_file : Path
        The image to read.

    out_file : Path
        Where the rescaled image is saved.

    max_width : int
        Width of the output canvas, in pixels.

    max_height : int
        Height of the output canvas, in pixels.
    """
    # local import to avoid testing dependency on PIL:
    Image = _get_image()

    # Open through a context manager so that the file handle is released
    # before `out_file` (which may be the very same path) is written.
    with Image.open(in_file) as source:
        # XXX someday we should just try img.thumbnail((max_width, max_height)) ...
        width_in, height_in = source.size
        scale_w = max_width / float(width_in)
        scale_h = max_height / float(height_in)

        # Use the width-driven scale if the resulting height still fits in the
        # box, otherwise fall back to the height-driven scale.
        scale = scale_w if height_in * scale_w <= max_height else scale_h

        if scale >= 1.0 and in_file.absolute().as_posix() == out_file.absolute().as_posix():
            # do not proceed: the image can only be scaled down.
            return

        # Clamp to one pixel so that an extreme aspect ratio cannot round a
        # dimension down to zero.
        width_sc = max(1, int(round(scale * width_in)))
        height_sc = max(1, int(round(scale * height_in)))

        # resize the image using resize; if using .thumbnail and the image is
        # already smaller than max_width, max_height, then this won't scale up
        # at all (maybe could be an option someday...)
        resized = source.resize((width_sc, height_sc), Image.BICUBIC)

    # insert centered
    thumb = Image.new("RGBA", (max_width, max_height), (255, 255, 255, 0))
    pos_insert = ((max_width - width_sc) // 2, (max_height - height_sc) // 2)
    thumb.paste(resized, pos_insert)

    try:
        thumb.save(out_file)
    except OSError:
        # try again, without the alpha channel (e.g., for JPEG)
        thumb.convert("RGB").save(out_file)

84 

85 

def optipng(file: Path, args=()):
    """Optimize a PNG in place by running the ``optipng`` executable on it.

    This is best-effort: if the ``optipng`` executable cannot be started, or
    if it exits with a non-zero status, the failure is silently ignored and
    the function returns.

    Parameters
    ----------
    file : Path
        The file to optimize. Its suffix must be ``'.png'``.
    args : tuple
        Extra command-line arguments placed before the file name, such as
        ``['-o7']``. No optimization level is passed by default; ``-o7`` is
        what CPython used (https://github.com/python/cpython/pull/8032).

    Raises
    ------
    ValueError
        If the suffix of `file` is not ``'.png'``.
    """
    if file.suffix != ".png":
        raise ValueError(f"File extension is not .png: {file}")

    fname = file.as_posix()
    try:
        # The output is never read, so discard it. Using PIPE with check_call
        # could block the child once the OS pipe buffer fills up.
        subprocess.check_call(
            ["optipng", *args, fname],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):  # OSError covers FileNotFoundError
        pass

112 

113 

def _has_optipng():
    """Return True if ``optipng --version`` can be run successfully.

    Returns
    -------
    available : bool
        False if the executable cannot be started (e.g. it is not installed)
        or if the version probe exits with a non-zero status, True otherwise.
    """
    try:
        # The output is never read, so discard it. Using PIPE with check_call
        # could block the child once the OS pipe buffer fills up.
        subprocess.check_call(
            ["optipng", "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):  # OSError covers FileNotFoundError
        return False
    return True

121 

122 

def replace_ext(file: Path, new_ext: str, expected_ext: str = None) -> Path:
    """Swap the suffix of `file` for `new_ext`, optionally validating the current suffix first.

    Parameters
    ----------
    file : Path
        The file path whose extension should be replaced.

    new_ext : str
        The extension to put in place, e.g. '.ipynb'

    expected_ext : str
        When provided, the suffix that `file` must currently have.

    Returns
    -------
    new_file : Path
        The same path, with `new_ext` as its suffix.

    Raises
    ------
    ValueError
        If `expected_ext` is provided and differs from the suffix of `file`.
    """
    if expected_ext is not None:
        # The caller asked for a sanity check on the current extension
        if file.suffix != expected_ext:
            raise ValueError(f"Unrecognized file extension, expected {expected_ext}, got {file.suffix}")

    renamed = file.with_suffix(new_ext)
    return renamed

148 

149 

def get_md5sum(src_file: Path, mode="b"):
    """Return the hexadecimal md5 digest of the contents of a file.

    Parameters
    ----------
    src_file : Path
        File to get the md5sum for.
    mode : 't' or 'b'
        File mode to open the file with. In binary mode the raw bytes are
        hashed. In text mode the file is decoded as UTF-8 with universal
        line endings and re-encoded as UTF-8 before hashing, to ensure
        consistency in hashes between platforms.

    Returns
    -------
    md5 : str
        The md5 hex digest.
    """
    if mode == "t":
        # Pin the codec on both the decode and the encode side: relying on the
        # locale default for decoding would make the digest platform-dependent.
        # surrogateescape lets undecodable bytes round-trip unchanged.
        codec = {"encoding": "utf-8", "errors": "surrogateescape"}
        with open(str(src_file), "rt", **codec) as src_data:
            src_content = src_data.read().encode(**codec)
    else:
        with open(str(src_file), "r" + mode) as src_data:
            src_content = src_data.read()
    return hashlib.md5(src_content).hexdigest()

167 

168 

def _get_old_file(new_file: Path) -> Path:
    """Return the sibling path of `new_file` whose name lacks the trailing .new"""
    assert new_file.name.endswith(".new")  # noqa
    # The stem is the name minus its last suffix, which here is ".new"
    original_name = new_file.stem
    return new_file.with_name(original_name)

173 

174 

def _have_same_md5(file_a, file_b, mode: str = "b") -> bool:
    """Tell whether `file_a` and `file_b` yield equal md5 digests when read using `mode`."""
    digest_a = get_md5sum(file_a, mode)
    digest_b = get_md5sum(file_b, mode)
    return digest_a == digest_b

178 

179 

def _smart_move_md5(src_file: Path, dst_file: Path, md5_mode: str = "b"):
    """Move `src_file` onto `dst_file`, leaving `dst_file` untouched when its md5 already matches.

    In both cases `src_file` no longer exists afterwards: it is either moved,
    or deleted because the destination already has the same md5.

    Parameters
    ----------
    src_file : Path
        The source file path. Must be absolute.

    dst_file : Path
        The destination file path. Must be absolute and differ from `src_file`.

    md5_mode : str
        A string representing the md5 computation mode, 'b' or 't'

    Returns
    -------
    dst_file : Path
        The destination file path, as received.
    """
    assert src_file.is_absolute() and dst_file.is_absolute()  # noqa
    assert src_file != dst_file  # noqa

    # The md5 comparison only happens when there is a destination to compare to
    already_identical = dst_file.exists() and _have_same_md5(dst_file, src_file, mode=md5_mode)

    if already_identical:
        # Nothing to overwrite: simply discard the source
        os.remove(src_file)
        return dst_file

    # Destination missing or different: perform the actual move
    move(str(src_file), dst_file)
    assert dst_file.exists()  # noqa
    return dst_file

206 

207 

def _new_file(file: Path) -> Path:
    """Build the path obtained by appending an extra .new extension to `file`."""
    extended_suffix = file.suffix + ".new"
    return file.with_suffix(extended_suffix)

211 

212 

def _replace_by_new_if_needed(file_new: Path, md5_mode: str = "b"):
    """Promote `file_new` (suffix .new) to the old file location (same path, without that suffix).

    When the new file is identical to the old one, the old one is left untouched.

    Parameters
    ----------
    file_new : Path
        The new file, ending with .new suffix.

    md5_mode : str
        A string representing the md5 computation mode, 'b' or 't'
    """
    old_file = _get_old_file(file_new)
    _smart_move_md5(src_file=file_new, dst_file=old_file, md5_mode=md5_mode)

227 

228 

def _smart_copy_md5(
    src_file: Path, dst_file: Path, src_md5: Optional[str] = None, md5_mode: str = "b"
) -> Optional[str]:
    """Copy `src_file` to `dst_file`, overwriting `dst_file`, only if md5 has changed.

    Parameters
    ----------
    src_file : Path
        The source file path. Must be absolute.

    dst_file : Path
        The destination file path. Must be absolute and differ from `src_file`.

    src_md5 : str, optional
        If the source md5 was already computed, users may provide it here to avoid computing it again.

    md5_mode : str
        A string representing the md5 computation mode, 'b' or 't'

    Returns
    -------
    md5 : str or None
        The md5 of the source file, if it has been provided or computed in the process, or None.
        It is only computed here when `dst_file` already exists and `src_md5` was not provided.
    """
    assert src_file.is_absolute() and dst_file.is_absolute()  # noqa
    assert src_file != dst_file  # noqa

    if dst_file.exists():
        if src_md5 is None:
            src_md5 = get_md5sum(src_file, mode=md5_mode)

        dst_md5 = get_md5sum(dst_file, mode=md5_mode)
        if src_md5 == dst_md5:
            # Shortcut: nothing to do
            return src_md5

    # Proceed to the copy operation
    copyfile(src_file, dst_file)
    assert dst_file.exists()  # noqa

    return src_md5

268 

269 

270# def check_md5sum_changed(src_file: Path, src_md5: str = None, md5_mode='b') -> Tuple[bool, str]: 

271# """Checks whether src_file has the same md5 hash as the one on disk or not

272# 

273# Legacy name: md5sum_is_current 

274# 

275# Parameters 

276# ---------- 

277# src_file : Path 

278# The file to check 

279# 

280# md5_mode : str 

281# The md5 computation mode 

282# 

283# Returns 

284# ------- 

285# md5_has_changed : bool 

286# A boolean indicating if src_file has changed with respect to the md5 stored in its companion .md5 file

287# 

288# actual_md5 : str 

289# The actual md5 of src_file 

290# """ 

291# 

292# # Compute the md5 of the src_file 

293# actual_md5 = get_md5sum(src_file, mode=mode) 

294# 

295# # Grab the already computed md5 if it exists, and compare 

296# src_md5_file = src_file.with_name(src_file.name + '.md5') 

297# if src_md5_file.exists(): 

298# ref_md5 = src_md5_file.read_text() 

299# md5_has_changed = (actual_md5 != ref_md5) 

300# else: 

301# md5_has_changed = True 

302# 

303# return md5_has_changed, actual_md5 

304 

305 

class Bunch(dict):
    """A ``dict`` whose entries can also be read and written as attributes."""

    def __init__(self, **kwargs):  # noqa: D102
        super().__init__(kwargs)
        # Use the mapping itself as the instance namespace, so that attribute
        # access and item access share the same storage.
        self.__dict__ = self

312 

313 

def _has_pypandoc():
    """Probe for the pypandoc package and report the pandoc version it finds.

    Returns
    -------
    available, version : tuple
        ``(True, version)`` when pypandoc imports and returns a pandoc
        version, ``(None, None)`` when an ImportError or OSError is raised.
    """
    try:
        import pypandoc  # noqa

        # Per the original author's note, the import error is raised only
        # when the function is called, hence the call lives inside the try.
        pandoc_version = pypandoc.get_pandoc_version()
    except (ImportError, OSError):
        return None, None

    return True, pandoc_version

325 

326 

def matches_filepath_pattern(filepath: Path, pattern: str) -> bool:
    """
    Tell whether a regular expression is found anywhere in a file path

    Parameters
    ----------
    filepath
        The path to inspect; it is converted to a string before searching

    pattern
        The regular expression to search for

    Returns
    -------
    rc
        True when the pattern is found somewhere in the filepath, False otherwise
    """
    assert isinstance(filepath, Path)  # noqa

    path_text = str(filepath)
    return re.search(pattern, path_text) is not None

350 

351 

def is_relative_to(parentpath: Path, subpath: Path) -> bool:
    """
    Tell whether subpath can be expressed relative to parentpath

    Parameters
    ----------
    parentpath
        The candidate parent path

    subpath
        The candidate sub path

    Returns
    -------
    rc
        True when ``subpath.relative_to(parentpath)`` succeeds, False when it raises ValueError

    Raises
    ------
    TypeError
        If either argument is not a pathlib ``Path``
    """
    both_are_paths = isinstance(parentpath, Path) and isinstance(subpath, Path)
    if not both_are_paths:
        raise TypeError("Arguments must both be pathlib objects")

    try:
        subpath.relative_to(parentpath)
    except ValueError:
        return False
    return True

378 return False