1 # Authors: Sylvain MARIE <sylvain.marie@se.com>
2 # + All contributors to <https://github.com/smarie/mkdocs-gallery>
3 #
4 # Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io>
5 # License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE>
6 """
7 Scrapers for embedding images
8 =============================
9
10 Collect images that have been produced by code blocks.
11
The scrapers bundled here are Matplotlib, Mayavi and PyVista; others should
live in the modules that support them (e.g., Plotly). Scraped images are
injected as Markdown image links or raw HTML ``<img>`` tags into the
``.md`` file generated for each example script.
16 """
17
import os
import re
import sys
from pathlib import Path
from textwrap import indent
from typing import Dict, List, Optional, Tuple
from warnings import filterwarnings, warn

from packaging.version import parse as parse_version

from .errors import ExtensionError
from .gen_data_model import GalleryScript
from .utils import optipng, rescale_image
30
# Public names of this module (what a star-import exposes).
__all__ = [
    "save_figures",
    "figure_md_or_html",
    "clean_modules",
    "matplotlib_scraper",
    "mayavi_scraper",
    "pyvista_scraper",
]
39
40
41 ###############################################################################
42 # Scrapers
43
44
def _import_matplotlib():
    """Import matplotlib with the ``agg`` backend selected.

    Returns
    -------
    matplotlib, plt : tuple of modules
        The ``matplotlib`` package and its ``matplotlib.pyplot`` submodule.

    Raises
    ------
    ImportError
        If matplotlib is not installed (raised by the import itself).
    ExtensionError
        If the active backend is still not ``agg`` after requesting it.
    """
    import matplotlib

    # Request the non-interactive backend, then read back what is really active.
    matplotlib.use("agg")
    active_backend = matplotlib.get_backend().lower()

    # Showing a figure with a non-GUI backend only triggers this warning: mute it.
    filterwarnings(
        "ignore",
        category=UserWarning,
        message="Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.",
    )

    if active_backend != "agg":
        raise ExtensionError(
            "mkdocs-gallery relies on the matplotlib 'agg' backend to "
            "render figures and write them to files. You are "
            f"currently using the {active_backend} backend. mkdocs-gallery will "
            "terminate the build now, because changing backends is "
            "not well supported by matplotlib. We advise you to move "
            "mkdocs_gallery imports before any matplotlib-dependent "
            "import. Moving mkdocs_gallery imports at the top of "
            "your conf.py file should fix this issue"
        )

    # pyplot is only imported once the backend has been validated.
    import matplotlib.pyplot as plt

    return matplotlib, plt
75
76
def _matplotlib_fig_titles(fig):
    """Collect the titles found on a Matplotlib figure into one string.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to inspect. Only its private ``_suptitle`` attribute (when
        present), its ``axes`` list and each axes' ``get_title(loc=...)``
        are used.

    Returns
    -------
    fig_titles : str
        The figure super-title followed by every non-empty axes title
        (``left``, ``center`` then ``right`` for each axes), joined with
        ``", "``. Empty string when no title is found.
    """
    titles = []

    # get supertitle if exists
    suptitle = getattr(fig, "_suptitle", None)
    if suptitle is not None:
        suptitle_text = suptitle.get_text()
        # An empty super-title must be skipped like empty axes titles are,
        # otherwise the result starts with a dangling ", " separator.
        if suptitle_text:
            titles.append(suptitle_text)

    # get titles from all axes, for all locs
    title_locs = ("left", "center", "right")
    for ax in fig.axes:
        for loc in title_locs:
            text = ax.get_title(loc=loc)
            if text:
                titles.append(text)

    return ", ".join(titles)
92
93
# HTML wrapper (despite the ``_RST`` suffix of the name) filled in by
# ``_anim_md``: the ``{html}`` field receives the animation's HTML.
_ANIMATION_RST = """
<div class="mkd-glr-animation">
{html}
</div>
"""
99
100
def matplotlib_scraper(block, script: GalleryScript, **kwargs):
    """Scrape Matplotlib images.

    Every figure currently open in pyplot is saved to the next path given by
    ``script.run_vars.image_path_iterator`` (plus one extra file per hidpi
    factor listed in the ``image_srcset`` option), then all figures are
    closed. When the ``matplotlib_animations`` option is enabled, a figure
    driven by an ``Animation`` found in the example globals is exported
    through ``_anim_md`` instead.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.
        Not used by this scraper.

    script : GalleryScript
        Script being run. Its ``gallery_conf``, ``run_vars.example_globals``
        and ``run_vars.image_path_iterator`` are read.

    **kwargs : dict
        Additional keyword arguments to pass to
        :meth:`~matplotlib.figure.Figure.savefig`, e.g. ``format='svg'``.
        The ``format`` kwarg in particular is used to set the file extension
        of the output file (currently only 'png', 'jpg', and 'svg' are
        supported).

    Returns
    -------
    md : str
        The Markdown (or raw HTML) that will be rendered to HTML containing
        the images. This is often produced by :func:`figure_md_or_html`.
        Empty string when matplotlib cannot be imported or no figure is open.

    Raises
    ------
    ExtensionError
        If an entry of the ``image_srcset`` option is not ``""`` or a number
        followed by ``"x"``.
    """
    try:
        matplotlib, plt = _import_matplotlib()
    except ImportError:
        # Matplotlib is not installed. Ignore
        # Note: we should better remove this (and the same in _reset_matplotlib)
        # and auto-adjust the corresponding config option defaults (image_scrapers, reset_modules) when
        # matplotlib is not present
        return ""

    gallery_conf = script.gallery_conf
    from matplotlib.animation import Animation

    def _invalid_srcset(value):
        return ExtensionError(
            f'Invalid value for image_srcset parameter: "{value}". '
            "Must be a list of strings with the multiplicative "
            'factor followed by an "x". e.g. ["2.0x", "1.5x"]'
        )

    # Check for srcset hidpi images
    srcset_mult_facs = [1]  # one is always supplied...
    for st in gallery_conf.get("image_srcset", []):
        if (len(st) > 0) and (st[-1] == "x"):
            try:
                # "2x" = "2.0"
                factor = float(st[:-1])
            except ValueError:
                # e.g. "x" or "abcx": report it like any other malformed entry
                raise _invalid_srcset(st) from None
            srcset_mult_facs.append(factor)
        elif st != "":
            raise _invalid_srcset(st)

    # Check for animations
    anims = []
    if gallery_conf.get("matplotlib_animations", False):
        anims = [obj for obj in script.run_vars.example_globals.values() if isinstance(obj, Animation)]

    # Entries are either a str (animation HTML, see _anim_md) or a tuple
    # (image_path, fig_titles, srcsetpaths) for a regular figure.
    image_mds = []

    # Then standard images
    for fig_num, image_path in zip(plt.get_fignums(), script.run_vars.image_path_iterator):
        image_path = Path(image_path)
        if "format" in kwargs:
            image_path = image_path.with_suffix("." + kwargs["format"])

        # Set the fig_num figure as the current figure as we can't save a figure that's not the current figure.
        fig = plt.figure(fig_num)

        # Deal with animations
        anim = next((candidate for candidate in anims if candidate._fig is fig), None)
        if anim is not None:
            image_mds.append(_anim_md(anim, str(image_path), gallery_conf))
            continue

        # get fig titles
        fig_titles = _matplotlib_fig_titles(fig)
        to_rgba = matplotlib.colors.colorConverter.to_rgba

        # shallow copy should be fine here, just want to avoid changing
        # "kwargs" for subsequent figures processed by the loop
        these_kwargs = kwargs.copy()
        for attr in ["facecolor", "edgecolor"]:
            fig_attr = getattr(fig, "get_" + attr)()
            default_attr = matplotlib.rcParams["figure." + attr]
            # Forward a non-default figure colour unless the caller set one explicitly.
            if to_rgba(fig_attr) != to_rgba(default_attr) and attr not in kwargs:
                these_kwargs[attr] = fig_attr

        # save the figures, and populate the srcsetpaths
        try:
            fig.savefig(image_path, **these_kwargs)

            # An explicit ``dpi`` kwarg wins over the rcParam; either one may be
            # the special value "figure", meaning the figure's own dpi.
            dpi0 = these_kwargs.get("dpi", matplotlib.rcParams["savefig.dpi"])
            if dpi0 == "figure":
                dpi0 = fig.dpi

            # paths keyed by multiplication factor; 0 is the base image
            hidpi_paths = {0: image_path}
            for mult in srcset_mult_facs:
                if mult == 1:
                    continue
                multst = f"{mult}".replace(".", "_")
                name = f"{image_path.stem}_{multst}x{image_path.suffix}"
                hipath = image_path.parent / Path(name)
                hikwargs = these_kwargs.copy()
                hikwargs["dpi"] = mult * dpi0
                fig.savefig(hipath, **hikwargs)
                hidpi_paths[mult] = hipath
        except Exception:
            plt.close("all")
            raise

        if "images" in gallery_conf["compress_images"]:
            # hidpi_paths[0] is image_path itself, so this compresses every
            # written file exactly once (base image first).
            for written_path in hidpi_paths.values():
                optipng(written_path, gallery_conf["compress_images_args"])

        image_mds.append((image_path, fig_titles, [hidpi_paths]))

    plt.close("all")

    # Create the markdown or html output
    md = ""
    if len(image_mds) == 1:
        if isinstance(image_mds[0], str):
            # an animation, see _anim_md
            md = image_mds[0]
        else:
            # an image
            image_path, fig_titles, srcsetpaths = image_mds[0]
            md = figure_md_or_html([image_path], script, fig_titles, srcsetpaths=srcsetpaths)
    elif len(image_mds) > 1:
        # Several outputs: put raw html for each of them in one horizontal list
        image_htmls = []
        for image_md in image_mds:
            if isinstance(image_md, str):
                # an animation: already html, use it as is
                image_htmls.append(image_md)
                continue
            image_path, fig_titles, srcsetpaths = image_md
            image_htmls.append(
                figure_md_or_html([image_path], script, fig_titles, srcsetpaths=srcsetpaths, raw_html=True)
            )
        md = HLIST_HEADER % ("".join(image_htmls))
    return md
262
263
def _anim_md(anim, image_path, gallery_conf):
    """Save a Matplotlib animation as a GIF and return HTML embedding it.

    Parameters
    ----------
    anim : matplotlib.animation.Animation
        Animation to export; its private ``_fig`` attribute gives the figure.
    image_path : str
        Path reserved for this figure. The GIF is written next to it, with
        the extension replaced by ``.gif``.
    gallery_conf : dict
        Gallery configuration; only ``thumbnail_size`` is read.

    Returns
    -------
    md : str
        ``_ANIMATION_RST`` filled with the animation's HTML representation.
    """
    import matplotlib
    from matplotlib.animation import FFMpegWriter, ImageMagickWriter

    # output the thumbnail as the image, as it will just be copied
    # if it's the file thumbnail
    fig = anim._fig
    # Swap the file extension only: a plain str.replace(".png", ".gif") would
    # also hit ".png" elsewhere in the path and leave other suffixes untouched.
    image_path = str(Path(image_path).with_suffix(".gif"))
    fig_size = fig.get_size_inches()
    thumb_size = gallery_conf["thumbnail_size"]
    # Largest dpi for which the figure fits in thumb_size on both axes
    # (presumably thumb_size is in pixels - TODO confirm).
    use_dpi = round(min(t_s / f_s for t_s, f_s in zip(thumb_size, fig_size)))
    # FFmpeg is buggy for GIFs before Matplotlib 3.3.1
    if parse_version(matplotlib.__version__) >= parse_version("3.3.1") and FFMpegWriter.isAvailable():
        writer = "ffmpeg"
    elif ImageMagickWriter.isAvailable():
        writer = "imagemagick"
    else:
        # let matplotlib pick its default writer
        writer = None
    anim.save(image_path, writer=writer, dpi=use_dpi)
    html = anim._repr_html_()
    if html is None:  # plt.rcParams['animation.html'] == 'none'
        html = anim.to_jshtml()
    html = indent(html, " ")
    return _ANIMATION_RST.format(html=html)
288
289
def mayavi_scraper(block, script: GalleryScript):
    """Scrape Mayavi images.

    Each scene of the current mlab engine is saved to the next path given by
    ``script.run_vars.image_path_iterator``, then all scenes are closed.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.
        Not used by this scraper.

    script : GalleryScript
        Script being run

    Returns
    -------
    md : str
        The Markdown that will be rendered to HTML containing the images,
        as produced by :func:`figure_md_or_html`. Empty string (after a
        warning) when mayavi is not installed.
    """
    try:
        from mayavi import mlab
    except ModuleNotFoundError:
        warn("No module named 'mayavi', skipping mayavi image scraper.")
        return ""  # skip scraper function

    path_source = script.run_vars.image_path_iterator
    engine = mlab.get_engine()
    saved_paths = []
    for scene, target in zip(engine.scenes, path_source):
        try:
            mlab.savefig(str(target), figure=scene)
        except Exception:
            # do not leave scenes open behind a failed save
            mlab.close(all=True)
            raise

        # make sure the image is not too large (rescaled in place)
        rescale_image(target, target, 850, 999)

        settings = script.gallery_conf
        if "images" in settings["compress_images"]:
            optipng(target, settings["compress_images_args"])
        saved_paths.append(target)

    mlab.close(all=True)
    return figure_md_or_html(saved_paths, script)
329
-
E302
Expected 2 blank lines, found 1
def pyvista_scraper(block, script: GalleryScript):
    """Scrape PyVista images.

    Each registered plotter is exported to the next path given by
    ``script.run_vars.image_path_iterator``: as a screenshot, or by moving
    the plotter's GIF file when it has a ``_gif_filename`` attribute. All
    plotters are closed afterwards.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.
        Not used by this scraper.

    script : GalleryScript
        Script being run

    Returns
    -------
    md : str
        The Markdown that will be rendered to HTML containing the images,
        as produced by :func:`figure_md_or_html`.

    Raises
    ------
    RuntimeError
        If ``pyvista.BUILDING_GALLERY`` or ``pyvista.OFF_SCREEN`` is falsy.
    """
    import pyvista as pv
    import pyvista.plotting as pv_plt
    import shutil

    if not pv.BUILDING_GALLERY:
        raise RuntimeError(pv.BUILDING_GALLERY_ERROR_MSG)
    if not pv.OFF_SCREEN:
        raise RuntimeError("set pyvista.OFF_SCREEN=True to use the pyvista image scraper.")

    path_source = script.run_vars.image_path_iterator
    exported = []

    # The plotter registry moved between pyvista versions.
    try:
        # pyvista >= 0.40
        registry = pv_plt.plotter._ALL_PLOTTERS
    except AttributeError:
        # pyvista < 0.40
        registry = pv_plt._ALL_PLOTTERS

    for plotter in registry.values():
        target = next(path_source)
        if not hasattr(plotter, "_gif_filename"):
            plotter.screenshot(target)
        else:
            # move gif to the reserved path, with a .gif extension
            target = target.with_suffix('').with_suffix(".gif")
            shutil.move(plotter._gif_filename, target)
        exported.append(target)

    pv.close_all()  # close and clear all plotters
    return figure_md_or_html(exported, script)
375
376
# Scrapers shipped with this module, keyed by a short name.
_scraper_dict = {
    "matplotlib": matplotlib_scraper,
    "mayavi": mayavi_scraper,
    "pyvista": pyvista_scraper,
}
382
383
# For now, these are what we support.
# Order matters: ``_find_image_ext`` tries the extensions in this order and
# stops at the first one for which a file exists.
_KNOWN_IMG_EXTS = (".png", ".svg", ".jpg", ".gif")
386
387
class ImageNotFoundError(FileNotFoundError):
    """Raised when no image exists for a path under any known extension.

    Parameters
    ----------
    path : Path
        The path that was looked up; kept in the ``path`` attribute and
        reported by ``str()``.
    """

    def __init__(self, path):
        # Forward the argument to the base class so that ``args`` is filled:
        # with empty ``args`` the exception cannot be re-created by
        # copy/pickle, which call the class again with ``args``.
        super().__init__(path)
        self.path = path

    def __str__(self):
        return f"Image {self.path} can not be found on disk, with any of the known extensions {_KNOWN_IMG_EXTS}"
394
395
def _find_image_ext(path: Path, raise_if_not_found: bool = True) -> Tuple[Path, str]:
    """Find an image, tolerant of different file extensions.

    Parameters
    ----------
    path : Path
        Image path; its suffix is replaced by each of ``_KNOWN_IMG_EXTS`` in
        turn until an existing file is found.
    raise_if_not_found : bool
        Whether to raise when no candidate exists (default) or to fall back
        to the ``.png`` candidate.

    Returns
    -------
    this_path : Path
        The first existing candidate, or the (non-existing) ``.png``
        candidate when nothing exists and ``raise_if_not_found`` is False.
    ext : str
        The extension of ``this_path``, with its leading dot.

    Raises
    ------
    ImageNotFoundError
        If no candidate exists and ``raise_if_not_found`` is True.
    """
    for ext in _KNOWN_IMG_EXTS:
        candidate = path.with_suffix(ext)
        if candidate.exists():
            return candidate, ext

    if raise_if_not_found:
        raise ImageNotFoundError(path)

    # None exists. Default to png.
    return path.with_suffix(".png"), ".png"
412
413
def save_figures(block, script: GalleryScript):
    """Save all open figures of the example code-block.

    Every scraper listed in the ``image_scrapers`` option is called in turn
    with ``(block, script)``; the image paths it consumed from
    ``script.run_vars.image_path_iterator`` are then checked on disk.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script run.

    Returns
    -------
    images_md : str
        md code to embed the images in the document: the concatenation of
        what each scraper returned.

    Raises
    ------
    ExtensionError
        If a scraper returns something that is not a string, or if an image
        path it consumed does not exist under any known extension.
    """
    image_path_iterator = script.run_vars.image_path_iterator
    md_parts = []
    prev_count = len(image_path_iterator)
    for scraper in script.gallery_conf["image_scrapers"]:
        # Use the scraper to generate the md containing image(s) (may be several)
        md = scraper(block, script)
        if not isinstance(md, str):
            raise ExtensionError(f"md from scraper {scraper!r} was not a string, got type {type(md)}:\n{md!r}")

        # Make sure that all images generated by the scraper exist.
        new_count = len(image_path_iterator)
        for index in range(prev_count, new_count):
            # Do not let the lookup raise: the error below is the one that
            # names the scraper at fault.
            current_path, _ = _find_image_ext(image_path_iterator.paths[index], raise_if_not_found=False)
            if not current_path.exists():
                raise ExtensionError(f"Scraper {scraper!r} did not produce expected image:\n{current_path}")
        # The next scraper is only accountable for the paths it consumes itself.
        prev_count = new_count

        md_parts.append(md)

    return "".join(md_parts)
449
450
# Length of the "mkd_glr_" prefix; ``figure_md_or_html`` strips that many
# leading characters from an image file name to build its alt text.
PREFIX_LEN = len("mkd_glr_")
452
453
def figure_md_or_html(
    figure_paths: List[Path],
    script: GalleryScript,
    fig_titles: str = "",
    srcsetpaths: Optional[List[Dict[float, Path]]] = None,
    raw_html=False,
):
    """Generate md or raw html for a list of image filenames.

    A single figure becomes a Markdown image (default) or a raw ``<img>``
    tag (``raw_html=True``). Several figures are always returned as raw
    ``<img>`` tags inside the ``HLIST_HEADER`` list, the layout that
    ``matplotlib_scraper`` builds for several figures. No figure gives an
    empty string.

    Parameters
    ----------
    figure_paths : List[Path]
        Paths of the figures. Each one must be located under
        ``script.gallery.generated_dir``, as it is made relative to it.
    script : GalleryScript
        Script that produced the figures; only ``gallery.generated_dir`` is
        read.
    fig_titles : str
        Titles of figures, empty string if no titles found. Currently
        only supported for matplotlib figures, default = ''. Used as alt
        text; when empty, the alt text is derived from the first file name.
    srcsetpaths : list or None
        One dictionary per figure, mapping a multiplication factor to the
        path of the image saved at that resolution (see the ``image_srcset``
        configuration option), e.g.
        {0: /images/image.png, 2.0: /images/image_2_0x.png}
        where key 0 is the base image. If None, each figure only gets its
        own path.
    raw_html : bool
        Whether a single figure is returned as raw html instead of Markdown.

    Returns
    -------
    images_md : str
        md or html code to embed the images in the document, including a
        ``srcset`` attribute so that multiple resolutions can be served.
    """
    from html import escape

    if srcsetpaths is None:
        # this should never happen, but figure_md_or_html is public, so
        # this has to be a kwarg...
        srcsetpaths = [{0: fl} for fl in figure_paths]

    # Image locations are expressed relative to this directory
    script_md_dir = script.gallery.generated_dir

    # Get alt text
    alt = ""
    if fig_titles:
        alt = fig_titles
    elif figure_paths:
        # remove ext & 'mkd_glr_' from start & n#'s from end
        file_name_noext = Path(figure_paths[0]).stem[PREFIX_LEN:-4]
        # replace - & _ with \s
        alt = re.sub(r"[-,_]", " ", file_name_noext)
    alt = _single_line_sanitize(alt)

    def _location(index):
        """Return (relative path, srcset value) of the index-th figure."""
        srcset = _get_srcset_st(script_md_dir, srcsetpaths[index])
        rel_path = figure_paths[index].relative_to(script_md_dir).as_posix().lstrip("/")
        return rel_path, srcset

    def _img_html(index):
        """Return the raw ``<img>`` tag of the index-th figure."""
        rel_path, srcset = _location(index)
        # The alt text lands inside a double-quoted attribute: escape it so
        # that quotes or angle brackets in a title cannot break the tag.
        return (
            f'<img alt="{escape(alt, quote=True)}" src="../{rel_path}" srcset="{srcset}" '
            f'class="sphx-glr-single-img" />'
        )

    if len(figure_paths) == 1:
        if raw_html:
            # html version
            return _img_html(0)
        # markdown version
        rel_path, srcset = _location(0)
        return f'![{alt}](./{rel_path}){{: .mkd-glr-single-img srcset="{srcset}"}}'

    if len(figure_paths) > 1:
        # HLIST_HEADER is a raw html list with a single placeholder: fill it
        # with html items rather than emitting it with a literal "%s".
        return HLIST_HEADER % "".join(_img_html(index) for index in range(len(figure_paths)))

    return ""
548
549
def _get_srcset_st(sources_dir: Path, hinames: Dict[float, Path]):
    """Create the srcset string for including on the md line.

    Each path of ``hinames`` is made relative to ``sources_dir`` and
    prefixed with ``../``. Entries whose key is not 0 are followed by the
    key formatted as a one-decimal factor and an ``x``. Entries are joined
    with ``", "`` in the order of the dictionary.

    For example, with ``sources_dir`` being /home/sample-proj/source and
    ``hinames`` being
        0: /home/sample-proj/source/images/img1.png,
        2.0: /home/sample-proj/source/images/img1_2_0x.png,
    the result is
        '../images/img1.png, ../images/img1_2_0x.png 2.0x'
    """
    entries = []
    for factor, image_path in hinames.items():
        entry = "../" + image_path.relative_to(sources_dir).as_posix().lstrip("/")
        if factor != 0:
            # key 0 is the base image and carries no density descriptor
            entry += f" {factor:1.1f}x"
        entries.append(entry)
    return ", ".join(entries)
574
575
def _single_line_sanitize(s):
    """Return ``s`` with every newline replaced by a single space."""
    # Kept as a function in case other characters turn out to need replacing,
    # e.g. the alt text of an image must not contain a newline.
    return " ".join(s.split("\n"))
581
582
# The following strings are used when we have several pictures: we use
# an html ``<ul>`` tag that our CSS uses to turn the lists into horizontal
# lists.

# Wrapper of the whole list; ``%s`` receives the concatenated items.
HLIST_HEADER = """
<ul class="mkd-glr-horizontal">
%s
</ul>
"""

# ``<li>`` wrapper of one item; ``%s`` receives the content of the item.
HLIST_IMAGE_MATPLOTLIB = """<li>
%s
</li>"""

# Markdown list item for one image. The three placeholders are, in order:
# the alt text, the image path and the srcset value.
HLIST_SG_TEMPLATE = """
* ![%s](../%s){: .mkd-glr-multi-img srcset="%s"}
"""
599
600
601 ###############################################################################
602 # Module resetting
603
604
def _reset_matplotlib(gallery_conf, file: Path):
    """Reset matplotlib by calling ``plt.rcdefaults()``.

    Does nothing when matplotlib cannot be imported, which is how
    ``matplotlib_scraper`` treats a missing matplotlib as well.

    Parameters
    ----------
    gallery_conf : dict
        The gallery configuration. Not used here.
    file : Path
        The example being run. Not used here.
    """
    try:
        # Importing through the helper also makes sure the agg backend is set,
        # and avoids a separate probe import whose result is never used.
        _, plt = _import_matplotlib()
    except ImportError:
        # Matplotlib is not present: do not care
        return

    # Proceed with resetting it
    plt.rcdefaults()
616
617
def _reset_seaborn(gallery_conf, file: Path):
    """Reset seaborn by dropping it from the module cache.

    Python does not support unloading of modules
    (https://bugs.python.org/issue9072), so every entry of ``sys.modules``
    whose name contains "seaborn" is removed instead: the next import loads
    seaborn again, with its defaults.

    Parameters
    ----------
    gallery_conf : dict
        The gallery configuration. Not used here.
    file : Path
        The example being run. Not used here.
    """
    # Collect the names first: the cache must not change size while iterated.
    cached_names = [name for name in sys.modules if "seaborn" in name]
    for name in cached_names:
        del sys.modules[name]
627
628
# Module-reset functions shipped with this module, keyed by a short name.
# Each of them has the ``(gallery_conf, file)`` signature.
_reset_dict = {
    "matplotlib": _reset_matplotlib,
    "seaborn": _reset_seaborn,
}
633
634
def clean_modules(gallery_conf: Dict, file: Optional[Path]):
    """Remove, unload, or reset modules after running each example.

    A script can change settings while it runs; resetting the modules keeps
    those changes from leaking into the other examples of the gallery.

    Parameters
    ----------
    gallery_conf : dict
        The gallery configuration. Its ``reset_modules`` entry lists the
        callables to run; each is called as ``reset(gallery_conf, file)``,
        in order.

    file : Path
        The example being run. Will be None when this is called entering
        a directory of examples to be built.
    """
    resetters = gallery_conf["reset_modules"]
    for reset in resetters:
        reset(gallery_conf, file)