⬅ mkdocs_gallery/scrapers.py source

1 # Authors: Sylvain MARIE <sylvain.marie@se.com>
2 # + All contributors to <https://github.com/smarie/mkdocs-gallery>
3 #
4 # Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io>
5 # License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE>
6 """
7 Scrapers for embedding images
8 =============================
9  
10 Collect images that have been produced by code blocks.
11  
The scrapers bundled here support Matplotlib, Mayavi and PyVista; others
should live in modules that will support them (e.g., Plotly). Scraped
images are injected as Markdown image links or raw HTML ``<img>`` tags
into the ``.md`` file generated for each example script.
16 """
17  
import os
import re
import sys
from pathlib import Path
from textwrap import indent
from typing import Dict, List, Optional, Tuple
from warnings import filterwarnings, warn

from packaging.version import parse as parse_version

from .errors import ExtensionError
from .gen_data_model import GalleryScript
from .utils import optipng, rescale_image
30  
# Public names of this module (what ``from ... import *`` exposes).
__all__ = [
    "save_figures",
    "figure_md_or_html",
    "clean_modules",
    "matplotlib_scraper",
    "mayavi_scraper",
    "pyvista_scraper",
]
39  
40  
41 ###############################################################################
42 # Scrapers
43  
44  
def _import_matplotlib():
    """Import matplotlib with the non-interactive ``agg`` backend selected.

    Returns
    -------
    matplotlib, plt : module, module
        The ``matplotlib`` package and its ``pyplot`` submodule.

    Raises
    ------
    ExtensionError
        If the active backend is still not ``agg`` after requesting it.
    """
    # The backend must be chosen before pyplot is imported.
    import matplotlib

    matplotlib.use("agg")
    active_backend = matplotlib.get_backend().lower()

    # Silence matplotlib's "cannot show the figure" UserWarning.
    agg_show_warning = "Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure."
    filterwarnings("ignore", category=UserWarning, message=agg_show_warning)

    if active_backend != "agg":
        raise ExtensionError(
            "mkdocs-gallery relies on the matplotlib 'agg' backend to "
            "render figures and write them to files. You are "
            f"currently using the {active_backend} backend. mkdocs-gallery will "
            "terminate the build now, because changing backends is "
            "not well supported by matplotlib. We advise you to move "
            "mkdocs_gallery imports before any matplotlib-dependent "
            "import. Moving mkdocs_gallery imports at the top of "
            "your conf.py file should fix this issue"
        )

    import matplotlib.pyplot as plt

    return matplotlib, plt
75  
76  
def _matplotlib_fig_titles(fig):
    """Collect the titles of a figure into one comma-separated string.

    The figure's super-title (read from ``fig._suptitle`` when present) comes
    first, followed by every non-empty left/center/right title of each axes
    in ``fig.axes``.

    Parameters
    ----------
    fig : matplotlib figure
        Object exposing ``axes`` and, optionally, ``_suptitle``.

    Returns
    -------
    fig_titles : str
        The titles joined with ``", "``; empty if there are none.
    """
    collected = []

    sup = getattr(fig, "_suptitle", None)
    if sup is not None:
        collected.append(sup.get_text())

    # Query every axes at each of the three possible title locations.
    axes_titles = (axes.get_title(loc=position) for axes in fig.axes for position in ("left", "center", "right"))
    collected.extend(label for label in axes_titles if label)

    return ", ".join(collected)
92  
93  
# HTML wrapper placed around an embedded animation; the ``{html}`` field is
# filled in with ``str.format``.
_ANIMATION_RST = """
<div class="mkd-glr-animation">
{html}
</div>
"""
99  
100  
def _parse_srcset_factors(srcset):
    """Convert ``image_srcset`` entries such as ``"2x"`` into float factors.

    The returned list always starts with ``1`` (the base image). Empty
    strings are ignored; anything else that is not ``<number>x`` raises
    ``ExtensionError``.
    """

    def _invalid(entry):
        return ExtensionError(
            f'Invalid value for image_srcset parameter: "{entry}". '
            "Must be a list of strings with the multiplicative "
            'factor followed by an "x". e.g. ["2.0x", "1.5x"]'
        )

    factors = [1]  # one is always supplied...
    for entry in srcset:
        if entry == "":
            continue
        if entry[-1] != "x":
            raise _invalid(entry)
        try:
            # "2x" = "2.0"
            factors.append(float(entry[:-1]))
        except ValueError as err:
            # e.g. "x" or "abcx": report it as a configuration error.
            raise _invalid(entry) from err
    return factors


def matplotlib_scraper(block, script: GalleryScript, **kwargs):
    """Scrape Matplotlib images.

    Every open figure is saved to the next path of
    ``script.run_vars.image_path_iterator`` (plus one extra file per
    ``image_srcset`` factor). Figures that belong to an animation found in
    the example globals are rendered through ``_anim_md`` instead, when the
    ``matplotlib_animations`` option is enabled. All figures are closed
    before returning.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        The script being run; provides ``gallery_conf`` and ``run_vars``.

    **kwargs : dict
        Additional keyword arguments to pass to
        :meth:`~matplotlib.figure.Figure.savefig`, e.g. ``format='svg'``.
        The ``format`` kwarg in particular is used to set the file extension
        of the output file (currently only 'png', 'jpg', and 'svg' are
        supported).

    Returns
    -------
    md : str
        The Markdown that will be rendered to HTML containing
        the images. This is often produced by :func:`figure_md_or_html`.
        Empty if matplotlib is not installed or no figure is open.

    Raises
    ------
    ExtensionError
        If an ``image_srcset`` entry is malformed.
    """
    try:
        matplotlib, plt = _import_matplotlib()
    except ImportError:
        # Matplotlib is not installed. Ignore
        # Note: we should better remove this (and the same in _reset_matplotlib)
        # and auto-adjust the corresponding config option defaults (image_scrapers, reset_modules) when
        # matplotlib is not present
        return ""

    gallery_conf = script.gallery_conf
    from matplotlib.animation import Animation

    # Check for srcset hidpi images
    srcset_mult_facs = _parse_srcset_factors(gallery_conf.get("image_srcset", []))

    # Check for animations
    anims = []
    if gallery_conf.get("matplotlib_animations", False):
        anims = [obj for obj in script.run_vars.example_globals.values() if isinstance(obj, Animation)]

    # Each entry is either a str (animation markup from _anim_md) or an
    # (image_path, fig_titles, srcsetpaths) tuple for a regular figure.
    image_mds = []

    # Then standard images
    for fig_num, image_path in zip(plt.get_fignums(), script.run_vars.image_path_iterator):
        image_path = Path(image_path)
        if "format" in kwargs:
            image_path = image_path.with_suffix("." + kwargs["format"])

        # Set the fig_num figure as the current figure as we can't save a figure that's not the current figure.
        fig = plt.figure(fig_num)

        # Deal with animations
        anim = next((candidate for candidate in anims if candidate._fig is fig), None)
        if anim is not None:
            image_mds.append(_anim_md(anim, str(image_path), gallery_conf))
            continue

        # get fig titles
        fig_titles = _matplotlib_fig_titles(fig)
        to_rgba = matplotlib.colors.colorConverter.to_rgba

        # shallow copy should be fine here, just want to avoid changing
        # "kwargs" for subsequent figures processed by the loop
        these_kwargs = kwargs.copy()
        for attr in ("facecolor", "edgecolor"):
            fig_attr = getattr(fig, "get_" + attr)()
            default_attr = matplotlib.rcParams["figure." + attr]
            # Keep a non-default figure colour unless the caller overrides it.
            if to_rgba(fig_attr) != to_rgba(default_attr) and attr not in kwargs:
                these_kwargs[attr] = fig_attr

        # save the figures, and populate the srcset paths
        try:
            fig.savefig(image_path, **these_kwargs)
            dpi0 = matplotlib.rcParams["savefig.dpi"]
            if dpi0 == "figure":
                dpi0 = fig.dpi
            dpi0 = these_kwargs.get("dpi", dpi0)
            hinames = {0: image_path}

            # save other srcset paths, keyed by multiplication factor:
            for mult in srcset_mult_facs:
                if mult == 1:
                    continue
                multst = f"{mult}".replace(".", "_")
                name = f"{image_path.stem}_{multst}x{image_path.suffix}"
                hipath = image_path.parent / Path(name)
                hikwargs = these_kwargs.copy()
                hikwargs["dpi"] = mult * dpi0
                fig.savefig(hipath, **hikwargs)
                hinames[mult] = hipath
        except Exception:
            plt.close("all")
            raise

        if "images" in gallery_conf["compress_images"]:
            # ``hinames`` already contains the 1x image under key 0, so each
            # written file is compressed exactly once.
            for saved_path in hinames.values():
                optipng(saved_path, gallery_conf["compress_images_args"])

        image_mds.append((image_path, fig_titles, [hinames]))

    plt.close("all")

    def _entry_md(entry, raw_html):
        # Animations are already rendered markup; figures still need it.
        if isinstance(entry, str):
            return entry
        image_path, fig_titles, srcsetpaths = entry
        return figure_md_or_html([image_path], script, fig_titles, srcsetpaths=srcsetpaths, raw_html=raw_html)

    if not image_mds:
        return ""
    if len(image_mds) == 1:
        return _entry_md(image_mds[0], raw_html=False)

    # Several outputs: put the raw html of each one inside the horizontal
    # list wrapper. Animation entries are passed through unchanged.
    return HLIST_HEADER % "".join(_entry_md(entry, raw_html=True) for entry in image_mds)
262  
263  
def _anim_md(anim, image_path, gallery_conf):
    """Save an animation as a GIF and return the HTML block that embeds it.

    Parameters
    ----------
    anim : matplotlib animation
        The animation to render; its figure is read from ``anim._fig``.
    image_path : str
        Target path; every ``".png"`` in it is replaced by ``".gif"``.
    gallery_conf : dict
        Gallery configuration; ``thumbnail_size`` is used to pick the dpi.

    Returns
    -------
    str
        ``_ANIMATION_RST`` filled with the animation's HTML representation.
    """
    import matplotlib
    from matplotlib.animation import FFMpegWriter, ImageMagickWriter

    # output the thumbnail as the image, as it will just be copied
    # if it's the file thumbnail
    figure = anim._fig
    gif_path = image_path.replace(".png", ".gif")

    # Smallest thumbnail-size / figure-size ratio, rounded, is used as dpi.
    size_inches = figure.get_size_inches()
    thumbnail = gallery_conf["thumbnail_size"]
    ratios = [target / actual for target, actual in zip(thumbnail, size_inches)]
    dpi = round(min(ratios))

    # FFmpeg is buggy for GIFs before Matplotlib 3.3.1
    writer = None
    if parse_version(matplotlib.__version__) >= parse_version("3.3.1") and FFMpegWriter.isAvailable():
        writer = "ffmpeg"
    elif ImageMagickWriter.isAvailable():
        writer = "imagemagick"
    anim.save(gif_path, writer=writer, dpi=dpi)

    rendered = anim._repr_html_()
    if rendered is None:  # plt.rcParams['animation.html'] == 'none'
        rendered = anim.to_jshtml()
    return _ANIMATION_RST.format(html=indent(rendered, " "))
288  
289  
def mayavi_scraper(block, script: GalleryScript):
    """Scrape Mayavi images.

    Each scene of the current mlab engine is saved to the next path of
    ``script.run_vars.image_path_iterator``, rescaled, optionally compressed,
    and all scenes are closed afterwards.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script being run

    Returns
    -------
    md : str
        The markup produced by :func:`figure_md_or_html` for the saved
        images, or an empty string when mayavi is not installed.
    """
    try:
        from mayavi import mlab
    except ModuleNotFoundError:
        warn("No module named 'mayavi', skipping mayavi image scraper.")
        return ""  # skip scraper function

    path_iter = script.run_vars.image_path_iterator
    saved = []
    engine = mlab.get_engine()
    for scene, target in zip(engine.scenes, path_iter):
        try:
            mlab.savefig(str(target), figure=scene)
        except Exception:
            # Do not leave scenes open when saving fails.
            mlab.close(all=True)
            raise

        # make sure the image is not too large
        rescale_image(target, target, 850, 999)
        if "images" in script.gallery_conf["compress_images"]:
            optipng(target, script.gallery_conf["compress_images_args"])
        saved.append(target)

    mlab.close(all=True)
    return figure_md_or_html(saved, script)
329  
  • E302 Expected 2 blank lines, found 1
def pyvista_scraper(block, script: GalleryScript):
    """Scrape PyVista images.

    Every registered plotter is either screenshotted or, when it carries a
    ``_gif_filename`` attribute, has that GIF moved to the next image path.
    All plotters are closed afterwards.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script being run

    Returns
    -------
    md : str
        The markup produced by :func:`figure_md_or_html` for the images.

    Raises
    ------
    RuntimeError
        If ``pyvista.BUILDING_GALLERY`` or ``pyvista.OFF_SCREEN`` is falsy.
    """
    import shutil

    import pyvista as pv
    import pyvista.plotting as pv_plt

    if not pv.BUILDING_GALLERY:
        raise RuntimeError(pv.BUILDING_GALLERY_ERROR_MSG)
    if not pv.OFF_SCREEN:
        raise RuntimeError("set pyvista.OFF_SCREEN=True to use the pyvista image scraper.")

    path_iter = script.run_vars.image_path_iterator
    try:
        # pyvista >= 0.40
        plotters = pv_plt.plotter._ALL_PLOTTERS
    except AttributeError:
        # pyvista < 0.40
        plotters = pv_plt._ALL_PLOTTERS

    produced = []
    for plotter in plotters.values():
        target = next(path_iter)
        if not hasattr(plotter, "_gif_filename"):
            plotter.screenshot(target)
        else:
            # move gif to fname
            target = target.with_suffix("").with_suffix(".gif")
            shutil.move(plotter._gif_filename, target)
        produced.append(target)

    pv.close_all()  # close and clear all plotters
    return figure_md_or_html(produced, script)
375  
376  
# Built-in scrapers, keyed by the short name of the library they handle.
_scraper_dict = {
    "matplotlib": matplotlib_scraper,
    "mayavi": mayavi_scraper,
    "pyvista": pyvista_scraper,
}
382  
383  
# For now, these are what we support.
# Image file extensions, listed in the order in which they are tried.
_KNOWN_IMG_EXTS = (".png", ".svg", ".jpg", ".gif")
386  
387  
class ImageNotFoundError(FileNotFoundError):
    """Raised when an image exists under none of the known extensions."""

    def __init__(self, path):
        # The path that was looked up, kept for the error message.
        self.path = path

    def __str__(self):
        template = "Image {} can not be found on disk, with any of the known extensions {}"
        return template.format(self.path, _KNOWN_IMG_EXTS)
394  
395  
def _find_image_ext(path: Path, raise_if_not_found: bool = True) -> Tuple[Path, str]:
    """Find an image, tolerant of different file extensions.

    Parameters
    ----------
    path : Path
        Path of the image; its suffix is replaced by each known extension
        in turn.
    raise_if_not_found : bool
        When True (default), raise if no candidate exists on disk. When
        False, fall back to the ``.png`` variant even though it is missing.

    Returns
    -------
    this_path : Path
        The first existing candidate, or the ``.png`` fallback.
    ext : str
        The extension of ``this_path``, including the leading dot.

    Raises
    ------
    ImageNotFoundError
        If no candidate exists and ``raise_if_not_found`` is True.
    """
    for ext in _KNOWN_IMG_EXTS:
        candidate = path.with_suffix(ext)
        if candidate.exists():
            return candidate, ext

    if raise_if_not_found:
        raise ImageNotFoundError(path)

    # None exists. Default to png.
    return path.with_suffix(".png"), ".png"
412  
413  
def save_figures(block, script: GalleryScript):
    """Save all open figures of the example code-block.

    Each scraper configured in ``script.gallery_conf["image_scrapers"]`` is
    called in turn, and the image paths it consumed from
    ``script.run_vars.image_path_iterator`` are checked to exist on disk.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script run.

    Returns
    -------
    images_md : str
        md code to embed the images in the document.

    Raises
    ------
    ExtensionError
        If a scraper returns something other than a string, or if an image
        it was expected to write exists under none of the known extensions.
    """
    image_path_iterator = script.run_vars.image_path_iterator
    md_parts = []
    prev_count = len(image_path_iterator)
    for scraper in script.gallery_conf["image_scrapers"]:
        # Use the scraper to generate the md containing image(s) (may be several)
        md = scraper(block, script)
        if not isinstance(md, str):
            raise ExtensionError(f"md from scraper {scraper!r} was not a string, got type {type(md)}:\n{md!r}")

        # Make sure that all images generated by the scraper exist.
        new_count = len(image_path_iterator)
        for idx in range(prev_count, new_count):
            # Do not let the lookup raise: a missing file must be reported
            # below, together with the scraper that was supposed to write it.
            current_path, _ = _find_image_ext(image_path_iterator.paths[idx], raise_if_not_found=False)
            if not current_path.exists():
                raise ExtensionError(f"Scraper {scraper!r} did not produce expected image:\n{current_path}")

        # The next scraper is only accountable for the paths it consumes itself.
        prev_count = new_count
        md_parts.append(md)

    return "".join(md_parts)
449  
450  
# Length of the "mkd_glr_" file-name prefix.
PREFIX_LEN = len("mkd_glr_")
452  
453  
def figure_md_or_html(
    figure_paths: List[Path],
    script: GalleryScript,
    fig_titles: str = "",
    srcsetpaths: Optional[List[Dict[float, Path]]] = None,
    raw_html=False,
):
    """Generate md or raw html for a list of image filenames.

    Depending on whether we have one or more figures, we emit a single
    image or a horizontal list of images.

    Parameters
    ----------
    figure_paths : List[Path]
        Paths of the figures. They must be located under
        ``script.gallery.generated_dir``.
    script : GalleryScript
        The script the figures belong to; ``script.gallery.generated_dir``
        is the directory that all emitted paths are made relative to.
    fig_titles : str
        Titles of figures, empty string if no titles found. Currently
        only supported for matplotlib figures, default = ''. Used as the
        alt text; when empty, the alt text is derived from the file name of
        the first figure.
    srcsetpaths : list or None
        List of dictionaries containing paths, one dictionary per figure.
        If None, the srcset field is populated with the figure path only
        (see ``image_srcset`` configuration option). Otherwise,
        each dict is of the form
        {0: /images/image.png, 2.0: /images/image_2_0x.png}
        where the key is the multiplication factor and the contents
        the path to the image created above.
    raw_html : bool
        For a single figure, emit a raw html ``<img>`` tag instead of a
        Markdown image. Several figures are always emitted as raw html.

    Returns
    -------
    images_md : str
        Code to embed the images in the document:

        - no figure: an empty string;
        - one figure: a Markdown image carrying an attribute list with the
          ``srcset``, or an ``<img>`` tag when ``raw_html`` is True;
        - several figures: one ``<img>`` tag per figure, substituted into
          the ``HLIST_HEADER`` wrapper.
    """
    # Local import: escaping is only needed for the raw html output.
    from html import escape

    if srcsetpaths is None:
        # this should never happen, but figure_md_or_html is public, so
        # this has to be a kwarg...
        srcsetpaths = [{0: fl} for fl in figure_paths]

    # All emitted paths are relative to the directory of the generated md.
    script_md_dir = script.gallery.generated_dir

    # Get alt text
    alt = ""
    if fig_titles:
        alt = fig_titles
    elif figure_paths:
        file_name = os.path.split(str(figure_paths[0]))[1]
        # remove ext & 'mkd_glr_' from start & n#'s from end
        file_name_noext = os.path.splitext(file_name)[0][PREFIX_LEN:-4]
        # replace -, "," & _ with \s
        alt = re.sub(r"[-,_]", " ", file_name_noext)
    alt = _single_line_sanitize(alt)

    if not figure_paths:
        return ""

    # The alt text may contain quotes or angle brackets (e.g. figure titles).
    alt_attr = escape(alt, quote=True)

    if len(figure_paths) == 1:
        figure_path = figure_paths[0]
        srcset = _get_srcset_st(script_md_dir, srcsetpaths[0])
        figure_path_rel_to_script_md_dir = figure_path.relative_to(script_md_dir).as_posix().lstrip("/")
        if raw_html:
            # html version
            return (
                f'<img alt="{alt_attr}" src="../{figure_path_rel_to_script_md_dir}" srcset="{srcset}" '
                f'class="sphx-glr-single-img" />'
            )
        # markdown version
        return f'![{alt}](./{figure_path_rel_to_script_md_dir}){{: .mkd-glr-single-img srcset="{srcset}"}}'

    # Several figures: the items must end up inside the list wrapper, i.e.
    # where its ``%s`` placeholder is. They are emitted as raw html.
    # NOTE(review): assumes Markdown placed inside the raw <ul> block would
    # not be rendered - confirm against the Markdown extensions in use.
    items = []
    for nn, figure_path in enumerate(figure_paths):
        srcset = _get_srcset_st(script_md_dir, srcsetpaths[nn])
        figure_path_rel_to_script_md_dir = figure_path.relative_to(script_md_dir).as_posix().lstrip("/")
        items.append(
            f'<img alt="{alt_attr}" src="../{figure_path_rel_to_script_md_dir}" srcset="{srcset}" '
            f'class="mkd-glr-multi-img" />'
        )
    return HLIST_HEADER % "".join(items)
548  
549  
def _get_srcset_st(sources_dir: Path, hinames: Dict[float, Path]):
    """Create the srcset string for including on the md line.

    Each path of ``hinames`` is made relative to ``sources_dir`` and prefixed
    with ``../``. The entry stored under key ``0`` is emitted bare; every
    other entry is followed by its factor formatted with one decimal and an
    ``x``. Entries are joined with ``", "``.

    For example, with ``sources_dir`` = /home/sample-proj/source and
    ``hinames`` =
        0: /home/sample-proj/source/plot_types/images/img1.png,
        2.0: /home/sample-proj/source/plot_types/images/img1_2_0x.png,
    the result is
        '../plot_types/images/img1.png, ../plot_types/images/img1_2_0x.png 2.0x'
    """
    entries = []
    for factor, hipath in hinames.items():
        url = "../" + hipath.relative_to(sources_dir).as_posix().lstrip("/")
        if factor == 0:
            entries.append(url)
        else:
            entries.append(f"{url} {factor:1.1f}x")
    return ", ".join(entries)
574  
575  
def _single_line_sanitize(s):
    """Return ``s`` with every newline replaced by a space."""
    # For example, the alt text of an image must fit on a single line.
    # Kept as a function in case other characters need replacing later.
    return " ".join(s.split("\n"))
581  
582  
# The following strings are used when we have several pictures: we use
# an html tag that our CSS uses to turn the lists into horizontal
# lists.

# List wrapper; its single ``%s`` placeholder receives the list content.
HLIST_HEADER = """
<ul class="mkd-glr-horizontal">
%s
</ul>
"""

# One html list item; ``%s`` receives the markup of one image.
HLIST_IMAGE_MATPLOTLIB = """<li>
%s
</li>"""

# One Markdown list item; the placeholders are, in order: alt text, image
# path (emitted after a ``../`` prefix) and srcset string.
HLIST_SG_TEMPLATE = """
* ![%s](../%s){: .mkd-glr-multi-img srcset="%s"}
"""
599  
600  
601 ###############################################################################
602 # Module resetting
603  
604  
def _reset_matplotlib(gallery_conf, file: Path):
    """Reset matplotlib's rc parameters to their defaults.

    Does nothing when matplotlib cannot be imported. Both arguments are
    unused; they are part of the reset-function signature.
    """
    try:
        import matplotlib  # noqa: F401  (only checking that it is installed)
    except ImportError:
        # Matplotlib is not present: do not care
        return

    # Proceed with resetting it
    _, plt = _import_matplotlib()
    plt.rcdefaults()
616  
617  
def _reset_seaborn(gallery_conf, file: Path):
    """Reset seaborn by dropping it from ``sys.modules``.

    Every entry whose name contains ``"seaborn"`` is removed. Both arguments
    are unused; they are part of the reset-function signature.
    """
    # Horrible code to 'unload' seaborn, so that it resets
    # its defaults when it is loaded again.
    # Python does not support unloading of modules
    # https://bugs.python.org/issue9072
    stale = [name for name in list(sys.modules) if "seaborn" in name]
    for name in stale:
        del sys.modules[name]
627  
628  
# Built-in module resetters, keyed by the name of the module they reset.
_reset_dict = dict(
    matplotlib=_reset_matplotlib,
    seaborn=_reset_seaborn,
)
633  
634  
def clean_modules(gallery_conf: Dict, file: Optional[Path]):
    """Remove, unload, or reset modules after running each example.

    A script can change settings while it runs; the reset functions keep
    those changes from leaking into the other examples of the gallery.

    Parameters
    ----------
    gallery_conf : dict
        The gallery configuration. Every callable listed under
        ``"reset_modules"`` is invoked, in order, as
        ``reset(gallery_conf, file)``.

    file : Path
        The example being run. Will be None when this is called entering
        a directory of examples to be built.
    """
    resetters = gallery_conf["reset_modules"]
    for reset in resetters:
        reset(gallery_conf, file)