Coverage for src/mkdocs_gallery/scrapers.py: 84%
271 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-30 08:26 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-30 08:26 +0000
1# Authors: Sylvain MARIE <sylvain.marie@se.com>
2# + All contributors to <https://github.com/smarie/mkdocs-gallery>
3#
4# Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io>
5# License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE>
6"""
7Scrapers for embedding images
8=============================
10Collect images that have been produced by code blocks.
12The built-in scrapers are Matplotlib, Mayavi and PyVista; others should
13live in modules that will support them (e.g., Plotly). Scraped
14images are injected as Markdown or raw HTML into the ``.md``
15file generated for each example script.
16"""
import os
import re
import sys
from pathlib import Path
from textwrap import indent
from typing import Dict, List, Optional, Tuple
from warnings import filterwarnings, warn

from packaging.version import parse as parse_version

from .errors import ExtensionError
from .gen_data_model import GalleryScript
from .utils import optipng, rescale_image
# Public names of this module: generic helpers first, then the scrapers.
__all__ = ["save_figures", "figure_md_or_html", "clean_modules"]
__all__ += ["matplotlib_scraper", "mayavi_scraper", "pyvista_scraper"]
41###############################################################################
42# Scrapers
def _import_matplotlib():
    """Import matplotlib with the non-interactive ``agg`` backend selected.

    Returns
    -------
    matplotlib, plt : modules
        The ``matplotlib`` package and ``matplotlib.pyplot``.

    Raises
    ------
    ImportError
        Propagated from ``import matplotlib`` when it is not installed.
    ExtensionError
        If the active backend is not ``agg`` after requesting it.
    """
    import matplotlib

    # Request the backend before pyplot is imported further down.
    matplotlib.use("agg")
    active_backend = matplotlib.get_backend().lower()

    # Ignore matplotlib's "cannot show the figure" UserWarning.
    filterwarnings(
        "ignore",
        category=UserWarning,
        message="Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.",
    )

    if active_backend != "agg":
        raise ExtensionError(
            "mkdocs-gallery relies on the matplotlib 'agg' backend to "
            "render figures and write them to files. You are "
            f"currently using the {active_backend} backend. mkdocs-gallery will "
            "terminate the build now, because changing backends is "
            "not well supported by matplotlib. We advise you to move "
            "mkdocs_gallery imports before any matplotlib-dependent "
            "import. Moving mkdocs_gallery imports at the top of "
            "your conf.py file should fix this issue"
        )

    import matplotlib.pyplot as plt

    return matplotlib, plt
77def _matplotlib_fig_titles(fig):
78 titles = []
79 # get supertitle if exists
80 suptitle = getattr(fig, "_suptitle", None)
81 if suptitle is not None: 81 ↛ 82line 81 didn't jump to line 82 because the condition on line 81 was never true
82 titles.append(suptitle.get_text())
83 # get titles from all axes, for all locs
84 title_locs = ["left", "center", "right"]
85 for ax in fig.axes:
86 for loc in title_locs:
87 text = ax.get_title(loc=loc)
88 if text:
89 titles.append(text)
90 fig_titles = ", ".join(titles)
91 return fig_titles
94_ANIMATION_RST = """
95<div class="mkd-glr-animation">
96{html}
97</div>
98"""
def matplotlib_scraper(block, script: GalleryScript, **kwargs):
    """Scrape Matplotlib images.

    Every open pyplot figure is saved to the next path supplied by
    ``script.run_vars.image_path_iterator`` (plus one extra file per
    ``image_srcset`` factor), then all figures are closed. When the
    ``matplotlib_animations`` option is enabled, a figure driven by an
    animation found in the example globals is exported through
    ``_anim_md`` instead of being saved as a still image.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script being run; its ``gallery_conf`` and ``run_vars`` are read.

    **kwargs : dict
        Additional keyword arguments to pass to
        :meth:`~matplotlib.figure.Figure.savefig`, e.g. ``format='svg'``.
        The ``format`` kwarg in particular is used to set the file extension
        of the output file (currently only 'png', 'jpg', and 'svg' are
        supported).

    Returns
    -------
    md : str
        The Markdown that will be rendered to HTML containing
        the images. This is often produced by :func:`figure_md_or_html`.
        Empty when matplotlib is not installed or nothing was scraped.

    Raises
    ------
    ExtensionError
        If an ``image_srcset`` entry is neither empty nor a number followed
        by ``"x"``.
    """
    try:
        matplotlib, plt = _import_matplotlib()
    except ImportError:
        # Matplotlib is not installed. Ignore
        # Note: we should better remove this (and the same in _reset_matplotlib)
        # and auto-adjust the corresponding config option defaults (image_scrapers, reset_modules) when
        # matplotlib is not present
        return ""

    gallery_conf = script.gallery_conf
    from matplotlib.animation import Animation

    # Resolution multipliers for hidpi ("srcset") images; 1 is always produced.
    srcset_mult_facs = [1]
    for st in gallery_conf.get("image_srcset", []):
        factor = None
        if (len(st) > 0) and (st[-1] == "x"):
            try:
                factor = float(st[:-1])  # "2x" -> 2.0
            except ValueError:
                pass  # not a number: reported below like any other bad entry
        elif st == "":
            continue
        if factor is None:
            raise ExtensionError(
                f'Invalid value for image_srcset parameter: "{st}". '
                "Must be a list of strings with the multiplicative "
                'factor followed by an "x". e.g. ["2.0x", "1.5x"]'
            )
        srcset_mult_facs.append(factor)

    # Animations living in the example's globals (only looked up when enabled).
    anims = []
    if gallery_conf.get("matplotlib_animations", False):
        anims = [obj for obj in script.run_vars.example_globals.values() if isinstance(obj, Animation)]

    to_rgba = matplotlib.colors.colorConverter.to_rgba

    # Each entry is either a ``str`` (animation HTML from ``_anim_md``) or a
    # ``(image_path, fig_titles, srcsetpaths)`` tuple for a still image.
    image_mds = []
    for fig_num, image_path in zip(plt.get_fignums(), script.run_vars.image_path_iterator):
        image_path = Path(image_path)
        if "format" in kwargs:
            image_path = image_path.with_suffix("." + kwargs["format"])

        # Set the fig_num figure as the current figure as we can't save a figure that's not the current figure.
        fig = plt.figure(fig_num)

        # A figure owned by an animation is exported as an animation only.
        anim = next((candidate for candidate in anims if candidate._fig is fig), None)
        if anim is not None:
            image_mds.append(_anim_md(anim, str(image_path), gallery_conf))
            continue

        fig_titles = _matplotlib_fig_titles(fig)

        # shallow copy should be fine here, just want to avoid changing
        # "kwargs" for subsequent figures processed by the loop
        these_kwargs = kwargs.copy()
        for attr in ["facecolor", "edgecolor"]:
            fig_attr = getattr(fig, "get_" + attr)()
            default_attr = matplotlib.rcParams["figure." + attr]
            # Keep a colour customised on the figure unless the caller overrides it.
            if to_rgba(fig_attr) != to_rgba(default_attr) and attr not in kwargs:
                these_kwargs[attr] = fig_attr

        # save the figures, and populate the srcset paths
        try:
            fig.savefig(image_path, **these_kwargs)
            dpi0 = matplotlib.rcParams["savefig.dpi"]
            if dpi0 == "figure":
                dpi0 = fig.dpi
            dpi0 = these_kwargs.get("dpi", dpi0)
            hinames = {0: image_path}

            # save other srcset paths, keyed by multiplication factor:
            for mult in srcset_mult_facs:
                if mult == 1:
                    continue
                multst = f"{mult}".replace(".", "_")
                hipath = image_path.parent / f"{image_path.stem}_{multst}x{image_path.suffix}"
                hikwargs = these_kwargs.copy()
                hikwargs["dpi"] = mult * dpi0
                fig.savefig(hipath, **hikwargs)
                hinames[mult] = hipath
        except Exception:
            plt.close("all")
            raise

        if "images" in gallery_conf["compress_images"]:
            # ``hinames`` already holds the base image under key 0, so every
            # written file is compressed exactly once.
            for saved_path in hinames.values():
                optipng(saved_path, gallery_conf["compress_images_args"])

        image_mds.append((image_path, fig_titles, [hinames]))

    plt.close("all")

    # Create the markdown or html output
    md = ""
    if len(image_mds) == 1:
        if isinstance(image_mds[0], str):
            # an animation, see _anim_md
            md = image_mds[0]
        else:
            # an image
            image_path, fig_titles, srcsetpaths = image_mds[0]
            md = figure_md_or_html([image_path], script, fig_titles, srcsetpaths=srcsetpaths)
    elif len(image_mds) > 1:
        # Several outputs: put raw html items in the horizontal list wrapper.
        image_htmls = []
        for entry in image_mds:
            if isinstance(entry, str):
                # Animation HTML is already renderable: embed it as-is
                # instead of trying to unpack it like an image tuple.
                image_htmls.append(entry)
            else:
                image_path, fig_titles, srcsetpaths = entry
                image_htmls.append(
                    figure_md_or_html([image_path], script, fig_titles, srcsetpaths=srcsetpaths, raw_html=True)
                )
        md = HLIST_HEADER % "".join(image_htmls)
    return md
def _anim_md(anim, image_path, gallery_conf):
    """Save a matplotlib animation as a GIF and return its embeddable HTML.

    Parameters
    ----------
    anim : matplotlib.animation.Animation
        The animation to export.
    image_path : str or Path
        Target image path; its file extension is replaced by ``.gif``.
    gallery_conf : dict
        Gallery configuration; ``thumbnail_size`` is read from it.

    Returns
    -------
    md : str
        The animation HTML wrapped in the ``mkd-glr-animation`` div.
    """
    import matplotlib
    from matplotlib.animation import FFMpegWriter, ImageMagickWriter

    # output the thumbnail as the image, as it will just be copied
    # if it's the file thumbnail
    fig = anim._fig
    # Swap only the final extension: a plain ``str.replace(".png", ".gif")``
    # would also rewrite ".png" inside directory names and would leave any
    # other extension (e.g. ".svg") untouched.
    image_path = str(Path(image_path).with_suffix(".gif"))
    fig_size = fig.get_size_inches()
    thumb_size = gallery_conf["thumbnail_size"]
    # Smallest thumbnail-to-figure ratio over both dimensions, rounded.
    use_dpi = round(min(t_s / f_s for t_s, f_s in zip(thumb_size, fig_size)))
    # FFmpeg is buggy for GIFs before Matplotlib 3.3.1
    if parse_version(matplotlib.__version__) >= parse_version("3.3.1") and FFMpegWriter.isAvailable():
        writer = "ffmpeg"
    elif ImageMagickWriter.isAvailable():
        writer = "imagemagick"
    else:
        writer = None  # no known writer found: leave the choice to ``anim.save``
    anim.save(image_path, writer=writer, dpi=use_dpi)
    html = anim._repr_html_()
    if html is None:  # plt.rcParams['animation.html'] == 'none'
        html = anim.to_jshtml()
    html = indent(html, " ")
    return _ANIMATION_RST.format(html=html)
def mayavi_scraper(block, script: GalleryScript):
    """Scrape Mayavi images.

    Each scene of the current Mayavi engine is saved to the next path of
    ``script.run_vars.image_path_iterator``, rescaled, optionally
    compressed, and all scenes are closed afterwards.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script being run

    Returns
    -------
    md : str
        The Markdown that will be rendered to HTML containing
        the images, as produced by :func:`figure_md_or_html`. Empty when
        mayavi is not installed.
    """
    try:
        from mayavi import mlab
    except ModuleNotFoundError:
        warn("No module named 'mayavi', skipping mayavi image scraper.")
        return ""  # skip scraper function

    path_source = script.run_vars.image_path_iterator
    saved_paths = []
    engine = mlab.get_engine()
    for scene, target in zip(engine.scenes, path_source):
        try:
            mlab.savefig(str(target), figure=scene)
        except Exception:
            # Do not leave scenes open behind a failed save.
            mlab.close(all=True)
            raise
        # make sure the image is not too large
        rescale_image(target, target, 850, 999)
        if "images" in script.gallery_conf["compress_images"]:
            optipng(target, script.gallery_conf["compress_images_args"])
        saved_paths.append(target)
    mlab.close(all=True)
    return figure_md_or_html(saved_paths, script)
def pyvista_scraper(block, script: GalleryScript):
    """Scrape PyVista images.

    Every registered plotter yields one image: its recorded GIF is moved to
    the next image path when the plotter has a ``_gif_filename``, otherwise
    a screenshot is taken. All plotters are closed afterwards.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script being run

    Returns
    -------
    md : str
        The Markdown that will be rendered to HTML containing
        the images, as produced by :func:`figure_md_or_html`.

    Raises
    ------
    RuntimeError
        If ``pyvista.BUILDING_GALLERY`` or ``pyvista.OFF_SCREEN`` is falsy.
    """
    import shutil

    import pyvista as pv
    import pyvista.plotting as pv_plt

    if not pv.BUILDING_GALLERY:
        raise RuntimeError(pv.BUILDING_GALLERY_ERROR_MSG)
    if not pv.OFF_SCREEN:
        raise RuntimeError("set pyvista.OFF_SCREEN=True to use the pyvista image scraper.")

    path_source = script.run_vars.image_path_iterator
    try:
        # pyvista >= 0.40
        plotters = pv_plt.plotter._ALL_PLOTTERS
    except AttributeError:
        # pyvista < 0.40
        plotters = pv_plt._ALL_PLOTTERS

    saved_paths = []
    for plotter in plotters.values():
        target = next(path_source)
        if hasattr(plotter, "_gif_filename"):
            # move gif to the target path, with a ".gif" extension
            target = target.with_suffix("").with_suffix(".gif")
            shutil.move(plotter._gif_filename, target)
        else:
            plotter.screenshot(target)
        saved_paths.append(target)

    pv.close_all()  # close and clear all plotters
    return figure_md_or_html(saved_paths, script)
# Scrapers defined in this module, keyed by the name of the library they scrape.
_scraper_dict = {
    "matplotlib": matplotlib_scraper,
    "mayavi": mayavi_scraper,
    "pyvista": pyvista_scraper,
}
# For now, these are what we support
_KNOWN_IMG_EXTS = (".png", ".svg", ".jpg", ".gif")


class ImageNotFoundError(FileNotFoundError):
    """Raised when an image exists on disk under none of the known extensions.

    Parameters
    ----------
    path : Path
        The image path that was looked up; kept on the ``path`` attribute.
    """

    def __init__(self, path):
        # Forward to the base initialiser so ``args`` gets populated. An
        # ``OSError`` subclass that overrides ``__init__`` without calling it
        # ends up with empty ``args``, and copying or pickling the exception
        # then fails because it is rebuilt from ``args``.
        super().__init__(path)
        self.path = path

    def __str__(self):
        return f"Image {self.path} can not be found on disk, with any of the known extensions {_KNOWN_IMG_EXTS}"
def _find_image_ext(path: Path, raise_if_not_found: bool = True) -> Tuple[Path, str]:
    """Find an image, tolerant of different file extensions.

    Parameters
    ----------
    path : Path
        Image path whose extension is replaced by each known extension in turn.
    raise_if_not_found : bool
        When True (default) a missing image is an error; when False the
        ``.png`` variant of ``path`` is returned even though it does not exist.

    Returns
    -------
    this_path, ext : tuple of (Path, str)
        The first existing variant of ``path`` and the extension it carries.

    Raises
    ------
    ImageNotFoundError
        If no variant exists and ``raise_if_not_found`` is True.
    """
    for ext in _KNOWN_IMG_EXTS:
        candidate = path.with_suffix(ext)
        if candidate.exists():
            return candidate, ext

    if raise_if_not_found:
        raise ImageNotFoundError(path)

    # None exists. Default to png.
    return path.with_suffix(".png"), ".png"
def save_figures(block, script: GalleryScript):
    """Run every configured image scraper on the example code-block.

    Parameters
    ----------
    block : tuple
        A tuple containing the (label, content, line_number) of the block.

    script : GalleryScript
        Script run.

    Returns
    -------
    images_md : str
        The concatenated md produced by all scrapers, embedding the images
        in the document.

    Raises
    ------
    ExtensionError
        If a scraper returns something that is not a string.
    """
    path_source = script.run_vars.image_path_iterator
    first_new = len(path_source)  # number of paths handed out before scraping
    md_parts = []
    for scraper in script.gallery_conf["image_scrapers"]:
        # One scraper may emit the md for several images at once.
        md = scraper(block, script)
        if not isinstance(md, str):
            raise ExtensionError(f"md from scraper {scraper!r} was not a string, got type {type(md)}:\n{md!r}")

        # Every path handed out since this function started must exist on disk.
        for index in range(first_new, len(path_source)):
            current_path, _ = _find_image_ext(path_source.paths[index])
            if not current_path.exists():
                raise ExtensionError(f"Scraper {scraper!r} did not produce expected image:\n{current_path}")

        md_parts.append(md)

    return "".join(md_parts)
# Length of the "mkd_glr_" prefix stripped from image file names to build alt text.
PREFIX_LEN = len("mkd_glr_")


def figure_md_or_html(
    figure_paths: List[Path],
    script: GalleryScript,
    fig_titles: str = "",
    srcsetpaths: Optional[List[Dict[float, Path]]] = None,
    raw_html=False,
):
    """Generate md or raw html for a list of image filenames.

    A single figure gives one Markdown image (or one ``<img>`` tag when
    ``raw_html`` is set). Several figures are always rendered as ``<img>``
    tags inside the ``HLIST_HEADER`` horizontal list, which is the output
    ``matplotlib_scraper`` builds for multiple figures: the wrapper is raw
    HTML, so its content has to be HTML as well.

    Parameters
    ----------
    figure_paths : List[Path]
        Paths of the figures; they must be located under
        ``script.gallery.generated_dir``.
    script : GalleryScript
        Script the figures belong to.
    fig_titles : str
        Titles of figures, empty string if no titles found. Currently
        only supported for matplotlib figures, default = ''. When empty, the
        alt text is derived from the first figure's file name.
    srcsetpaths : list or None
        List of dictionaries containing absolute paths, one per figure. If
        None, then the srcset field is populated with the figure path.
        (see ``image_srcset`` configuration option). Otherwise,
        each dict is of the form
        {0: /images/image.png, 2.0: /images/image_2_0x.png}
        where the key is the multiplication factor and the contents
        the path to the image created above.
    raw_html : bool
        Emit an ``<img>`` tag instead of Markdown for a single figure.

    Returns
    -------
    images_md : str
        md (or html) code to embed the images in the document; empty string
        when ``figure_paths`` is empty.
    """
    from html import escape

    if srcsetpaths is None:
        # this should never happen, but figure_md_or_html is public, so
        # this has to be a kwarg...
        srcsetpaths = [{0: fl} for fl in figure_paths]

    # Image locations are expressed relative to the generated gallery dir
    script_md_dir = script.gallery.generated_dir

    # Get alt text
    alt = ""
    if fig_titles:
        alt = fig_titles
    elif figure_paths:
        file_name = os.path.split(str(figure_paths[0]))[1]
        # remove ext & 'mkd_glr_' from start & n#'s from end
        file_name_noext = os.path.splitext(file_name)[0][PREFIX_LEN:-4]
        # replace - & _ with \s
        alt = re.sub(r"[-,_]", " ", file_name_noext)
    alt = _single_line_sanitize(alt)

    def _rel_and_srcset(figure_path, hinames):
        """Return the figure path relative to the gallery dir and its srcset string."""
        rel = figure_path.relative_to(script_md_dir).as_posix().lstrip("/")
        return rel, _get_srcset_st(script_md_dir, hinames)

    def _img_tag(figure_path, hinames):
        """Return the ``<img>`` tag of one figure, with the alt text escaped."""
        rel, srcset = _rel_and_srcset(figure_path, hinames)
        # Escaping keeps quotes or angle brackets of a title from breaking the
        # tag; attributes are separated by spaces only (no stray comma).
        return (
            f'<img alt="{escape(alt, quote=True)}" src="../{rel}" srcset="{srcset}" '
            f'class="sphx-glr-single-img" />'
        )

    if len(figure_paths) == 1:
        if raw_html:
            # html version
            return _img_tag(figure_paths[0], srcsetpaths[0])
        # markdown version
        rel, srcset = _rel_and_srcset(figure_paths[0], srcsetpaths[0])
        return f'![{alt}](./{rel}){{: .mkd-glr-single-img srcset="{srcset}"}}'

    if len(figure_paths) > 1:
        # Fill the list wrapper instead of emitting its "%s" placeholder verbatim.
        tags = [_img_tag(figure_path, srcsetpaths[nn]) for nn, figure_path in enumerate(figure_paths)]
        return HLIST_HEADER % "".join(tags)

    return ""
550def _get_srcset_st(sources_dir: Path, hinames: Dict[float, Path]):
551 """
552 Create the srcset string for including on the md line.
553 ie. sources_dir might be /home/sample-proj/source,
554 hinames posix paths to
555 0: /home/sample-proj/source/plot_types/images/img1.png,
556 2.0: /home/sample-proj/source/plot_types/images/img1_2_0x.png,
557 The result will be:
558 '/plot_types/basic/images/mkd_glr_pie_001.png,
559 /plot_types/basic/images/mkd_glr_pie_001_2_0x.png 2.0x'
560 """
561 srcst = ""
562 for k in hinames.keys():
563 path = hinames[k].relative_to(sources_dir).as_posix().lstrip("/")
564 srcst += "../" + path
565 if k == 0:
566 srcst += ", "
567 else:
568 srcst += f" {k:1.1f}x, "
569 if srcst[-2:] == ", ": 569 ↛ 571line 569 didn't jump to line 571 because the condition on line 569 was always true
570 srcst = srcst[:-2]
571 srcst += ""
573 return srcst
576def _single_line_sanitize(s):
577 """Remove problematic newlines."""
578 # For example, when setting a :alt: for an image, it shouldn't have \n
579 # This is a function in case we end up finding other things to replace
580 return s.replace("\n", " ")
# Templates used when several pictures are shown together: the pictures go
# into an html list that our CSS lays out horizontally. Each ``%s`` is filled
# with the ``%`` operator.
HLIST_HEADER = '\n<ul class="mkd-glr-horizontal">\n%s\n</ul>\n'

HLIST_IMAGE_MATPLOTLIB = "<li>\n%s\n</li>"

HLIST_SG_TEMPLATE = '\n * ![%s](../%s){: .mkd-glr-multi-img srcset="%s"}\n'
601###############################################################################
602# Module resetting
def _reset_matplotlib(gallery_conf, file: Path):
    """Reset matplotlib by calling ``pyplot.rcdefaults()``, if it is installed.

    Both arguments are accepted to match the signature shared by the reset
    functions and are not used here.
    """
    try:
        import matplotlib  # noqa: F401 -- only probes that it can be imported
    except ImportError:
        # Matplotlib is not present: do not care
        return

    # Proceed with resetting it
    _, plt = _import_matplotlib()
    plt.rcdefaults()
618def _reset_seaborn(gallery_conf, file: Path):
619 """Reset seaborn."""
620 # Horrible code to 'unload' seaborn, so that it resets
621 # its default when is load
622 # Python does not support unloading of modules
623 # https://bugs.python.org/issue9072
624 for module in list(sys.modules.keys()):
625 if "seaborn" in module:
626 del sys.modules[module]
# Reset functions defined in this module, keyed by the library they reset.
_reset_dict = dict(
    matplotlib=_reset_matplotlib,
    seaborn=_reset_seaborn,
)
def clean_modules(gallery_conf: Dict, file: Optional[Path]):
    """Remove, unload, or reset modules after running each example.

    A script can change settings while it runs that should not leak into the
    other examples of the gallery, so every callable listed under
    ``gallery_conf["reset_modules"]`` is invoked, in order.

    Parameters
    ----------
    gallery_conf : dict
        The gallery configuration.

    file : Path
        The example being run. Will be None when this is called entering
        a directory of examples to be built.
    """
    resetters = gallery_conf["reset_modules"]
    for reset in resetters:
        # Each resetter receives the configuration and the current example.
        reset(gallery_conf, file)