⬅ mkdocs_gallery/gen_single.py source

1 # Authors: Sylvain MARIE <sylvain.marie@se.com>
2 # + All contributors to <https://github.com/smarie/mkdocs-gallery>
3 #
4 # Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io>
5 # License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE>
6 """
7 Generator for a single script example in a gallery.
8 """
9  
10 from __future__ import absolute_import, division, print_function
11  
12 import ast
13 import codeop
14 import contextlib
  • F401 'copy' imported but unused
15 import copy
16 import gc
17 import importlib
18 import os
  • S403 Consider possible security implications associated with pickle module.
19 import pickle
20 import re
  • S404 Consider possible security implications associated with the subprocess module.
21 import subprocess
22 import sys
23 import traceback
24 import warnings
25 from copy import deepcopy
26 from functools import partial
27 from io import StringIO
28 from pathlib import Path
29 from shutil import copyfile
30 from textwrap import indent, dedent
31 from time import time
32 from typing import List, Set, Tuple
33  
34 from tqdm import tqdm
35  
36 from . import glr_path_static, mkdocs_compatibility
37 from .backreferences import _thumbnail_div, _write_backreferences, identify_names
38 from .binder import check_binder_conf, gen_binder_md
39 from .errors import ExtensionError
40 from .gen_data_model import GalleryBase, GalleryScript, GalleryScriptResults
41 from .notebook import jupyter_notebook, save_notebook
42 from .py_source_parser import remove_config_comments, split_code_and_text_blocks
43 from .scrapers import ImageNotFoundError, _find_image_ext, clean_modules, save_figures
44 from .utils import _new_file, _replace_by_new_if_needed, optipng, rescale_image, run_async
45  
46 logger = mkdocs_compatibility.getLogger("mkdocs-gallery")
47  
48  
49 ###############################################################################
50  
51  
52 class _LoggingTee(object):
53 """A tee object to redirect streams to the logger."""
54  
55 def __init__(self, src_filename: Path):
56 self.logger = logger
57 self.src_filename = src_filename
58 self.logger_buffer = ""
59 self.set_std_and_reset_position()
60  
61 def set_std_and_reset_position(self):
62 if not isinstance(sys.stdout, _LoggingTee):
63 self.origs = (sys.stdout, sys.stderr)
64 sys.stdout = sys.stderr = self
65 self.first_write = True
66 self.output = StringIO()
67 return self
68  
69 def restore_std(self):
70 sys.stdout.flush()
71 sys.stderr.flush()
72 sys.stdout, sys.stderr = self.origs
73  
74 def write(self, data):
75 self.output.write(data)
76  
77 if self.first_write:
78 self.logger.verbose("Output from %s", self.src_filename) # color='brown')
79 self.first_write = False
80  
81 data = self.logger_buffer + data
82 lines = data.splitlines()
83 if data and data[-1] not in "\r\n":
84 # Wait to write last line if it's incomplete. It will write next
85 # time or when the LoggingTee is flushed.
86 self.logger_buffer = lines[-1]
87 lines = lines[:-1]
88 else:
89 self.logger_buffer = ""
90  
91 for line in lines:
92 self.logger.verbose("%s", line)
93  
94 def flush(self):
95 self.output.flush()
96 if self.logger_buffer:
97 self.logger.verbose("%s", self.logger_buffer)
98 self.logger_buffer = ""
99  
100 # When called from a local terminal seaborn needs it in Python3
101 def isatty(self):
102 return self.output.isatty()
103  
104 # When called in gen_single, conveniently use context managing
105 def __enter__(self):
106 return self
107  
108 def __exit__(self, type_, value, tb):
109 self.restore_std()
110  
111  
112 ###############################################################################
# Markdown snippets used to embed the images of an example.
# When there are several pictures, the list is tagged with the `.mkd-glr-horizontal` attribute so
# that our CSS can turn it into a horizontal list.
# NOTE(review): the image templates below still contain the RST `.. image::` directive, which looks
# inherited from sphinx-gallery - confirm whether they are still used as-is.
HLIST_HEADER = """
{: .mkd-glr-horizontal }

"""

# One list item per image; `%s` is replaced by the image path.
HLIST_IMAGE_TEMPLATE = """
*

.. image:: /%s
{: .mkd-glr-multi-img }
"""

# Single-image variant; `%s` is replaced by the image path.
SINGLE_IMAGE = """
.. image:: /%s
{: .mkd-glr-single-img }
"""
132  
# Note: since this seems to be a one-liner, we use inline code. TODO check
# Markdown for the captured output of a code block. It is filled with `str.format` (`{0}` is the
# captured text), which is why the braces of the literal attribute lists are doubled.
CODE_OUTPUT = """Out:
{{: .mkd-glr-script-out }}

```{{.shell .mkd-glr-script-out-disp }}
{0}
```
\n"""

# Filled with `str.format`: `{0}` is rendered with no decimals and `{1}` with three decimals.
TIMING_CONTENT = """
**Total running time of the script:** ({0: .0f} minutes {1: .3f} seconds)
"""  # Strange enough: this CSS class does not actually exist in sphinx-gallery {{: .mkd-glr-timing }}

# TODO only if html ? .. only:: html
# Signature link carrying the `.mkd-glr-signature` attribute.
MKD_GLR_SIG = """\n
[Gallery generated by mkdocs-gallery](https://smarie.github.io/mkdocs-gallery){: .mkd-glr-signature }
"""

# Header used to include raw html from data _repr_html_ (`{0}` is the html payload).
HTML_HEADER = """<div class="output_subarea output_html rendered_html output_result">
{0}
</div>
"""
156  
157  
def codestr2md(codestr, lang: str = "python", lineno=None, is_exc: bool = False):
    """Return a fenced markdown code block wrapping `codestr`.

    Parameters
    ----------
    codestr : str
        The code (or traceback) to wrap.

    lang : str
        The language class put on the fence, e.g. ``python`` or ``pytb``.

    lineno : int, optional
        When not None, the value of the ``linenums`` attribute, i.e. the number of the first line.

    is_exc : bool
        When True the ``.mkd-glr-script-err-disp`` class is added to the fence.

    Returns
    -------
    md : str
        The fenced block, ending with a newline.
    """
    style = " .mkd-glr-script-err-disp" if is_exc else ""

    # The closing fence has to start on its own line. Without this, a snippet that does not end with
    # a newline would get the closing backticks glued to its last line, which breaks the block.
    if codestr and not codestr.endswith("\n"):
        codestr += "\n"

    attributes = f".{lang} {style}"
    if lineno is not None:
        # Sphinx only starts numbering from the first non-empty line. TODO do we need this too ?
        attributes += f' linenums="{lineno}"'

    return f"```{{{attributes}}}\n{codestr}```\n"
177  
178  
179 def _regroup(x):
180 x = x.groups()
181 return x[0] + x[1].split(".")[-1] + x[2]
182  
183  
def _sanitize_md(string):
    """Strip some sphinx roles and backtick markup from `string` using regular expressions.

    TODO is this still needed ?
    """
    role_open = r"(\s|^):[^:\s]+:`"
    backtick_open = r"(\s|^)`"
    closing = r"`(\W|$)"
    keep_all_groups = r"\1\2\3"

    # The substitutions are applied in this exact order.
    substitutions = (
        # :class:`a.b.c <thing here>`, :ref:`abc <thing here>` --> thing here
        (role_open + r"\S+\s*<([^>`]+)>" + closing, keep_all_groups),
        # :class:`~a.b.c` --> c
        (role_open + r"~([^`]+)" + closing, _regroup),
        # :class:`a.b.c` --> a.b.c
        (role_open + r"([^`]+)" + closing, keep_all_groups),
        # ``whatever thing`` --> whatever thing
        (backtick_open + r"`([^`]+)`" + closing, keep_all_groups),
        # `whatever thing` --> whatever thing
        (backtick_open + r"([^`]+)" + closing, keep_all_groups),
    )
    for pattern, replacement in substitutions:
        string = re.sub(pattern, replacement, string)
    return string
203  
204  
# Find RST/Markdown title chars,
# i.e. lines that consist of a run of one repeated character taken from `[\W _]`
# (a non-word character, a space or an underscore), such as "=====" or "-----".
# This conditional is not perfect but should hopefully be good enough.
# NOTE: despite the constant's name, the pattern is one captured character followed by `\1{3,}`,
# so it needs at least FOUR identical characters to match.
RE_3_OR_MORE_NON_ASCII = r"([\W _])\1{3,}"  # one char, then 3 or more repetitions of that same char

# A line made only of such a run, with optional surrounding spaces.
RST_TITLE_MARKER = re.compile(rf"^[ ]*{RE_3_OR_MORE_NON_ASCII}[ ]*$")
MD_TITLE_MARKER = re.compile(r"^[ ]*[#]+[ ]*(.*)[ ]*$")  # One or more starting hash with optional whitespaces before.
# First line that does not start with a title marker. Group 1 belongs to the marker pattern inside the
# negative lookahead; group 2 is the text left after any leading hashes and spaces.
FIRST_NON_MARKER_WITHOUT_HASH = re.compile(rf"^[# ]*(?!{RE_3_OR_MORE_NON_ASCII})[# ]*(.+)", re.MULTILINE)
213  
214  
def extract_readme_title(file: Path, contents: str) -> str:
    """Return the title of a gallery readme.

    Parameters
    ----------
    file : Path
        The readme file path (used for error messages only).

    contents : str
        The readme contents, already read from the file.

    Returns
    -------
    title : str
        The first line that is not a title marker, stripped, without its leading hashes.

    Raises
    ------
    ExtensionError
        If no such line can be found.
    """
    # Html comments must not be mistaken for the title.
    without_comments = re.sub("(<!--.*?-->)", "", contents, flags=re.DOTALL)

    found = FIRST_NON_MARKER_WITHOUT_HASH.search(without_comments)
    if found is not None:
        return found.group(2).strip()

    raise ExtensionError(f"Could not find a title in readme file: {file}")
240  
241  
def extract_readme_last_subtitle(file: Path, contents: str) -> str:
    """Return the last (sub)title found in a gallery readme.

    Paragraphs are scanned from the last one to the first one, and lines inside a paragraph from
    bottom to top. A markdown title is a line starting with hashes; an RST title is the line just
    above a marker line (a run of identical non-word characters).

    Parameters
    ----------
    file : Path
        The readme file path. It is not used by this function.

    contents : str
        The readme contents, already read from the file.

    Returns
    -------
    last_subtitle : str
        The last title of the readme, or None when no title was found.
    """
    found = None
    for paragraph in reversed(extract_paragraphs(contents)):
        underline_seen = False
        for text in reversed(paragraph.splitlines()):
            if underline_seen:
                # The previous (lower) line was an RST marker: this line is the title text.
                found = text
                break

            # - md style
            hashed = MD_TITLE_MARKER.search(text)
            if hashed is not None:
                found = hashed.group(1)
                break

            # - rst style
            underline_seen = RST_TITLE_MARKER.search(text) is not None

        if found:
            return found

    return found
284  
285  
def extract_paragraphs(doc: str) -> List[str]:
    """Split `doc` on blank lines and drop empty chunks and chunks starting with ``.. ``."""
    kept = []
    # lstrip is just in case docstring has a '\n\n' at the beginning
    for chunk in doc.lstrip().split("\n\n"):
        # skip comments and other syntax like `.. _link:`
        if chunk and not chunk.startswith(".. "):
            kept.append(chunk)
    return kept
294  
295  
def extract_intro_and_title(docstring: str, script: GalleryScript) -> Tuple[str, str]:
    """Extract the title and a short introduction from a module-level docstring.

    The title is not saved in the `script` object in this process, users have to do it explicitly.

    Parameters
    ----------
    docstring : str
        The docstring extracted from the top of the script.

    script : GalleryScript
        The script where the docstring was extracted from (used for error messages only).

    Returns
    -------
    title : str
        The title

    introduction : str
        The introduction, on a single line and truncated to 95 characters plus "...".

    Raises
    ------
    ExtensionError
        If the docstring has no paragraph, or no title can be found in its first paragraph.
    """
    paragraphs = extract_paragraphs(docstring)
    if not paragraphs:
        raise ExtensionError(
            "Example docstring should have a header for the example title. "
            f"Please check the example file:\n {script.script_file}\n"
        )

    # The title is looked up in the first paragraph only: it is its first line that is not a
    # title marker. This conditional is not perfect but should hopefully be good enough.
    first_paragraph, *following = paragraphs
    found = FIRST_NON_MARKER_WITHOUT_HASH.search(first_paragraph)
    if found is None:
        raise ExtensionError(f"Could not find a title in first paragraph:\n{first_paragraph}")
    title = found.group(2).strip()

    # The introduction is the second paragraph, or the title when there is none.
    intro_source = following[0] if following else title

    # Put the whole introduction on a single line, then remove the markup
    intro = _sanitize_md(intro_source.replace("\n", " "))

    # Truncate at 95 chars
    if len(intro) > 95:
        intro = intro[:95] + "..."

    return title, intro
348  
349  
def create_thumb_from_image(script: GalleryScript, src_image_path: Path) -> Path:
    """Create the thumbnail of `script` from the image at `src_image_path`.

    When the source image does not exist, an already existing thumbnail is returned as is; when
    there is none either, the thumbnail is created from the ``default_thumb_file`` of the gallery
    configuration, or from the bundled ``no_image.png``.

    Parameters
    ----------
    script : GalleryScript
        The gallery script.

    src_image_path : Path
        The source image path, with some flexibility about the extension.
        TODO do we actually need this flexibility here ?

    Returns
    -------
    actual_thumb_file : Path
        The actual thumbnail file generated (or the pre-existing one).
    """
    try:
        # Find the image, with flexibility about the actual extension ('png', 'svg', 'jpg', 'gif' are supported)
        src_image_path, ext = _find_image_ext(src_image_path)
    except ImageNotFoundError:
        # The source image does not exist !
        try:
            # Does a thumbnail already exist ? with extension ('png', 'svg', 'jpg', 'gif')
            existing_thumb, ext = _find_image_ext(script.get_thumbnail_file(".png"))
        except ImageNotFoundError:
            # Create something to replace the thumbnail
            fallback = script.gallery_conf.get("default_thumb_file")
            if fallback is None:
                fallback = os.path.join(glr_path_static(), "no_image.png")

            src_image_path, ext = _find_image_ext(Path(fallback))
        else:
            # Yes - let's assume this one will suit the needs
            return existing_thumb

    # Now let's create the thumbnail.
    # - First make sure the thumb dir exists
    script.gallery.make_thumb_dir()

    # - Then create the thumb file by copying the src image, possibly rescaling it.
    thumb_file = script.get_thumbnail_file(ext)
    if ext not in (".svg", ".gif"):
        # Need to rescale image
        max_width, max_height = script.gallery_conf["thumbnail_size"]
        rescale_image(
            in_file=src_image_path,
            out_file=thumb_file,
            max_width=max_width,
            max_height=max_height,
        )
        if "thumbnails" in script.gallery_conf["compress_images"]:
            optipng(thumb_file, script.gallery_conf["compress_images_args"])
    else:
        # No need to rescale image
        copyfile(src_image_path, thumb_file)

    return thumb_file
407  
408  
def generate(gallery: GalleryBase, seen_backrefs: Set) -> Tuple[str, str, str, List[GalleryScriptResults]]:
    """
    Generate the gallery md for an example directory, including the index.

    Parameters
    ----------
    gallery : GalleryBase
        The gallery or subgallery to process

    seen_backrefs : Set
        Backrefs seen so far.

    Returns
    -------
    title : str
        The gallery title, that is, the title of the readme file.

    root_subtitle : str
        The last subtitle of the readme, extracted only when the gallery has subsections
        (None otherwise).

    index_md : str
        The markdown to include in the global gallery readme.

    results : List[GalleryScriptResults]
        A list of processing results for all scripts in this gallery.
    """
    # Read the gallery readme: it is the first part of the index
    readme_contents = gallery.readme_file.read_text(encoding="utf-8")
    readme_title = extract_readme_title(gallery.readme_file, readme_contents)

    # The last subtitle is only looked for when the gallery has subsections
    last_readme_subtitle = (
        extract_readme_last_subtitle(gallery.readme_file, readme_contents) if gallery.has_subsections() else None
    )

    # Create the destination dir if needed
    gallery.make_generated_dir()

    results = []
    thumbnails = []
    progress = tqdm(gallery.scripts, desc=f"generating gallery for {gallery.generated_dir}... ")
    for script in progress:
        # Generate all files related to this example: download file, jupyter notebook, pickle, markdown...
        outcome = generate_file_md(script=script, seen_backrefs=seen_backrefs)
        results.append(outcome)

        # Create the thumbnails-containing div <div class="mkd-glr-thumbcontainer" ...> to place in the readme
        thumbnails.append(_thumbnail_div(outcome))

    thumbnails_md = "".join(thumbnails)
    marker = str(gallery.generated_dir_rel_project).replace(os.path.sep, "_")

    # Write the gallery summary index.md
    # Note: we write the HTML comment at the bottom instead of the top because having it at the top prevents html
    # page metadata from mkdocs-material to be processed correctly. See GH#96
    # Note: the "clear" is to disable floating elements again, now that the gallery section is over.
    index_md = f"""{readme_contents}

{thumbnails_md}
<div class="mkd-glr-clear"></div>

<!-- {marker} -->

"""

    return readme_title, last_readme_subtitle, index_md, results
474  
475  
def is_failing_example(script: GalleryScript):
    """Tell whether the source file of `script` is registered in ``gallery_conf["failing_examples"]``."""
    source_file = script.src_py_file
    return source_file in script.gallery_conf["failing_examples"]
478  
479  
def handle_exception(exc_info, script: GalleryScript):
    """Trim and format the exception raised by an example, log it, and return it as markdown.

    Parameters
    ----------
    exc_info : tuple
        The ``(type, value, traceback)`` triple, as returned by ``sys.exc_info()``.

    script : GalleryScript
        The script that was being executed.

    Returns
    -------
    except_md : str
        The trimmed traceback, as a ``pytb`` markdown code block.

    formatted_exception : str
        The trimmed traceback, as plain text.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import with gen_gallery - confirm.
    from .gen_gallery import _expected_failing_examples

    etype, exc, tb = exc_info
    stack = traceback.extract_tb(tb)
    # The full traceback will look something like:
    #
    # File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/gen_single.py...
    # mem_max, _ = gallery_conf['call_memory'](
    # File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/gen_galler...
    # mem, out = memory_usage(func, max_usage=True, retval=True,
    # File "/home/larsoner/.local/lib/python3.8/site-packages/memory_profi...
    # returned = f(*args, **kw)
    # File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/gen_single.py...
    # exec(self.code, self.fake_main.__dict__)
    # File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/tests/tiny...
    # raise RuntimeError('some error')
    # RuntimeError: some error
    #
    # But we should trim these to just the relevant trace at the user level,
    # so we inspect the traceback to find the start and stop points.
    start = 0
    stop = len(stack)
    root = os.path.dirname(__file__) + os.sep
    # `ii` is 1-based, so `stack[start:stop]` below drops every frame up to and including the last
    # internal frame that was matched.
    for ii, s in enumerate(stack, 1):
        # Trim our internal stack
        if s.filename.startswith(root + "gen_gallery.py") and s.name == "call_memory":
            start = max(ii, start)
        elif s.filename.startswith(root + "gen_single.py"):
            # SyntaxError
            # NOTE(review): in this file the compile() calls live in `_parse_code` and
            # `_needs_async_handling`, not in `execute_code_block`, so the "compile(" test below
            # probably never matches anymore - confirm how SyntaxError tracebacks are rendered.
            if s.name == "execute_code_block" and ("compile(" in s.line or "save_figures" in s.line):
                start = max(ii, start)
            # Any other error
            elif s.name == "__call__":
                start = max(ii, start)
            # Our internal input() check: drop the frame when it is the last one of the stack
            elif s.name == "_check_input" and ii == len(stack):
                stop = ii - 1
    stack = stack[start:stop]

    formatted_exception = "Traceback (most recent call last):\n" + "".join(
        traceback.format_list(stack) + traceback.format_exception_only(etype, exc)
    )

    src_file = script.src_py_file
    expected = src_file in _expected_failing_examples(
        gallery_conf=script.gallery_conf,
        mkdocs_conf=script.gallery.all_info.mkdocs_conf,
    )
    # Expected failures are logged at info level, the other ones as warnings.
    if expected:
        # func, color = logger.info, 'blue'
        func = logger.info
    else:
        # func, color = logger.warning, 'red'
        func = logger.warning
    func(f"{src_file} failed to execute correctly: {formatted_exception}")  # , color=color)

    except_md = codestr2md(formatted_exception, lang="pytb", is_exc=True)

    # Ensure it's marked as our style: this is now already done in codestr2md
    # except_md = "{: .mkd-glr-script-out }\n\n" + except_md
    return except_md, formatted_exception
543  
544  
545 # Adapted from github.com/python/cpython/blob/3.7/Lib/warnings.py
546 def _showwarning(message, category, filename, lineno, file=None, line=None):
547 if file is None:
548 file = sys.stderr
549 if file is None:
550 # sys.stderr is None when run with pythonw.exe:
551 # warnings get lost
552 return
553 text = warnings.formatwarning(message, category, filename, lineno, line)
554 try:
555 file.write(text)
556 except OSError:
557 # the file (probably stderr) is invalid - this warning gets lost.
558 pass
559  
560  
@contextlib.contextmanager
def patch_warnings():
    """Temporarily replace ``warnings.showwarning`` so that warnings are actually written out."""
    # Sphinx or logging or someone is patching warnings, but we want to
    # capture them, so let's patch over their patch...
    previous = warnings.showwarning
    warnings.showwarning = _showwarning
    try:
        yield
    finally:
        # Always put back whatever was installed before.
        warnings.showwarning = previous
572  
573  
class _exec_once:
    """Callable that executes `code` at most once (memory_usage may call functions more than once)."""

    def __init__(self, code, fake_main):
        self.code = code
        self.fake_main = fake_main
        self.run = False

    def __call__(self):
        # Note: `handle_exception` trims tracebacks at the frame named `__call__`, so the `exec`
        # has to stay directly in this method.
        if self.run:
            return
        self.run = True

        previous_main = sys.modules.get("__main__")
        with patch_warnings():
            # The code runs with `fake_main` registered as the `__main__` module.
            sys.modules["__main__"] = self.fake_main
            try:
                exec(self.code, self.fake_main.__dict__)  # noqa # our purpose is to execute code :)
            finally:
                if previous_main is not None:
                    sys.modules["__main__"] = previous_main
593  
594  
595 def _get_memory_base(gallery_conf):
596 """Get the base amount of memory used by running a Python process."""
597 if not gallery_conf["plot_gallery"]:
598 return 0.0
599 # There might be a cleaner way to do this at some point
600 from memory_profiler import memory_usage
601  
602 if sys.platform in ("win32", "darwin"):
603 sleep, timeout = (1, 2)
604 else:
605 sleep, timeout = (0.5, 1)
  • S603 Subprocess call - check for execution of untrusted input.
606 proc = subprocess.Popen(
607 [sys.executable, "-c", "import time, sys; time.sleep(%s); sys.exit(0)" % sleep],
608 close_fds=True,
609 )
610 memories = memory_usage(proc, interval=1e-3, timeout=timeout)
611 kwargs = dict(timeout=timeout) if sys.version_info >= (3, 5) else {}
612 proc.communicate(**kwargs)
613 # On OSX sometimes the last entry can be None
614 memories = [mem for mem in memories if mem is not None] + [0.0]
615 memory_base = max(memories)
616 return memory_base
617  
618  
619 def _ast_module():
620 """Get ast.Module function, dealing with:
621 https://bugs.python.org/issue35894"""
622 if sys.version_info >= (3, 8):
623 ast_Module = partial(ast.Module, type_ignores=[])
624 else:
625 ast_Module = ast.Module
626 return ast_Module
627  
628  
def _check_reset_logging_tee(src_file: Path):
    """Return the active `_LoggingTee` (creating one if needed), reset and installed as stdout/stderr."""
    # Helper to deal with our tests not necessarily calling parse_and_execute
    # but rather execute_code_block directly
    current_stdout = sys.stdout
    tee = current_stdout if isinstance(current_stdout, _LoggingTee) else _LoggingTee(src_file)
    tee.set_std_and_reset_position()
    return tee
638  
639  
def _exec_and_get_memory(compiler, ast_Module, code_ast, script: GalleryScript):
    """Execute `code_ast` and measure memory, capturing the value of a trailing expression.

    When the last statement is an expression, it is removed from `code_ast` and executed separately
    as an assignment to the ``___`` variable, so that its value can be read back afterwards.

    Returns
    -------
    is_last_expr : bool
        True when the last statement was an expression.

    mem_max : float
        The largest memory value reported by ``gallery_conf["call_memory"]`` for this block.
    """
    src_file = script.src_py_file.as_posix()
    statements = code_ast.body

    ends_with_expr = len(statements) > 0 and isinstance(statements[-1], ast.Expr)
    if not ends_with_expr:
        # Nothing to capture: run the block as a whole.
        mem_max, _ = script.gallery_conf["call_memory"](
            _exec_once(compiler(code_ast, src_file, "exec"), script.run_vars.fake_main)
        )
        return False, mem_max

    # `statements` is `code_ast.body` itself, so this removes the expression from the tree.
    final_value = statements.pop().value

    # exec body minus last expression
    mem_body, _ = script.gallery_conf["call_memory"](
        _exec_once(compiler(code_ast, src_file, "exec"), script.run_vars.fake_main)
    )

    # exec last expression, made into assignment
    capture = ast.Assign(targets=[ast.Name(id="___", ctx=ast.Store())], value=final_value)
    capture_module = ast_Module(body=[capture])
    ast.fix_missing_locations(capture_module)
    mem_last, _ = script.gallery_conf["call_memory"](
        _exec_once(compiler(capture_module, src_file, "exec"), script.run_vars.fake_main)
    )

    return True, max(mem_body, mem_last)
669  
670  
671 def _get_last_repr(gallery_conf, ___):
672 """Get a repr of the last expression, using first method in 'capture_repr'
673 available for the last expression."""
674 for meth in gallery_conf["capture_repr"]:
675 try:
676 last_repr = getattr(___, meth)()
677 # for case when last statement is print()
678 if last_repr is None or last_repr == "None":
679 repr_meth = None
680 else:
681 repr_meth = meth
682 except Exception:
683 last_repr = None
684 repr_meth = None
685 else:
686 if isinstance(last_repr, str):
687 break
688 return last_repr, repr_meth
689  
690  
def _get_code_output(is_last_expr, script: GalleryScript, logging_tee, images_md):
    """Assemble the markdown output of a code block: images, standard output and html output."""

    example_globals = script.run_vars.example_globals
    gallery_conf = script.gallery_conf

    last_repr = repr_meth = None
    if is_last_expr:
        # The value of the last expression was stored in the `___` variable
        last_value = example_globals["___"]
        type_pattern = gallery_conf["ignore_repr_types"]
        skip_repr = re.search(type_pattern, str(type(last_value))) if type_pattern else False
        if gallery_conf["capture_repr"] != () and not skip_repr:
            last_repr, repr_meth = _get_last_repr(gallery_conf, last_value)

    std_text = logging_tee.output.getvalue().expandtabs()

    # A plain string repr is appended to the standard output
    if repr_meth in ["__repr__", "__str__"] and last_repr:
        std_text = f"{std_text}\n{last_repr}"

    has_visible_output = bool(std_text) and not std_text.isspace()
    std_md = CODE_OUTPUT.format(std_text) if has_visible_output else ""

    # give html output its own header
    html_md = HTML_HEADER.format(indent(last_repr, " " * 4)) if repr_meth == "_repr_html_" else ""

    return f"""
{images_md}

{std_md}

{html_md}

"""
734  
735  
736 def _reset_cwd_syspath(cwd, path_to_remove):
737 """Reset current working directory to `cwd` and remove `path_to_remove` from `sys.path`."""
738 if path_to_remove in sys.path:
739 sys.path.remove(path_to_remove)
740 os.chdir(cwd)
741  
742  
def _parse_code(bcontent, src_file, *, compiler_flags):
    """Parse `bcontent` into an AST, wrapped by the async handling when the code requires it."""
    tree = compile(bcontent, src_file, "exec", compiler_flags | ast.PyCF_ONLY_AST, dont_inherit=1)
    if not _needs_async_handling(bcontent, src_file, compiler_flags=compiler_flags):
        return tree
    return _apply_async_handling(tree, compiler_flags=compiler_flags)
748  
749  
750 def _needs_async_handling(bcontent, src_file, *, compiler_flags) -> bool:
751 try:
752 compile(bcontent, src_file, "exec", compiler_flags, dont_inherit=1)
753 except SyntaxError as error:
754 # mkdocs-gallery supports top-level async code similar to jupyter notebooks.
755 # Without handling, this will raise a SyntaxError. In such a case, we apply a
756 # minimal async handling and try again. If the error persists, we bubble it up
757 # and let the caller handle it.
758 try:
759 compile(
760 f"async def __async_wrapper__():\n{indent(bcontent, ' ' * 4)}",
761 src_file,
762 "exec",
763 compiler_flags,
764 dont_inherit=1,
765 )
766 except SyntaxError:
767 # Raise the original error to avoid leaking the internal async handling to
768 # generated output.
769 raise error from None
770 else:
771 return True
772 else:
773 return False
774  
775  
776 def _apply_async_handling(code_ast, *, compiler_flags):
777 async_handling = compile(
778 dedent(
779 """
780 async def __async_wrapper__():
781 # original AST goes here
782 return locals()
783 __async_wrapper_locals__ = __run_async__(__async_wrapper__())
784 __async_wrapper_result__ = __async_wrapper_locals__.pop("__async_wrapper_result__", None)
785 globals().update(__async_wrapper_locals__)
786 __async_wrapper_result__
787 """
788 ),
789 "<_apply_async_handling()>",
790 "exec",
791 compiler_flags | ast.PyCF_ONLY_AST,
792 dont_inherit=1,
793 )
794  
795 *original_body, last_node = code_ast.body
796 if isinstance(last_node, ast.Expr):
797 last_node = ast.Assign(
798 targets=[ast.Name(id="__async_wrapper_result__", ctx=ast.Store())], value=last_node.value
799 )
800 original_body.append(last_node)
801  
802 async_wrapper = async_handling.body[0]
803 async_wrapper.body = [*original_body, *async_wrapper.body]
804  
805 return ast.fix_missing_locations(async_handling)
806  
807  
def execute_code_block(compiler, block, script: GalleryScript):
    """Execute the code block of the example file.

    Text blocks, and any block of a script flagged with ``stop_executing``, are skipped and yield
    an empty string. The block runs with the script's directory as working directory and appended
    to ``sys.path``; both are reset before returning.

    Parameters
    ----------
    compiler : codeop.Compile
        Compiler to compile AST of code block.

    block : Tuple[str, str, int]
        A single block: its label ('text' or 'code'), its content string and its leading
        line number.

    script: GalleryScript
        The gallery script

    Returns
    -------
    code_output : str
        Output of executing code in md, with ANSI escape sequences removed. When the block raised,
        this is the formatted traceback instead.

    Raises
    ------
    Exception
        The exception raised by the block is re-raised when ``abort_on_example_error`` is set.
    """
    # if script.run_vars.example_globals is None: # testing shortcut
    # script.run_vars.example_globals = script.run_vars.fake_main.__dict__

    blabel, bcontent, lineno = block

    # If example is not suitable to run anymore, skip executing its blocks
    if script.run_vars.stop_executing or blabel == "text":
        return ""

    cwd = os.getcwd()
    # Make sure stdout/stderr are redirected to a (reset) logging tee
    src_file = script.src_py_file
    logging_tee = _check_reset_logging_tee(src_file)
    assert isinstance(logging_tee, _LoggingTee)  # noqa

    # First cd in the original example dir, so that any file
    # created by the example get created in this directory
    os.chdir(src_file.parent)

    # Add the example dir to the path temporarily (will be removed after execution)
    new_path = os.getcwd()
    sys.path.append(new_path)

    # Save figures unless there is a `mkdocs_gallery_defer_figures` flag
    match = re.search(r"^[\ \t]*#\s*mkdocs_gallery_defer_figures[\ \t]*\n?", bcontent, re.MULTILINE)
    need_save_figures = match is None

    try:
        ast_Module = _ast_module()
        code_ast = _parse_code(bcontent, src_file, compiler_flags=compiler.flags)
        # Shift the line numbers so that they refer to the position of the block in the script
        ast.increment_lineno(code_ast, lineno - 1)

        is_last_expr, mem_max = _exec_and_get_memory(compiler, ast_Module, code_ast, script=script)
        script.run_vars.memory_used_in_blocks.append(mem_max)

        # This should be inside the try block, e.g., in case of a savefig error
        logging_tee.restore_std()
        if need_save_figures:
            # Reset the flag first, so that the except branch does not save the figures again
            need_save_figures = False
            images_md = save_figures(block, script)
        else:
            images_md = ""

    except Exception:
        logging_tee.restore_std()
        except_md, formatted_exception = handle_exception(sys.exc_info(), script)

        # Breaks build on first example error
        if script.gallery_conf["abort_on_example_error"]:
            raise

        # Stores failing file
        script.gallery_conf["failing_examples"][src_file] = formatted_exception

        # Stop further execution on that script
        script.run_vars.stop_executing = True

        code_output = "\n{0}\n\n\n\n".format(except_md)
        # still call this even though we won't use the images so that
        # figures are closed
        if need_save_figures:
            save_figures(block, script)
    else:
        _reset_cwd_syspath(cwd, new_path)

        code_output = _get_code_output(is_last_expr, script, logging_tee, images_md)
    finally:
        # Runs on every path, including the re-raise above
        _reset_cwd_syspath(cwd, new_path)
        logging_tee.restore_std()

    # Sanitize ANSI escape characters from MD output
    ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
    code_output = ansi_escape.sub("", code_output)

    return code_output
903  
904  
def _check_input(prompt=None):
    """Replacement for the ``input`` builtin in examples: always raises ExtensionError."""
    message = "Cannot use input() builtin function in mkdocs-gallery examples"
    raise ExtensionError(message)
907  
908  
def parse_and_execute(script: GalleryScript, script_blocks):
    """Execute and capture output from python script already in block structure

    Parameters
    ----------
    script : GalleryScript
        The script

    script_blocks : list
        (label, content, line_number)
        List where each element is a tuple with the label ('text' or 'code'),
        the corresponding content string of block and the leading line number

    Returns
    -------
    output_blocks : list
        List of strings where each element is the markdown
        representation of the output of each block

    time_elapsed : float
        Time elapsed during execution

    memory_used : float
        ``gallery_conf["memory_base"]`` plus the difference between the highest value recorded in
        ``script.run_vars.memory_used_in_blocks`` and the memory measured before the first block.

    Notes
    -----
    ``sys.argv`` and ``sys.path`` are modified while the example runs. They are restored in a ``finally``
    clause so that an exception escaping from the execution does not leave the process in a modified state.
    """
    # `import importlib` alone does not guarantee that the `util` submodule is loaded as an attribute
    import importlib.util

    # Examples may contain if __name__ == '__main__' guards for in example scikit-learn if the example uses
    # multiprocessing. Here we create a new __main__ module, and temporarily change sys.modules when running our example
    fake_main = importlib.util.module_from_spec(importlib.util.spec_from_loader("__main__", None))
    script.run_vars.fake_main = fake_main

    example_globals = fake_main.__dict__
    example_globals.update(
        {
            # A lot of examples contains 'print(__doc__)' for example in
            # scikit-learn so that running the example prints some useful
            # information. Because the docstring has been separated from
            # the code blocks in mkdocs-gallery, __doc__ is actually
            # __builtin__.__doc__ in the execution context and we do not
            # want to print it
            "__doc__": "",
            # Don't ever support __file__: Issues #166 #212
            # Don't let them use input()
            "input": _check_input,
            "__run_async__": run_async,
        }
    )
    script.run_vars.example_globals = example_globals

    # Manipulate the sys.argv before running the example
    # See https://github.com/sphinx-gallery/sphinx-gallery/pull/252

    # Remember the original argv so that we can put them back after run
    argv_orig = sys.argv[:]

    # Remember the original sys.path so that we can reset it after run
    sys_path_orig = deepcopy(sys.path)

    try:
        # Python file is the original one (not the copy for download)
        sys.argv[0] = script.src_py_file.as_posix()

        # Allow users to provide additional args through the 'reset_argv' option
        sys.argv[1:] = script.gallery_conf["reset_argv"](script)

        # Perform a garbage collection before starting so that perf kpis are accurate (memory and time)
        gc.collect()

        # Initial memory used
        memory_start, _ = script.gallery_conf["call_memory"](lambda: None)
        # include at least one entry to avoid max() ever failing
        script.run_vars.memory_used_in_blocks = [memory_start]

        t_start = time()
        compiler = codeop.Compile()

        # Execute block by block
        output_blocks = list()
        with _LoggingTee(script.src_py_file) as logging_tee:
            for block in script_blocks:
                logging_tee.set_std_and_reset_position()
                output_blocks.append(execute_code_block(compiler, block, script))

        # Compute the elapsed time
        time_elapsed = time() - t_start
    finally:
        # Set back the sys argv and the sys path, whether the execution succeeded or not
        sys.argv = argv_orig
        sys.path = sys_path_orig

    # Write md5 checksum if the example was meant to run (no-plot shall not cache md5sum) and has built correctly
    script.write_final_md5_file()

    # Declare the example as "passing"
    script.gallery_conf["passing_examples"].append(script)

    script.run_vars.memory_delta = max(script.run_vars.memory_used_in_blocks) - memory_start
    memory_used = script.gallery_conf["memory_base"] + script.run_vars.memory_delta

    return output_blocks, time_elapsed, memory_used
1008  
1009  
def generate_file_md(script: GalleryScript, seen_backrefs=None) -> GalleryScriptResults:
    """Generate the md file for a given example.

    Parameters
    ----------
    script : GalleryScript
        The script to process

    seen_backrefs : set
        The seen backreferences. A new empty set is used when this is None.

    Returns
    -------
    result : GalleryScriptResults
        The result of running this script

    Raises
    ------
    ExtensionError
        If the file-level 'dummy_images' setting is present but is not an int.
    """
    seen_backrefs = set() if seen_backrefs is None else seen_backrefs

    # Extract the contents of the script
    file_conf, script_blocks, node = split_code_and_text_blocks(script.src_py_file, return_node=True)

    # Extract the title and introduction from the module docstring and save the title in the object
    script.title, intro = extract_intro_and_title(docstring=script_blocks[0][1], script=script)

    # Copy source python script to target folder if it is not there/up to date, so that it can be served/downloaded
    # Note: surprisingly this uses a md5 too, but not the final .md5 persisted on disk.
    script.make_dwnld_py_file()

    # Can the script be entirely skipped (both doc generation and execution) ?
    if not script.has_changed_wrt_persisted_md5():
        # A priori we can...
        skip_and_return = True

        # ...however for executables (not shared modules) we might need to run anyway because of config
        if script.is_executable_example():
            if script.gallery_conf["run_stale_examples"]:
                # Run anyway because config says so.
                skip_and_return = False
            else:
                # Add the example to the "stale examples" before returning
                script.gallery_conf["stale_examples"].append(script.dwnld_py_file)
                # If expected to fail, let's remove it from the 'expected_failing_examples' list,
                # assuming it did when previously executed
                if script.src_py_file in script.gallery_conf["expected_failing_examples"]:
                    script.gallery_conf["expected_failing_examples"].remove(script.src_py_file)

        if skip_and_return:
            # Return with 0 exec time and mem usage, and the existing thumbnail
            thumb_source_path = script.get_thumbnail_source(file_conf)
            thumb_file = create_thumb_from_image(script, thumb_source_path)
            return GalleryScriptResults(script=script, intro=intro, exec_time=0.0, memory=0.0, thumb=thumb_file)

    # Reset matplotlib, seaborn, etc. if needed
    if script.is_executable_example():
        clean_modules(gallery_conf=script.gallery_conf, file=script.src_py_file)

    # Init the runtime vars. Create the images directory and init the image files template
    script.init_before_processing()

    if script.is_executable_example():
        # Note: this writes the md5 checksum if the example was meant to run
        output_blocks, time_elapsed, memory_used = parse_and_execute(script, script_blocks)
        logger.debug(f"{script.src_py_file} ran in : {time_elapsed:.2g} seconds\n")
    else:
        output_blocks = [""] * len(script_blocks)
        time_elapsed = memory_used = 0.0  # don't let the output change
        logger.debug(f"{script.src_py_file} parsed (not executed)\n")

        # Create as many dummy images as required if needed (default none) so that references to script images
        # Can still work, even if the script was not executed (in development mode typically, to go fast).
        # See https://sphinx-gallery.github.io/stable/configuration.html#generating-dummy-images
        nb_dummy_images_to_generate = file_conf.get("dummy_images", None)
        if nb_dummy_images_to_generate is not None:
            # Exact type check on purpose: isinstance() would also accept booleans
            if type(nb_dummy_images_to_generate) is not int:
                raise ExtensionError(
                    "mkdocs_gallery: 'dummy_images' setting is not a number, "
                    f"got {nb_dummy_images_to_generate!r}"
                )

            stock_img = os.path.join(glr_path_static(), "no_image.png")
            script.generate_n_dummy_images(img=stock_img, nb=nb_dummy_images_to_generate)

    # Remove the mkdocs-gallery configuration comments from the script if needed
    if script.gallery_conf["remove_config_comments"]:
        script_blocks = [
            (label, remove_config_comments(content), line_number) for label, content, line_number in script_blocks
        ]

    # Remove final empty block, which can occur after config comments are removed
    if script_blocks[-1][1].isspace():
        script_blocks = script_blocks[:-1]
        output_blocks = output_blocks[:-1]

    # Generate the markdown string containing the script prose, code and output.
    example_md = generate_md_from_blocks(script_blocks, output_blocks, file_conf, script.gallery_conf)

    # Write the generated markdown file
    md_header, md_footer = get_example_md_wrapper(script, time_elapsed, memory_used)
    full_md = md_header + example_md + md_footer
    script.save_md_example(full_md)

    # Create the image thumbnail for the gallery summary
    if is_failing_example(script):
        # Failing example thumbnail
        thumb_source_path = Path(os.path.join(glr_path_static(), "broken_example.png"))
    else:
        # Get the thumbnail source image, possibly from config
        thumb_source_path = script.get_thumbnail_source(file_conf)

    thumb_file = create_thumb_from_image(script, thumb_source_path)

    # Generate the jupyter notebook
    example_nb = jupyter_notebook(script, script_blocks)
    ipy_file = _new_file(script.ipynb_file)
    save_notebook(example_nb, ipy_file)
    _replace_by_new_if_needed(ipy_file, md5_mode="t")

    # Write names
    if script.gallery_conf["inspect_global_variables"]:
        global_variables = script.run_vars.example_globals
    else:
        global_variables = None

    # TODO dig in just in case
    example_code_obj = identify_names(script_blocks, global_variables, node)
    if example_code_obj:
        # Write a pickle file (.pickle) containing `example_code_obj`
        codeobj_fname = _new_file(script.codeobj_file)
        with open(codeobj_fname, "wb") as fid:
            pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL)
        _replace_by_new_if_needed(codeobj_fname)

    # Keep only the names whose module starts with the configured 'doc_module' prefix(es)
    backrefs = {
        "{module_short}.{name}".format(**cobj)
        for cobjs in example_code_obj.values()
        for cobj in cobjs
        if cobj["module"].startswith(script.gallery_conf["doc_module"])
    }

    # Create results object
    res = GalleryScriptResults(
        script=script,
        intro=intro,
        exec_time=time_elapsed,
        memory=memory_used,
        thumb=thumb_file,
    )

    # Write backreferences if required
    if script.gallery_conf["backreferences_dir"] is not None:
        _write_backreferences(backrefs, seen_backrefs, script_results=res)

    return res
1160  
1161  
# TODO the note should only appear in html mode. (.. only:: html)
# TODO maybe remove as much as possible the css for now?
# Template for the top of a generated example page. It is filled with `str.format` using two named fields:
# `pyfile_to_edit` (the source python file to mention in the "do not edit" comment) and
# `opt_binder_text` (an optional sentence appended to the download note, possibly empty).
EXAMPLE_HEADER = """
<!--
 DO NOT EDIT.
 THIS FILE WAS AUTOMATICALLY GENERATED BY mkdocs-gallery.
 TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
 "{pyfile_to_edit}"
 LINE NUMBERS ARE GIVEN BELOW.
-->

!!! note

    Click [here](#download_links)
    to download the full example code{opt_binder_text}

"""  # TODO there was a {{: .mkd-glr-example-title }} for the title but is it useful ?
# HTML comment inserted before a block of the generated page. It is filled with `str.format` using two
# positional fields: {0} is the first and {1} the last line number of the block in the python source.
MD_BLOCK_HEADER = """\
<!-- GENERATED FROM PYTHON SOURCE LINES {0}-{1} -->

"""
1183  
1184  
1185 def generate_md_from_blocks(script_blocks, output_blocks, file_conf, gallery_conf) -> str:
1186 """Generate the md string containing the script prose, code and output.
1187  
1188 Parameters
1189 ----------
1190 script_blocks : list
1191 (label, content, line_number)
1192 List where each element is a tuple with the label ('text' or 'code'),
1193 the corresponding content string of block and the leading line number
1194  
1195 output_blocks : list
1196 List of strings where each element is the restructured text
1197 representation of the output of each block
1198  
1199 file_conf : dict
1200 File-specific settings given in source file comments as:
1201 ``# mkdocs_gallery_<name> = <value>``
1202  
1203 gallery_conf : dict
1204 Contains the configuration of mkdocs-gallery
1205  
1206 Returns
1207 -------
1208 out : str
1209 The resulting markdown page.
1210 """
1211  
1212 # A simple example has two blocks: one for the
1213 # example introduction/explanation and one for the code
1214 is_example_notebook_like = len(script_blocks) > 2
1215 example_md = ""
1216 for bi, ((blabel, bcontent, lineno), code_output) in enumerate(zip(script_blocks, output_blocks)):
1217 # do not add comment to the title block (bi=0), otherwise the linking does not work properly
1218 if bi > 0:
1219 example_md += MD_BLOCK_HEADER.format(lineno, lineno + bcontent.count("\n"))
1220  
1221 if blabel == "code":
1222 if not file_conf.get("line_numbers", gallery_conf.get("line_numbers", False)):
1223 lineno = None
1224  
1225 code_md = codestr2md(bcontent, lang=gallery_conf["lang"], lineno=lineno) + "\n"
1226 if is_example_notebook_like:
1227 example_md += code_md
1228 example_md += code_output
1229 else:
1230 example_md += code_output
1231 if "mkd-glr-script-out" in code_output:
1232 # Add some vertical space after output
1233 example_md += "\n\n<br />\n\n" # "|\n\n"
1234 example_md += code_md
1235 else:
1236 block_separator = "\n\n" if not bcontent.endswith("\n") else "\n"
1237 example_md += bcontent + block_separator
1238  
1239 return example_md
1240  
1241  
def get_example_md_wrapper(script: GalleryScript, time_elapsed: float, memory_used: float) -> Tuple[str, str]:
    """Build the markdown placed before and after the body of an example page.

    Parameters
    ----------
    script : GalleryScript
        The script for which to generate the md.

    time_elapsed : float
        Time elapsed in seconds while executing file. It is reported in the footer only when
        it is at least ``gallery_conf["min_reported_time"]``.

    memory_used : float
        Additional memory used during the run. It is reported in the footer only when
        ``gallery_conf["show_memory"]`` is set.

    Returns
    -------
    md_before : str
        Part of the final markdown that goes before the notebook / python script.

    md_after : str
        Part of the final markdown that goes after the notebook / python script.
    """
    conf = script.gallery_conf

    # Binder is considered enabled when the checked binder configuration is not empty
    binder_conf = check_binder_conf(conf.get("binder"))
    binder_enabled = len(binder_conf) > 0

    # Header: "do not edit" comment and download note
    if binder_enabled:
        binder_text = " or to run this example in your browser via Binder"
    else:
        binder_text = ""
    md_before = EXAMPLE_HEADER.format(
        pyfile_to_edit=script.src_py_file_rel_project.as_posix(),
        opt_binder_text=binder_text,
    )

    # Footer, assembled piece by piece
    footer_parts = []

    # - Time and memory reports
    if time_elapsed >= conf["min_reported_time"]:
        minutes, seconds = divmod(time_elapsed, 60)
        footer_parts.append(TIMING_CONTENT.format(minutes, seconds))

    if conf["show_memory"]:
        footer_parts.append(f"**Estimated memory usage:** {memory_used:.0f} MB\n\n")

    # - Download buttons, with a binder badge if binder is enabled
    binder_badge_md = gen_binder_md(script, binder_conf) if binder_enabled else ""
    # The icon relies on mkdocs-material
    icon = ":fontawesome-solid-download:"
    py_name = script.dwnld_py_file.name
    nb_name = script.ipynb_file.name
    # TODO why aren't they centered actually ? does .center work ?
    footer_parts.append(
        f"""
<div id="download_links"></div>

{binder_badge_md}

[{icon} Download Python source code: {py_name}](./{py_name}){{ .md-button .center}}

[{icon} Download Jupyter notebook: {nb_name}](./{nb_name}){{ .md-button .center}}
"""
    )

    # - The "generated by mkdocs-gallery" signature
    footer_parts.append(MKD_GLR_SIG)

    return md_before, "".join(footer_parts)