mkdocs_gallery/gen_single.py - flake8 annotated source

1 # Authors: Sylvain MARIE <sylvain.marie@se.com>

2 # + All contributors to <https://github.com/smarie/mkdocs-gallery>

3 #

4 # Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io>

5 # License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE>

6 """

7 Generator for a single script example in a gallery.

8 """

9

10 from __future__ import absolute_import, division, print_function

11

12 import ast

13 import codeop

14 import contextlib

F401 'copy' imported but unused

15 import copy

16 import gc

17 import importlib

18 import os

S403 Consider possible security implications associated with pickle module.

19 import pickle

20 import re

S404 Consider possible security implications associated with the subprocess module.

21 import subprocess

22 import sys

23 import traceback

24 import warnings

25 from copy import deepcopy

26 from functools import partial

27 from io import StringIO

28 from pathlib import Path

29 from shutil import copyfile

30 from textwrap import indent, dedent

31 from time import time

32 from typing import List, Set, Tuple

33

34 from tqdm import tqdm

35

36 from . import glr_path_static, mkdocs_compatibility

37 from .backreferences import _thumbnail_div, _write_backreferences, identify_names

38 from .binder import check_binder_conf, gen_binder_md

39 from .errors import ExtensionError

40 from .gen_data_model import GalleryBase, GalleryScript, GalleryScriptResults

41 from .notebook import jupyter_notebook, save_notebook

42 from .py_source_parser import remove_config_comments, split_code_and_text_blocks

43 from .scrapers import ImageNotFoundError, _find_image_ext, clean_modules, save_figures

44 from .utils import _new_file, _replace_by_new_if_needed, optipng, rescale_image, run_async

45

46 logger = mkdocs_compatibility.getLogger("mkdocs-gallery")

47

48

49 ###############################################################################

50

51

52 class _LoggingTee(object):

53 """A tee object to redirect streams to the logger."""

54

55 def __init__(self, src_filename: Path):

56 self.logger = logger

57 self.src_filename = src_filename

58 self.logger_buffer = ""

59 self.set_std_and_reset_position()

60

61 def set_std_and_reset_position(self):

62 if not isinstance(sys.stdout, _LoggingTee):

63 self.origs = (sys.stdout, sys.stderr)

64 sys.stdout = sys.stderr = self

65 self.first_write = True

66 self.output = StringIO()

67 return self

68

69 def restore_std(self):

70 sys.stdout.flush()

71 sys.stderr.flush()

72 sys.stdout, sys.stderr = self.origs

73

74 def write(self, data):

75 self.output.write(data)

76

77 if self.first_write:

78 self.logger.verbose("Output from %s", self.src_filename) # color='brown')

79 self.first_write = False

80

81 data = self.logger_buffer + data

82 lines = data.splitlines()

83 if data and data[-1] not in "\r\n":

84 # Wait to write last line if it's incomplete. It will write next

85 # time or when the LoggingTee is flushed.

86 self.logger_buffer = lines[-1]

87 lines = lines[:-1]

88 else:

89 self.logger_buffer = ""

90

91 for line in lines:

92 self.logger.verbose("%s", line)

93

94 def flush(self):

95 self.output.flush()

96 if self.logger_buffer:

97 self.logger.verbose("%s", self.logger_buffer)

98 self.logger_buffer = ""

99

100 # When called from a local terminal seaborn needs it in Python3

101 def isatty(self):

102 return self.output.isatty()

103

104 # When called in gen_single, conveniently use context managing

105 def __enter__(self):

106 return self

107

108 def __exit__(self, type_, value, tb):

109 self.restore_std()

110

111

###############################################################################
# Markdown/HTML snippets used to assemble the generated pages.
#
# NOTE(review): leading whitespace inside the multi-line templates below could not be
# confirmed from the annotated listing - verify against the original file.
#
# The following strings are used when we have several pictures: we use
# an html div tag that our CSS uses to turn the lists into horizontal
# lists.
HLIST_HEADER = """
{: .mkd-glr-horizontal }

"""

# Entry for one image of a multi-image list; `%s` receives the image path.
HLIST_IMAGE_TEMPLATE = """
*

.. image:: /%s
{: .mkd-glr-multi-img }
"""

# Template for a single image; `%s` receives the image path.
SINGLE_IMAGE = """
.. image:: /%s
{: .mkd-glr-single-img }
"""

# Template for the captured standard output of a code block, filled with `str.format`
# (hence the doubled braces around the literal attribute lists).
# Note: since this seems to be a one-liner, we use inline code. TODO check
CODE_OUTPUT = """Out:
{{: .mkd-glr-script-out }}

```{{.shell .mkd-glr-script-out-disp }}
{0}
```
\n"""

# Timing line, filled with `str.format`: {0} is a number of minutes, {1} a number of seconds.
TIMING_CONTENT = """
**Total running time of the script:** ({0: .0f} minutes {1: .3f} seconds)
"""  # Strange enough: this CSS class does not actually exist in sphinx-gallery {{: .mkd-glr-timing }}

# Signature linking to the mkdocs-gallery documentation.
# TODO only if html ? .. only:: html
MKD_GLR_SIG = """\n
[Gallery generated by mkdocs-gallery](https://smarie.github.io/mkdocs-gallery){: .mkd-glr-signature }
"""

# Header used to include raw html from data _repr_html_
HTML_HEADER = """<div class="output_subarea output_html rendered_html output_result">
{0}
</div>
"""

156

157

def codestr2md(codestr, lang: str = "python", lineno=None, is_exc: bool = False):
    """Wrap a code string in a fenced markdown code block.

    Parameters
    ----------
    codestr : str
        The code to wrap. It is inserted as is, right before the closing fence.

    lang : str
        The language class written in the fence attribute list.

    lineno : int, optional
        When not None, a ``linenums="<lineno>"`` attribute is added to the fence.

    is_exc : bool
        When True, the ``.mkd-glr-script-err-disp`` css class is added to the fence.

    Returns
    -------
    md : str
        The fenced code block.
    """
    css_class = " .mkd-glr-script-err-disp" if is_exc else ""
    # Sphinx only starts numbering from the first non-empty line. TODO do we need this too ?
    numbering = "" if lineno is None else f' linenums="{lineno}"'
    return f"```{{.{lang} {css_class}{numbering}}}\n{codestr}```\n"

177

178

179 def _regroup(x):

180 x = x.groups()

181 return x[0] + x[1].split(".")[-1] + x[2]

182

183

def _sanitize_md(string):
    """Strip some sphinx roles and backtick markup from `string` using regular expressions.

    The substitutions are applied one after the other, in the order listed below.

    TODO is this still needed ?
    """
    role_open, closing = r"(\s|^):[^:\s]+:`", r"`(\W|$)"
    tick_open = r"(\s|^)`"
    keep_groups = r"\1\2\3"

    substitutions = (
        # :class:`a.b.c <thing here>`, :ref:`abc <thing here>` --> thing here
        (role_open + r"\S+\s*<([^>`]+)>" + closing, keep_groups),
        # :class:`~a.b.c` --> c
        (role_open + r"~([^`]+)" + closing, _regroup),
        # :class:`a.b.c` --> a.b.c
        (role_open + r"([^`]+)" + closing, keep_groups),
        # ``whatever thing`` --> whatever thing
        (tick_open + r"`([^`]+)`" + closing, keep_groups),
        # `whatever thing` --> whatever thing
        (tick_open + r"([^`]+)" + closing, keep_groups),
    )
    for pattern, replacement in substitutions:
        string = re.sub(pattern, replacement, string)
    return string

203

204

# Find RST/Markdown title chars,
# i.e. lines that consist of a single repeated character taken from `[\W _]`
# (a non-word character, a space or an underscore), such as "====" or "----".
# This conditional is not perfect but should hopefully be good enough.
# NOTE: despite its name and the historical "3 or more" wording, the pattern requires
# one character followed by 3 or more repetitions of it, i.e. at least 4 identical chars.
RE_3_OR_MORE_NON_ASCII = r"([\W _])\1{3,}"  # 4 or more identical chars

# A line made only of such a marker, with optional surrounding spaces.
RST_TITLE_MARKER = re.compile(rf"^[ ]*{RE_3_OR_MORE_NON_ASCII}[ ]*$")
MD_TITLE_MARKER = re.compile(r"^[ ]*[#]+[ ]*(.*)[ ]*$")  # One or more starting hash with optional whitespaces before.
# First line that does not start with a marker (after optional hashes/spaces); group 2 holds the rest of the line.
FIRST_NON_MARKER_WITHOUT_HASH = re.compile(rf"^[# ]*(?!{RE_3_OR_MORE_NON_ASCII})[# ]*(.+)", re.MULTILINE)

213

214

def extract_readme_title(file: Path, contents: str) -> str:
    """Same as `extract_intro_and_title` for the readme files in galleries, but does not return the introduction.

    Parameters
    ----------
    file : Path
        The readme file path (used for error messages only).

    contents : str
        The already parsed readme contents

    Returns
    -------
    title : str
        The readme title

    Raises
    ------
    ExtensionError
        If no title line can be found in `contents`.
    """
    # Remove html comments (possibly spanning several lines), so that a comment located
    # at the top of the readme is never mistaken for its title.
    contents = re.sub(r"<!--.*?-->", "", contents, flags=re.DOTALL)

    match = FIRST_NON_MARKER_WITHOUT_HASH.search(contents)
    if match is None:
        raise ExtensionError(f"Could not find a title in readme file: {file}")

    # Group 2 is the text of the first non-marker line, without its leading hashes.
    return match.group(2).strip()

240

241

def extract_readme_last_subtitle(file: Path, contents: str) -> str:
    """Find the last (sub)title present in a gallery readme.

    Paragraphs are scanned from the last one to the first one, and the lines of each
    paragraph from bottom to top. For a markdown title (``# Title``) the text following the
    hashes is returned; for an RST-style title, the line located right above the marker
    line is returned.

    Parameters
    ----------
    file : Path
        The readme file path (currently not used by this function).

    contents : str
        The already parsed readme contents

    Returns
    -------
    last_subtitle : str
        The readme last title, or None.
    """
    last_subtitle = None

    # Walk the document backwards: last paragraph first, last line first.
    for paragraph in reversed(extract_paragraphs(contents)):
        marker_found_below = False
        for text_line in reversed(paragraph.splitlines()):
            if marker_found_below:
                # The previous iteration saw an RST marker: this line is its title.
                last_subtitle = text_line
                break

            # Markdown style title ?
            hash_title = MD_TITLE_MARKER.search(text_line)
            if hash_title:
                last_subtitle = hash_title.group(1)
                break

            # RST style marker ? The title is then the line above it.
            if RST_TITLE_MARKER.search(text_line):
                marker_found_below = True

        if last_subtitle:
            break

    return last_subtitle

284

285

def extract_paragraphs(doc: str) -> List[str]:
    """Split `doc` into its non-empty paragraphs, dropping those starting with ``.. ``.

    Paragraphs are the chunks separated by an empty line (``"\\n\\n"``).
    """
    # lstrip is just in case docstring has a '\n\n' at the beginning
    chunks = doc.lstrip().split("\n\n")

    # remove comments and other syntax like `.. _link:`, as well as empty chunks
    return [chunk for chunk in chunks if chunk and not chunk.startswith(".. ")]

294

295

def extract_intro_and_title(docstring: str, script: GalleryScript) -> Tuple[str, str]:
    """Extract the title and a short introduction from a module-level docstring.

    The title is not saved in the `script` object in this process, users have to do it explicitly.

    Parameters
    ----------
    docstring : str
        The docstring extracted from the top of the script.

    script : GalleryScript
        The script where the docstring was extracted from (used for error messages only).

    Returns
    -------
    title : str
        The title

    introduction : str
        The introduction, on a single line and truncated to 95 characters (plus "...").

    Raises
    ------
    ExtensionError
        If the docstring has no paragraph, or if no title is found in its first paragraph.
    """
    paragraphs = extract_paragraphs(docstring)
    if not paragraphs:
        raise ExtensionError(
            "Example docstring should have a header for the example title. "
            f"Please check the example file:\n {script.script_file}\n"
        )

    # The title is the first line of the first paragraph that is not an RST/Markdown title
    # marker, without its leading hashes.
    # This conditional is not perfect but should hopefully be good enough.
    title_paragraph = paragraphs[0]
    title_match = FIRST_NON_MARKER_WITHOUT_HASH.search(title_paragraph)
    if title_match is None:
        raise ExtensionError(f"Could not find a title in first paragraph:\n{title_paragraph}")
    title = title_match.group(2).strip()

    # The introduction is the second paragraph, or the title itself when there is none.
    intro_source = paragraphs[1] if len(paragraphs) >= 2 else title

    # Put the introduction on a single line, then drop the markup
    intro = _sanitize_md(intro_source.replace("\n", " "))

    # Truncate at 95 chars
    if len(intro) > 95:
        intro = intro[:95] + "..."

    return title, intro

348

349

def create_thumb_from_image(script: GalleryScript, src_image_path: Path) -> Path:
    """Create the thumbnail of `script` from the image at `src_image_path`.

    Parameters
    ----------
    script : GalleryScript
        The gallery script.

    src_image_path : Path
        The source image path, with some flexibility about the extension.
        TODO do we actually need this flexibility here ?

    Returns
    -------
    actual_thumb_file : Path
        The actual thumbnail file generated, or the already existing thumbnail when the
        source image cannot be found.
    """
    try:
        # Find the image, with flexibility about the actual extension (see `_find_image_ext`)
        src_image_path, ext = _find_image_ext(src_image_path)
    except ImageNotFoundError:
        # The source image does not exist !
        try:
            # Does a thumbnail already exist, whatever its extension ?
            existing_thumb, ext = _find_image_ext(script.get_thumbnail_file(".png"))
        except ImageNotFoundError:
            # No: fall back on the configured default thumbnail, or on the packaged one
            fallback_path = script.gallery_conf.get("default_thumb_file")
            if fallback_path is None:
                fallback_path = os.path.join(glr_path_static(), "no_image.png")

            src_image_path, ext = _find_image_ext(Path(fallback_path))
        else:
            # Yes - let's assume this one will suit the needs
            return existing_thumb

    # Now let's create the thumbnail.
    # - First make sure the thumb dir exists
    script.gallery.make_thumb_dir()

    # - Then create the thumb file from the source image
    thumb_file = script.get_thumbnail_file(ext)
    if ext in (".svg", ".gif"):
        # No need to rescale the image: a plain copy is enough
        copyfile(src_image_path, thumb_file)
        return thumb_file

    # Need to rescale the image, and possibly to compress the result
    max_width, max_height = script.gallery_conf["thumbnail_size"]
    rescale_image(
        in_file=src_image_path,
        out_file=thumb_file,
        max_width=max_width,
        max_height=max_height,
    )
    if "thumbnails" in script.gallery_conf["compress_images"]:
        optipng(thumb_file, script.gallery_conf["compress_images_args"])

    return thumb_file

407

408

def generate(gallery: GalleryBase, seen_backrefs: Set) -> Tuple[str, str, str, List[GalleryScriptResults]]:
    """
    Generate the gallery md for an example directory, including the index.

    Parameters
    ----------
    gallery : GalleryBase
        The gallery or subgallery to process

    seen_backrefs : Set
        Backrefs seen so far.

    Returns
    -------
    title : str
        The gallery title, that is, the title of the readme file.

    root_subtitle : str
        The gallery subtitle that will be used in case the gallery has subsections.
        This is None when the gallery has no subsections.

    index_md : str
        The markdown to include in the global gallery readme.

    results : List[GalleryScriptResults]
        A list of processing results for all scripts in this gallery.
    """
    # Read the gallery readme and add it to the index
    readme_contents = gallery.readme_file.read_text(encoding="utf-8")
    readme_title = extract_readme_title(gallery.readme_file, readme_contents)
    if gallery.has_subsections():
        # parse and try to also extract the last subtitle
        last_readme_subtitle = extract_readme_last_subtitle(gallery.readme_file, readme_contents)
    else:
        # Dont look for the last subtitle
        last_readme_subtitle = None

    # Create the destination dir if needed
    gallery.make_generated_dir()

    all_thumbnail_entries = []
    results = []

    # Process the scripts one by one, with a progress bar
    for script in tqdm(gallery.scripts, desc=f"generating gallery for {gallery.generated_dir}... "):
        # Generate all files related to this example: download file, jupyter notebook, pickle, markdown...
        script_results = generate_file_md(script=script, seen_backrefs=seen_backrefs)
        results.append(script_results)

        # Create the thumbnails-containing div <div class="mkd-glr-thumbcontainer" ...> to place in the readme
        thumb_div = _thumbnail_div(script_results)
        all_thumbnail_entries.append(thumb_div)

    # Write the gallery summary index.md
    # Note: we write the HTML comment at the bottom instead of the top because having it at the top prevents html
    # page metadata from mkdocs-material to be processed correctly. See GH#96
    # NOTE(review): no HTML comment is visible in the template below in this listing; it may
    # have been lost during extraction - confirm against the original file.
    index_md = f"""{readme_contents}

{"".join(all_thumbnail_entries)}
<div class="mkd-glr-clear"></div>



"""
    # Note: the "clear" is to disable floating elements again, now that the gallery section is over.

    return readme_title, last_readme_subtitle, index_md, results

474

475

def is_failing_example(script: GalleryScript):
    """Tell whether the source file of `script` is listed in ``gallery_conf["failing_examples"]``."""
    source_file = script.src_py_file
    known_failures = script.gallery_conf["failing_examples"]
    return source_file in known_failures

478

479

def handle_exception(exc_info, script: GalleryScript):
    """Trim and format an exception raised while running an example, log it and return it.

    Parameters
    ----------
    exc_info : tuple
        The ``(type, value, traceback)`` triple describing the exception.

    script : GalleryScript
        The script that was being executed.

    Returns
    -------
    except_md : str
        The trimmed traceback as a markdown code block (see `codestr2md`).

    formatted_exception : str
        The trimmed traceback as plain text.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import - confirm.
    from .gen_gallery import _expected_failing_examples

    etype, exc, tb = exc_info
    stack = traceback.extract_tb(tb)
    # The full traceback will look something like:
    #
    #   File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/gen_single.py...
    #     mem_max, _ = gallery_conf['call_memory'](
    #   File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/gen_galler...
    #     mem, out = memory_usage(func, max_usage=True, retval=True,
    #   File "/home/larsoner/.local/lib/python3.8/site-packages/memory_profi...
    #     returned = f(*args, **kw)
    #   File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/gen_single.py...
    #     exec(self.code, self.fake_main.__dict__)
    #   File "/home/larsoner/python/mkdocs-gallery/sphinx_gallery/tests/tiny...
    #     raise RuntimeError('some error')
    # RuntimeError: some error
    #
    # But we should trim these to just the relevant trace at the user level,
    # so we inspect the traceback to find the start and stop points.
    start = 0
    stop = len(stack)
    root = os.path.dirname(__file__) + os.sep
    # `ii` is 1-based, so `stack[start:]` begins right after the last matching internal frame.
    for ii, s in enumerate(stack, 1):
        # Trim our internal stack
        if s.filename.startswith(root + "gen_gallery.py") and s.name == "call_memory":
            start = max(ii, start)
        elif s.filename.startswith(root + "gen_single.py"):
            # SyntaxError
            if s.name == "execute_code_block" and ("compile(" in s.line or "save_figures" in s.line):
                start = max(ii, start)
            # Any other error
            elif s.name == "__call__":
                start = max(ii, start)
            # Our internal input() check: drop this last frame from the output
            elif s.name == "_check_input" and ii == len(stack):
                stop = ii - 1
    stack = stack[start:stop]

    formatted_exception = "Traceback (most recent call last):\n" + "".join(
        traceback.format_list(stack) + traceback.format_exception_only(etype, exc)
    )

    # Failures of examples that are expected to fail are logged at info level, others as warnings.
    src_file = script.src_py_file
    expected = src_file in _expected_failing_examples(
        gallery_conf=script.gallery_conf,
        mkdocs_conf=script.gallery.all_info.mkdocs_conf,
    )
    if expected:
        # func, color = logger.info, 'blue'
        func = logger.info
    else:
        # func, color = logger.warning, 'red'
        func = logger.warning
    func(f"{src_file} failed to execute correctly: {formatted_exception}")  # , color=color)

    except_md = codestr2md(formatted_exception, lang="pytb", is_exc=True)

    # Ensure it's marked as our style: this is now already done in codestr2md
    # except_md = "{: .mkd-glr-script-out }\n\n" + except_md
    return except_md, formatted_exception

543

544

545 # Adapted from github.com/python/cpython/blob/3.7/Lib/warnings.py

546 def _showwarning(message, category, filename, lineno, file=None, line=None):

547 if file is None:

548 file = sys.stderr

549 if file is None:

550 # sys.stderr is None when run with pythonw.exe:

551 # warnings get lost

552 return

553 text = warnings.formatwarning(message, category, filename, lineno, line)

554 try:

555 file.write(text)

556 except OSError:

557 # the file (probably stderr) is invalid - this warning gets lost.

558 pass

559

560

@contextlib.contextmanager
def patch_warnings():
    """Temporarily replace ``warnings.showwarning`` with `_showwarning`.

    The previous hook is put back when the context exits, even on error.
    """
    # Sphinx or logging or someone is patching warnings, but we want to
    # capture them, so let's patch over their patch...
    previous_hook = warnings.showwarning
    warnings.showwarning = _showwarning
    try:
        yield
    finally:
        warnings.showwarning = previous_hook

572

573

574 class _exec_once(object):

575 """Deal with memory_usage calling functions more than once (argh)."""

576

577 def __init__(self, code, fake_main):

578 self.code = code

579 self.fake_main = fake_main

580 self.run = False

581

582 def __call__(self):

583 if not self.run:

584 self.run = True

585 old_main = sys.modules.get("__main__", None)

586 with patch_warnings():

587 sys.modules["__main__"] = self.fake_main

588 try:

589 exec(self.code, self.fake_main.__dict__) # noqa # our purpose is to execute code :)

590 finally:

591 if old_main is not None:

592 sys.modules["__main__"] = old_main

593

594

595 def _get_memory_base(gallery_conf):

596 """Get the base amount of memory used by running a Python process."""

597 if not gallery_conf["plot_gallery"]:

598 return 0.0

599 # There might be a cleaner way to do this at some point

600 from memory_profiler import memory_usage

601

602 if sys.platform in ("win32", "darwin"):

603 sleep, timeout = (1, 2)

604 else:

605 sleep, timeout = (0.5, 1)

S603 Subprocess call - check for execution of untrusted input.

606 proc = subprocess.Popen(

607 [sys.executable, "-c", "import time, sys; time.sleep(%s); sys.exit(0)" % sleep],

608 close_fds=True,

609 )

610 memories = memory_usage(proc, interval=1e-3, timeout=timeout)

611 kwargs = dict(timeout=timeout) if sys.version_info >= (3, 5) else {}

612 proc.communicate(**kwargs)

613 # On OSX sometimes the last entry can be None

614 memories = [mem for mem in memories if mem is not None] + [0.0]

615 memory_base = max(memories)

616 return memory_base

617

618

619 def _ast_module():

620 """Get ast.Module function, dealing with:

621 https://bugs.python.org/issue35894"""

622 if sys.version_info >= (3, 8):

623 ast_Module = partial(ast.Module, type_ignores=[])

624 else:

625 ast_Module = ast.Module

626 return ast_Module

627

628

def _check_reset_logging_tee(src_file: Path):
    """Return the `_LoggingTee` installed on ``sys.stdout``, creating one if needed, after resetting it."""
    # Helper to deal with our tests not necessarily calling parse_and_execute
    # but rather execute_code_block directly
    current_stdout = sys.stdout
    tee = current_stdout if isinstance(current_stdout, _LoggingTee) else _LoggingTee(src_file)
    tee.set_std_and_reset_position()
    return tee

638

639

def _exec_and_get_memory(compiler, ast_Module, code_ast, script: GalleryScript):
    """Execute ast, capturing output if last line is expression and get max memory usage.

    Parameters
    ----------
    compiler : callable
        Called as ``compiler(ast, filename, "exec")`` to compile an AST.

    ast_Module : callable
        Factory used to create the module node holding the last expression (see `_ast_module`).

    code_ast : ast.Module
        The parsed code block. Its last statement is removed from its body when it is an expression.

    script : GalleryScript
        The script being executed.

    Returns
    -------
    is_last_expr : bool
        True if the last statement of the block is an expression. Its value is then stored
        in the ``___`` variable of the execution namespace.

    mem_max : float
        The largest of the values returned by ``gallery_conf["call_memory"]``.
    """

    src_file = script.src_py_file.as_posix()

    # capture output if last line is expression
    is_last_expr = False

    if len(code_ast.body) and isinstance(code_ast.body[-1], ast.Expr):
        is_last_expr = True
        last_val = code_ast.body.pop().value
        # exec body minus last expression
        mem_body, _ = script.gallery_conf["call_memory"](
            _exec_once(compiler(code_ast, src_file, "exec"), script.run_vars.fake_main)
        )
        # exec last expression, made into assignment
        body = [ast.Assign(targets=[ast.Name(id="___", ctx=ast.Store())], value=last_val)]
        last_val_ast = ast_Module(body=body)
        # The nodes created above have no position yet: fill them in before compiling
        ast.fix_missing_locations(last_val_ast)
        mem_last, _ = script.gallery_conf["call_memory"](
            _exec_once(compiler(last_val_ast, src_file, "exec"), script.run_vars.fake_main)
        )
        mem_max = max(mem_body, mem_last)
    else:
        # No trailing expression: execute the whole block in one go
        mem_max, _ = script.gallery_conf["call_memory"](
            _exec_once(compiler(code_ast, src_file, "exec"), script.run_vars.fake_main)
        )

    return is_last_expr, mem_max

669

670

671 def _get_last_repr(gallery_conf, ___):

672 """Get a repr of the last expression, using first method in 'capture_repr'

673 available for the last expression."""

674 for meth in gallery_conf["capture_repr"]:

675 try:

676 last_repr = getattr(___, meth)()

677 # for case when last statement is print()

678 if last_repr is None or last_repr == "None":

679 repr_meth = None

680 else:

681 repr_meth = meth

682 except Exception:

683 last_repr = None

684 repr_meth = None

685 else:

686 if isinstance(last_repr, str):

687 break

688 return last_repr, repr_meth

689

690

def _get_code_output(is_last_expr, script: GalleryScript, logging_tee, images_md):
    """Assemble the markdown output of a code block: images, captured std output and html repr.

    Parameters
    ----------
    is_last_expr : bool
        Whether the block ended with an expression, whose value is then read from the
        ``___`` variable of the example globals.

    script : GalleryScript
        The script being executed.

    logging_tee : _LoggingTee
        The tee holding the captured output of the block.

    images_md : str
        The markdown of the images produced by the block.

    Returns
    -------
    code_output : str
        The markdown for the output of the block.
    """
    example_globals = script.run_vars.example_globals
    conf = script.gallery_conf

    last_repr = repr_meth = None
    if is_last_expr:
        # capture the last repr variable, unless its type is to be ignored
        last_value = example_globals["___"]
        type_is_ignored = False
        if conf["ignore_repr_types"]:
            type_is_ignored = re.search(conf["ignore_repr_types"], str(type(last_value)))
        if conf["capture_repr"] != () and not type_is_ignored:
            last_repr, repr_meth = _get_last_repr(conf, last_value)

    std_text = logging_tee.output.getvalue().expandtabs()

    # normal string output: append the repr to what was printed
    if repr_meth in ["__repr__", "__str__"] and last_repr:
        std_text = f"{std_text}\n{last_repr}"

    # only emit the output block when there is something else than whitespace to show
    has_visible_text = std_text and not std_text.isspace()
    captured_std = CODE_OUTPUT.format(std_text) if has_visible_text else ""

    # give html output its own header
    is_html = repr_meth == "_repr_html_"
    captured_html = HTML_HEADER.format(indent(last_repr, " " * 4)) if is_html else ""

    return f"""
{images_md}

{captured_std}

{captured_html}

"""

734

735

736 def _reset_cwd_syspath(cwd, path_to_remove):

737 """Reset current working directory to `cwd` and remove `path_to_remove` from `sys.path`."""

738 if path_to_remove in sys.path:

739 sys.path.remove(path_to_remove)

740 os.chdir(cwd)

741

742

def _parse_code(bcontent, src_file, *, compiler_flags):
    """Parse the code `bcontent` into an AST, wrapped for top-level async code when needed."""
    ast_only_flags = compiler_flags | ast.PyCF_ONLY_AST
    tree = compile(bcontent, src_file, "exec", ast_only_flags, dont_inherit=1)
    if not _needs_async_handling(bcontent, src_file, compiler_flags=compiler_flags):
        return tree
    return _apply_async_handling(tree, compiler_flags=compiler_flags)

748

749

750 def _needs_async_handling(bcontent, src_file, *, compiler_flags) -> bool:

751 try:

752 compile(bcontent, src_file, "exec", compiler_flags, dont_inherit=1)

753 except SyntaxError as error:

754 # mkdocs-gallery supports top-level async code similar to jupyter notebooks.

755 # Without handling, this will raise a SyntaxError. In such a case, we apply a

756 # minimal async handling and try again. If the error persists, we bubble it up

757 # and let the caller handle it.

758 try:

759 compile(

760 f"async def __async_wrapper__():\n{indent(bcontent, ' ' * 4)}",

761 src_file,

762 "exec",

763 compiler_flags,

764 dont_inherit=1,

765 )

766 except SyntaxError:

767 # Raise the original error to avoid leaking the internal async handling to

768 # generated output.

769 raise error from None

770 else:

771 return True

772 else:

773 return False

774

775

776 def _apply_async_handling(code_ast, *, compiler_flags):

777 async_handling = compile(

778 dedent(

779 """

780 async def __async_wrapper__():

781 # original AST goes here

782 return locals()

783 __async_wrapper_locals__ = __run_async__(__async_wrapper__())

784 __async_wrapper_result__ = __async_wrapper_locals__.pop("__async_wrapper_result__", None)

785 globals().update(__async_wrapper_locals__)

786 __async_wrapper_result__

787 """

788 ),

789 "<_apply_async_handling()>",

790 "exec",

791 compiler_flags | ast.PyCF_ONLY_AST,

792 dont_inherit=1,

793 )

794

795 *original_body, last_node = code_ast.body

796 if isinstance(last_node, ast.Expr):

797 last_node = ast.Assign(

798 targets=[ast.Name(id="__async_wrapper_result__", ctx=ast.Store())], value=last_node.value

799 )

800 original_body.append(last_node)

801

802 async_wrapper = async_handling.body[0]

803 async_wrapper.body = [*original_body, *async_wrapper.body]

804

805 return ast.fix_missing_locations(async_handling)

806

807

def execute_code_block(compiler, block, script: GalleryScript):
    """Execute the code block of the example file.

    Parameters
    ----------
    compiler : codeop.Compile
        Compiler to compile AST of code block.

    block : Tuple[str, str, int]
        A single block: its label ('text' or 'code'), the corresponding content string
        and the leading line number.

    script: GalleryScript
        The gallery script

    Returns
    -------
    code_output : str
        Output of executing code in md. This is an empty string for a text block, or when
        the execution of the script has been stopped by a previous error.
    """
    # if script.run_vars.example_globals is None:  # testing shortcut
    #     script.run_vars.example_globals = script.run_vars.fake_main.__dict__

    blabel, bcontent, lineno = block

    # If example is not suitable to run anymore, skip executing its blocks
    if script.run_vars.stop_executing or blabel == "text":
        return ""

    cwd = os.getcwd()
    # Capture stdout and stderr in the logging tee
    src_file = script.src_py_file
    logging_tee = _check_reset_logging_tee(src_file)
    assert isinstance(logging_tee, _LoggingTee)  # noqa

    # First cd in the original example dir, so that any file
    # created by the example get created in this directory
    os.chdir(src_file.parent)

    # Add the example dir to the path temporarily (will be removed after execution)
    new_path = os.getcwd()
    sys.path.append(new_path)

    # Save figures unless there is a `mkdocs_gallery_defer_figures` flag
    match = re.search(r"^[\ \t]*#\s*mkdocs_gallery_defer_figures[\ \t]*\n?", bcontent, re.MULTILINE)
    need_save_figures = match is None

    try:
        ast_Module = _ast_module()
        code_ast = _parse_code(bcontent, src_file, compiler_flags=compiler.flags)
        # Shift the line numbers so that they refer to the positions in the example file
        ast.increment_lineno(code_ast, lineno - 1)

        is_last_expr, mem_max = _exec_and_get_memory(compiler, ast_Module, code_ast, script=script)
        script.run_vars.memory_used_in_blocks.append(mem_max)

        # This should be inside the try block, e.g., in case of a savefig error
        logging_tee.restore_std()
        if need_save_figures:
            # Reset the flag first, so that the except clause does not save figures again
            need_save_figures = False
            images_md = save_figures(block, script)
        else:
            images_md = ""

    except Exception:
        logging_tee.restore_std()
        except_md, formatted_exception = handle_exception(sys.exc_info(), script)

        # Breaks build on first example error
        if script.gallery_conf["abort_on_example_error"]:
            raise

        # Stores failing file
        script.gallery_conf["failing_examples"][src_file] = formatted_exception

        # Stop further execution on that script
        script.run_vars.stop_executing = True

        code_output = "\n{0}\n\n\n\n".format(except_md)
        # still call this even though we won't use the images so that
        # figures are closed
        if need_save_figures:
            save_figures(block, script)
    else:
        _reset_cwd_syspath(cwd, new_path)

        code_output = _get_code_output(is_last_expr, script, logging_tee, images_md)
    finally:
        # Always go back to the initial directory, sys.path and standard streams
        _reset_cwd_syspath(cwd, new_path)
        logging_tee.restore_std()

    # Sanitize ANSI escape characters from MD output
    ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
    code_output = ansi_escape.sub("", code_output)

    return code_output

903

904

def _check_input(prompt=None):
    """Stand-in for the `input` builtin in examples: always raises an `ExtensionError`."""
    message = "Cannot use input() builtin function in mkdocs-gallery examples"
    raise ExtensionError(message)

907

908

def parse_and_execute(script: GalleryScript, script_blocks):
    """Execute an example script block by block and capture each block's output.

    The blocks run inside a freshly created fake ``__main__`` module. ``sys.argv`` and ``sys.path`` are
    modified for the duration of the run and are always restored afterwards, including when a block (or the
    ``reset_argv`` callback) raises.

    Parameters
    ----------
    script : GalleryScript
        The script. Its ``run_vars`` receives ``fake_main``, ``example_globals``, ``memory_used_in_blocks``
        and ``memory_delta``.

    script_blocks : list
        (label, content, line_number)
        List where each element is a tuple with the label ('text' or 'code'),
        the corresponding content string of block and the leading line number

    Returns
    -------
    output_blocks : list
        List of strings where each element is the markdown
        representation of the output of each block

    time_elapsed : float
        Time elapsed during execution, in seconds

    memory_used : float
        ``gallery_conf["memory_base"]`` plus the peak memory increase observed during execution
    """
    # `import importlib` alone does not guarantee that the `util` submodule is loaded: import it explicitly.
    import importlib.util

    # Examples may contain `if __name__ == '__main__'` guards, for example in scikit-learn if the example uses
    # multiprocessing. Here we create a new __main__ module that is used as the namespace of the example.
    fake_main = importlib.util.module_from_spec(importlib.util.spec_from_loader("__main__", None))
    script.run_vars.fake_main = fake_main

    example_globals = fake_main.__dict__
    example_globals.update(
        {
            # A lot of examples contains 'print(__doc__)' for example in
            # scikit-learn so that running the example prints some useful
            # information. Because the docstring has been separated from
            # the code blocks in mkdocs-gallery, __doc__ is actually
            # __builtin__.__doc__ in the execution context and we do not
            # want to print it
            "__doc__": "",
            # Don't ever support __file__: Issues #166 #212
            # Don't let them use input()
            "input": _check_input,
            "__run_async__": run_async,
        }
    )
    script.run_vars.example_globals = example_globals

    # Manipulate the sys.argv before running the example
    # See https://github.com/sphinx-gallery/sphinx-gallery/pull/252

    # Remember the original argv and sys.path so that we can put them back after the run
    argv_orig = sys.argv[:]
    sys_path_orig = deepcopy(sys.path)

    try:
        # Python file is the original one (not the copy for download)
        sys.argv[0] = script.src_py_file.as_posix()

        # Allow users to provide additional args through the 'reset_argv' option
        sys.argv[1:] = script.gallery_conf["reset_argv"](script)

        # Perform a garbage collection before starting so that perf kpis are accurate (memory and time)
        gc.collect()

        # Initial memory used
        memory_start, _ = script.gallery_conf["call_memory"](lambda: None)
        # Include at least one entry to avoid max() ever failing
        script.run_vars.memory_used_in_blocks = [memory_start]

        t_start = time()
        compiler = codeop.Compile()

        # Execute block by block
        output_blocks = []
        with _LoggingTee(script.src_py_file) as logging_tee:
            for block in script_blocks:
                logging_tee.set_std_and_reset_position()
                output_blocks.append(execute_code_block(compiler, block, script))

        # Compute the elapsed time
        time_elapsed = time() - t_start
    finally:
        # Always set back sys.argv and sys.path, even if the example aborted the build with an exception,
        # so that the host process is not left with the example's settings.
        sys.argv = argv_orig
        sys.path = sys_path_orig

    # Write md5 checksum if the example was meant to run (no-plot shall not cache md5sum) and has built correctly
    script.write_final_md5_file()

    # Declare the example as "passing"
    script.gallery_conf["passing_examples"].append(script)

    script.run_vars.memory_delta = max(script.run_vars.memory_used_in_blocks) - memory_start
    memory_used = script.gallery_conf["memory_base"] + script.run_vars.memory_delta

    return output_blocks, time_elapsed, memory_used

1008

1009

def generate_file_md(script: GalleryScript, seen_backrefs=None) -> GalleryScriptResults:
    """Generate the md file for a given example.

    The script is split into text/code blocks, executed if it is an executable example, and rendered to a
    markdown page. A thumbnail, a jupyter notebook, an optional pickle of the identified names and the
    optional backreferences are produced along the way. Unchanged scripts are skipped early.

    Parameters
    ----------
    script : GalleryScript
        The script to process

    seen_backrefs : set
        The seen backreferences. A new empty set is used when None.

    Returns
    -------
    result : GalleryScriptResults
        The result of running this script

    Raises
    ------
    ExtensionError
        If the file-level 'dummy_images' setting is present but is not an int.
    """
    seen_backrefs = set() if seen_backrefs is None else seen_backrefs

    # Extract the contents of the script
    file_conf, script_blocks, node = split_code_and_text_blocks(script.src_py_file, return_node=True)

    # Extract the title and introduction from the module docstring and save the title in the object
    script.title, intro = extract_intro_and_title(docstring=script_blocks[0][1], script=script)

    # Copy source python script to target folder if it is not there/up to date, so that it can be served/downloaded
    # Note: surprisingly this uses a md5 too, but not the final .md5 persisted on disk.
    script.make_dwnld_py_file()

    # Can the script be entirely skipped (both doc generation and execution) ?
    if not script.has_changed_wrt_persisted_md5():
        # A priori we can...
        skip_and_return = True

        # ...however for executables (not shared modules) we might need to run anyway because of config
        if script.is_executable_example():
            if script.gallery_conf["run_stale_examples"]:
                # Run anyway because config says so.
                skip_and_return = False
            else:
                # Add the example to the "stale examples" before returning
                script.gallery_conf["stale_examples"].append(script.dwnld_py_file)
                # If expected to fail, let's remove it from the 'expected_failing_examples' list,
                # assuming it did when previously executed
                if script.src_py_file in script.gallery_conf["expected_failing_examples"]:
                    script.gallery_conf["expected_failing_examples"].remove(script.src_py_file)

        if skip_and_return:
            # Return with 0 exec time and mem usage, and the existing thumbnail
            thumb_source_path = script.get_thumbnail_source(file_conf)
            thumb_file = create_thumb_from_image(script, thumb_source_path)
            return GalleryScriptResults(script=script, intro=intro, exec_time=0.0, memory=0.0, thumb=thumb_file)

    # Reset matplotlib, seaborn, etc. if needed
    if script.is_executable_example():
        clean_modules(gallery_conf=script.gallery_conf, file=script.src_py_file)

    # Init the runtime vars. Create the images directory and init the image files template
    script.init_before_processing()

    if script.is_executable_example():
        # Note: this writes the md5 checksum if the example was meant to run
        output_blocks, time_elapsed, memory_used = parse_and_execute(script, script_blocks)
        logger.debug(f"{script.src_py_file} ran in : {time_elapsed:.2g} seconds\n")
    else:
        output_blocks = [""] * len(script_blocks)
        time_elapsed = memory_used = 0.0  # don't let the output change
        logger.debug(f"{script.src_py_file} parsed (not executed)\n")

        # Create as many dummy images as required if needed (default none) so that references to script images
        # can still work, even if the script was not executed (in development mode typically, to go fast).
        # See https://sphinx-gallery.github.io/stable/configuration.html#generating-dummy-images
        nb_dummy_images_to_generate = file_conf.get("dummy_images", None)
        if nb_dummy_images_to_generate is not None:
            # Strict type check on purpose: only a plain int is accepted (bool is rejected as well)
            if type(nb_dummy_images_to_generate) is not int:
                raise ExtensionError(
                    f"mkdocs_gallery: 'dummy_images' setting is not a number, got {nb_dummy_images_to_generate!r}"
                )

            stock_img = os.path.join(glr_path_static(), "no_image.png")
            script.generate_n_dummy_images(img=stock_img, nb=nb_dummy_images_to_generate)

    # Remove the mkdocs-gallery configuration comments from the script if needed
    if script.gallery_conf["remove_config_comments"]:
        script_blocks = [
            (label, remove_config_comments(content), line_number) for label, content, line_number in script_blocks
        ]

    # Remove final empty block, which can occur after config comments are removed
    if script_blocks[-1][1].isspace():
        script_blocks = script_blocks[:-1]
        output_blocks = output_blocks[:-1]

    # Generate the markdown string containing the script prose, code and output.
    example_md = generate_md_from_blocks(script_blocks, output_blocks, file_conf, script.gallery_conf)

    # Write the generated markdown file
    md_header, md_footer = get_example_md_wrapper(script, time_elapsed, memory_used)
    full_md = md_header + example_md + md_footer
    script.save_md_example(full_md)

    # Create the image thumbnail for the gallery summary
    if is_failing_example(script):
        # Failing example thumbnail
        thumb_source_path = Path(os.path.join(glr_path_static(), "broken_example.png"))
    else:
        # Get the thumbnail source image, possibly from config
        thumb_source_path = script.get_thumbnail_source(file_conf)

    thumb_file = create_thumb_from_image(script, thumb_source_path)

    # Generate the jupyter notebook
    example_nb = jupyter_notebook(script, script_blocks)
    ipy_file = _new_file(script.ipynb_file)
    save_notebook(example_nb, ipy_file)
    _replace_by_new_if_needed(ipy_file, md5_mode="t")

    # Write names
    if script.gallery_conf["inspect_global_variables"]:
        global_variables = script.run_vars.example_globals
    else:
        global_variables = None

    # TODO dig in just in case
    example_code_obj = identify_names(script_blocks, global_variables, node)
    if example_code_obj:
        # Write a pickle file (.pickle) containing `example_code_obj`
        codeobj_fname = _new_file(script.codeobj_file)
        with open(codeobj_fname, "wb") as fid:
            pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL)
        _replace_by_new_if_needed(codeobj_fname)

    # Keep only the names whose module belongs to the documented module(s)
    backrefs = {
        "{module_short}.{name}".format(**cobj)
        for cobjs in example_code_obj.values()
        for cobj in cobjs
        if cobj["module"].startswith(script.gallery_conf["doc_module"])
    }

    # Create results object
    res = GalleryScriptResults(
        script=script,
        intro=intro,
        exec_time=time_elapsed,
        memory=memory_used,
        thumb=thumb_file,
    )

    # Write backreferences if required
    if script.gallery_conf["backreferences_dir"] is not None:
        _write_backreferences(backrefs, seen_backrefs, script_results=res)

    return res

1160

1161

1162 # TODO the note should only appear in html mode. (.. only:: html)

1163 # TODO maybe remove as much as possible the css for now?

1164 EXAMPLE_HEADER = """

1165 <!--

1166 DO NOT EDIT.

1167 THIS FILE WAS AUTOMATICALLY GENERATED BY mkdocs-gallery.

1168 TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:

1169 "{pyfile_to_edit}"

1170 LINE NUMBERS ARE GIVEN BELOW.

1171 -->

1172

1173 !!! note

1174

1175 Click [here](#download_links)

1176 to download the full example code{opt_binder_text}

1177

1178 """ # TODO there was a {{: .mkd-glr-example-title }} for the title but is it useful ?

1179 MD_BLOCK_HEADER = """\

1180 

1181

1182 """

1183

1184

def generate_md_from_blocks(script_blocks, output_blocks, file_conf, gallery_conf) -> str:
    """Generate the md string containing the script prose, code and output.

    Parameters
    ----------
    script_blocks : list
        (label, content, line_number)
        List where each element is a tuple with the label ('text' or 'code'),
        the corresponding content string of block and the leading line number

    output_blocks : list
        List of strings where each element is the markdown
        representation of the output of each block

    file_conf : dict
        File-specific settings given in source file comments as:
        ``# mkdocs_gallery_<name> = <value>``

    gallery_conf : dict
        Contains the configuration of mkdocs-gallery

    Returns
    -------
    out : str
        The resulting markdown page.
    """
    # A simple example has two blocks: one for the
    # example introduction/explanation and one for the code
    is_example_notebook_like = len(script_blocks) > 2

    # Collect the pieces and join them once at the end rather than growing a string in the loop
    md_parts = []
    for bi, ((blabel, bcontent, lineno), code_output) in enumerate(zip(script_blocks, output_blocks)):
        # do not add comment to the title block (bi=0), otherwise the linking does not work properly
        if bi > 0:
            md_parts.append(MD_BLOCK_HEADER.format(lineno, lineno + bcontent.count("\n")))

        if blabel == "code":
            # The file-level 'line_numbers' setting takes precedence over the gallery-level one
            if not file_conf.get("line_numbers", gallery_conf.get("line_numbers", False)):
                lineno = None

            code_md = codestr2md(bcontent, lang=gallery_conf["lang"], lineno=lineno) + "\n"
            if is_example_notebook_like:
                # Notebook-like example: code first, then its output
                md_parts.append(code_md)
                md_parts.append(code_output)
            else:
                # Simple example: output first, then the code
                md_parts.append(code_output)
                if "mkd-glr-script-out" in code_output:
                    # Add some vertical space after output
                    md_parts.append("\n\n<br />\n\n")
                md_parts.append(code_md)
        else:
            # Text block: make sure it is followed by a blank line
            block_separator = "\n\n" if not bcontent.endswith("\n") else "\n"
            md_parts.append(bcontent + block_separator)

    return "".join(md_parts)

1240

1241

def get_example_md_wrapper(script: GalleryScript, time_elapsed: float, memory_used: float) -> Tuple[str, str]:
    """Build the markdown placed before and after the body of an example page.

    Parameters
    ----------
    script : GalleryScript
        The script for which to generate the md.

    time_elapsed : float
        Time elapsed in seconds while executing file

    memory_used : float
        Additional memory used during the run.

    Returns
    -------
    md_before : str
        Part of the final markdown that goes before the notebook / python script.

    md_after : str
        Part of the final markdown that goes after the notebook / python script.
    """
    # Binder is considered enabled as soon as the checked configuration is not empty
    binder_conf = check_binder_conf(script.gallery_conf.get("binder"))
    use_binder = len(binder_conf) > 0

    # --- Header ---
    if use_binder:
        binder_text = " or to run this example in your browser via Binder"
    else:
        binder_text = ""
    md_before = EXAMPLE_HEADER.format(
        pyfile_to_edit=script.src_py_file_rel_project.as_posix(),
        opt_binder_text=binder_text,
    )

    # --- Footer, assembled piece by piece ---
    footer_parts = []

    # Timing is only reported above the configured threshold
    if time_elapsed >= script.gallery_conf["min_reported_time"]:
        minutes, seconds = divmod(time_elapsed, 60)
        footer_parts.append(TIMING_CONTENT.format(minutes, seconds))

    if script.gallery_conf["show_memory"]:
        footer_parts.append(f"**Estimated memory usage:** {memory_used:.0f} MB\n\n")

    # Download buttons
    # - Generate a binder URL if specified
    binder_badge_md = gen_binder_md(script, binder_conf) if use_binder else ""
    # - Rely on mkdocs-material for the icon
    icon = ":fontawesome-solid-download:"
    # - Generate the download buttons
    # TODO why aren't they centered actually ? does .center work ?
    py_name = script.dwnld_py_file.name
    nb_name = script.ipynb_file.name
    footer_parts.append(
        f"""
<div id="download_links"></div>

{binder_badge_md}

[{icon} Download Python source code: {py_name}](./{py_name}){{ .md-button .center}}

[{icon} Download Jupyter notebook: {nb_name}](./{nb_name}){{ .md-button .center}}
"""
    )

    # Add the "generated by mkdocs-gallery" footer
    footer_parts.append(MKD_GLR_SIG)

    md_after = "".join(footer_parts)
    return md_before, md_after

mkdocs_gallery/gen_single.py source