Coverage for src/mkdocs_gallery/docs_resolv.py: 100%

0 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-30 08:26 +0000

1# Authors: Sylvain MARIE <sylvain.marie@se.com> 

2# + All contributors to <https://github.com/smarie/mkdocs-gallery> 

3# 

4# Original idea and code: sphinx-gallery, <https://sphinx-gallery.github.io> 

5# License: 3-clause BSD, <https://github.com/smarie/mkdocs-gallery/blob/master/LICENSE> 

6# """ 

7# Link resolver objects 

8# ===================== 

9# """ 

10# 

11# import codecs 

12# import gzip 

13# from io import BytesIO 

14# import os 

15# import pickle 

16# import posixpath 

17# import re 

18# import shelve 

19# import sys 

20# import urllib.request as urllib_request 

21# import urllib.parse as urllib_parse 

22# from urllib.error import HTTPError, URLError 

23# 

24# from .errors import ExtensionError 

25# from sphinx.search import js_index 

26# 

27# from . import sphinx_compatibility 

28# 

29# 

30# logger = sphinx_compatibility.getLogger('sphinx-gallery') 

31# 

32# 

33# def _get_data(url): 

34# """Get data over http(s) or from a local file.""" 

35# if urllib_parse.urlparse(url).scheme in ('http', 'https'): 

36# user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11' # noqa: E501 

37# headers = {'User-Agent': user_agent} 

38# req = urllib_request.Request(url, None, headers) 

39# resp = urllib_request.urlopen(req) 

40# encoding = resp.headers.get('content-encoding', 'plain') 

41# data = resp.read() 

42# if encoding == 'gzip': 

43# data = gzip.GzipFile(fileobj=BytesIO(data)).read() 

44# elif encoding != 'plain': 

45# raise ExtensionError('unknown encoding %r' % (encoding,)) 

46# data = data.decode('utf-8') 

47# else: 

48# with codecs.open(url, mode='r', encoding='utf-8') as fid: 

49# data = fid.read() 

50# 

51# return data 

52# 

53# 

54# def get_data(url, gallery_dir): 

55# """Persistent dictionary usage to retrieve the search indexes""" 

56# # shelve keys need to be str in python 2 

57# if sys.version_info[0] == 2 and isinstance(url, str): 

58# url = url.encode('utf-8') 

59# 

60# cached_file = os.path.join(gallery_dir, 'searchindex') 

61# search_index = shelve.open(cached_file) 

62# if url in search_index: 

63# data = search_index[url] 

64# else: 

65# data = _get_data(url) 

66# search_index[url] = data 

67# search_index.close() 

68# 

69# return data 

70# 

71# 

72# def parse_sphinx_docopts(index): 

73# """ 

74# Parse the Sphinx index for documentation options. 

75# 

76# Parameters 

77# ---------- 

78# index : str 

79# The Sphinx index page 

80# 

81# Returns 

82# ------- 

83# docopts : dict 

84# The documentation options from the page. 

85# """ 

86# 

87# pos = index.find('var DOCUMENTATION_OPTIONS') 

88# if pos < 0: 

89# raise ExtensionError( 

90# 'Documentation options could not be found in index.') 

91# pos = index.find('{', pos) 

92# if pos < 0: 

93# raise ExtensionError( 

94# 'Documentation options could not be found in index.') 

95# endpos = index.find('};', pos) 

96# if endpos < 0: 

97# raise ExtensionError( 

98# 'Documentation options could not be found in index.') 

99# block = index[pos + 1:endpos].strip() 

100# docopts = {} 

101# for line in block.splitlines(): 

102# key, value = line.split(':', 1) 

103# key = key.strip().strip('"') 

104# 

105# value = value.strip() 

106# if value[-1] == ',': 

107# value = value[:-1].rstrip() 

108# if value[0] in '"\'': 

109# value = value[1:-1] 

110# elif value == 'false': 

111# value = False 

112# elif value == 'true': 

113# value = True 

114# else: 

115# try: 

116# value = int(value) 

117# except ValueError: 

118# # In Sphinx 1.7.5, URL_ROOT is a JavaScript fragment. 

119# # Ignoring this entry since URL_ROOT is not used 

120# # elsewhere. 

121# # https://github.com/sphinx-gallery/sphinx-gallery/issues/382 

122# continue 

123# 

124# docopts[key] = value 

125# 

126# return docopts 

127# 

128# 

129# class SphinxDocLinkResolver(object): 

130# """ Resolve documentation links using searchindex.js generated by Sphinx 

131# 

132# Parameters 

133# ---------- 

134# doc_url : str 

135# The base URL of the project website. 

136# relative : bool 

137# Return relative links (only useful for links to documentation of this 

138# package). 

139# """ 

140# 

141# def __init__(self, doc_url, gallery_dir, relative=False): 

142# self.doc_url = doc_url 

143# self.gallery_dir = gallery_dir 

144# self.relative = relative 

145# self._link_cache = {} 

146# 

147# if doc_url.startswith(('http://', 'https://')): 

148# if relative: 

149# raise ExtensionError( 

150# 'Relative links are only supported for local ' 

151# 'URLs (doc_url cannot be absolute)') 

152# index_url = doc_url + '/' 

153# searchindex_url = doc_url + '/searchindex.js' 

154# docopts_url = doc_url + '_static/documentation_options.js' 

155# else: 

156# index_url = os.path.join(doc_url, 'index.html') 

157# searchindex_url = os.path.join(doc_url, 'searchindex.js') 

158# docopts_url = os.path.join( 

159# doc_url, '_static', 'documentation_options.js') 

160# 

161# # detect if we are using relative links on a Windows system 

162# if (os.name.lower() == 'nt' and 

163# not doc_url.startswith(('http://', 'https://'))): 

164# if not relative: 

165# raise ExtensionError( 

166# 'You have to use relative=True for the local' 

167# ' package on a Windows system.') 

168# self._is_windows = True 

169# else: 

170# self._is_windows = False 

171# 

172# # Download and find documentation options. As of Sphinx 1.7, these 

173# # options are now kept in a standalone file called 

174# # 'documentation_options.js'. Since SphinxDocLinkResolver can be called 

175# # not only for the documentation which is being built but also ones 

176# # that are being referenced, we need to try and get the index page 

177# # first and if that doesn't work, check for the 

178# # documentation_options.js file. 

179# index = get_data(index_url, gallery_dir) 

180# if 'var DOCUMENTATION_OPTIONS' in index: 

181# self._docopts = parse_sphinx_docopts(index) 

182# else: 

183# docopts = get_data(docopts_url, gallery_dir) 

184# self._docopts = parse_sphinx_docopts(docopts) 

185# 

186# # download and initialize the search index 

187# sindex = get_data(searchindex_url, gallery_dir) 

188# self._searchindex = js_index.loads(sindex) 

189# 

190# def _get_index_match(self, first, second): 

191# try: 

192# match = self._searchindex['objects'][first] 

193# except KeyError: 

194# return None 

195# else: 

196# if isinstance(match, dict): 

197# try: 

198# match = match[second] 

199# except KeyError: 

200# return None 

201# elif isinstance(match, (list, tuple)): # Sphinx 5.0.0 dev 

202# try: 

203# for item in match: 

204# if item[4] == second: 

205# match = item[:4] 

206# break 

207# else: 

208# return None 

209# except Exception: 

210# return None 

211# return match 

212# 

213# def _get_link_type(self, cobj): 

214# """Get a valid link and type_, False if not found.""" 

215# first, second = cobj['module_short'], cobj['name'] 

216# match = self._get_index_match(first, second) 

217# if match is None and '.' in second: # possible class attribute 

218# first, second = second.split('.', 1) 

219# first = '.'.join([cobj['module_short'], first]) 

220# match = self._get_index_match(first, second) 

221# if match is None: 

222# link = type_ = None 

223# else: 

224# fname_idx = match[0] 

225# objname_idx = str(match[1]) 

226# anchor = match[3] 

227# type_ = self._searchindex['objtypes'][objname_idx] 

228# 

229# fname = self._searchindex['filenames'][fname_idx] 

230# # In 1.5+ Sphinx seems to have changed from .rst.html to only 

231# # .html extension in converted files. Find this from the options. 

232# ext = self._docopts.get('FILE_SUFFIX', '.rst.html') 

233# fname = os.path.splitext(fname)[0] + ext 

234# if self._is_windows: 

235# fname = fname.replace('/', '\\') 

236# link = os.path.join(self.doc_url, fname) 

237# else: 

238# link = posixpath.join(self.doc_url, fname) 

239# 

240# fullname = '.'.join([first, second]) 

241# if anchor == '': 

242# anchor = fullname 

243# elif anchor == '-': 

244# anchor = (self._searchindex['objnames'][objname_idx][1] + '-' + 

245# fullname) 

246# 

247# link = link + '#' + anchor 

248# 

249# return link, type_ 

250# 

251# def resolve(self, cobj, this_url, return_type=False): 

252# """Resolve the link to the documentation, returns None if not found 

253# 

254# Parameters 

255# ---------- 

256# cobj : dict 

257# Dict with information about the "code object" for which we are 

258# resolving a link. 

259# cobj['name'] : function or class name (str) 

260# cobj['module_short'] : shortened module name (str) 

261# cobj['module'] : module name (str) 

262# this_url: str 

263# URL of the current page. Needed to construct relative URLs 

264# (only used if relative=True in constructor). 

265# return_type : bool 

266# If True, return the type as well. 

267# 

268# Returns 

269# ------- 

270# link : str or None 

271# The link (URL) to the documentation. 

272# type_ : str 

273# The type. Only returned if return_type is True. 

274# """ 

275# full_name = cobj['module_short'] + '.' + cobj['name'] 

276# if full_name not in self._link_cache: 

277# # we don't have it cached 

278# self._link_cache[full_name] = self._get_link_type(cobj) 

279# link, type_ = self._link_cache[full_name] 

280# 

281# if self.relative and link is not None: 

282# link = os.path.relpath(link, start=this_url) 

283# if self._is_windows: 

284# # replace '\' with '/' so it works on the web 

285# link = link.replace('\\', '/') 

286# 

287# # for some reason, the relative link goes one directory too high up 

288# link = link[3:] 

289# 

290# return (link, type_) if return_type else link 

291# 

292# 

293# def _handle_http_url_error(e, msg='fetching'): 

294# if isinstance(e, HTTPError): 

295# error_msg = '%s %s: %s (%s)' % (msg, e.url, e.code, e.msg) 

296# elif isinstance(e, URLError): 

297# error_msg = '%s: %s' % (msg, e.reason) 

298# logger.warning('The following %s has occurred %s' % ( 

299# type(e).__name__, error_msg)) 

300# 

301# 

302# def _sanitize_css_class(s): 

303# for x in '~!@$%^&*()+=,./\';:"?><[]\\{}|`#': 

304# s = s.replace(x, '-') 

305# return s 

306# 

307# 

308# def _embed_code_links(app, gallery_conf, gallery_dir): 

309# # Add resolvers for the packages for which we want to show links 

310# doc_resolvers = {} 

311# 

312# src_gallery_dir = os.path.join(app.builder.srcdir, gallery_dir) 

313# for this_module, url in gallery_conf['reference_url'].items(): 

314# try: 

315# if url is None: 

316# doc_resolvers[this_module] = SphinxDocLinkResolver( 

317# app.builder.outdir, src_gallery_dir, relative=True) 

318# else: 

319# doc_resolvers[this_module] = SphinxDocLinkResolver( 

320# url, src_gallery_dir) 

321# 

322# except (URLError, HTTPError) as e: 

323# _handle_http_url_error(e) 

324# 

325# html_gallery_dir = os.path.abspath(os.path.join(app.builder.outdir, 

326# gallery_dir)) 

327# 

328# # patterns for replacement 

329# link_pattern = ( 

330# '<a href="{link}" title="{title}" class="{css_class}">{text}</a>') 

331# orig_pattern = '<span class="n">%s</span>' 

332# period = '<span class="o">.</span>' 

333# 

334# # This could be turned into a generator if necessary, but should be okay 

335# flat = [[dirpath, filename] 

336# for dirpath, _, filenames in os.walk(html_gallery_dir) 

337# for filename in filenames] 

338# iterator = sphinx_compatibility.status_iterator( 

339# flat, 'embedding documentation hyperlinks for %s... ' % gallery_dir, 

340# color='fuchsia', length=len(flat), 

341# stringify_func=lambda x: os.path.basename(x[1])) 

342# intersphinx_inv = getattr(app.env, 'intersphinx_named_inventory', dict()) 

343# builtin_modules = set(intersphinx_inv.get( 

344# 'python', dict()).get('py:module', dict()).keys()) 

345# for dirpath, fname in iterator: 

346# full_fname = os.path.join(html_gallery_dir, dirpath, fname) 

347# subpath = dirpath[len(html_gallery_dir) + 1:] 

348# pickle_fname = os.path.join(src_gallery_dir, subpath, 

349# fname[:-5] + '_codeobj.pickle') 

350# if not os.path.exists(pickle_fname): 

351# continue 

352# 

353# # we have a pickle file with the objects to embed links for 

354# with open(pickle_fname, 'rb') as fid: 

355# example_code_obj = pickle.load(fid) 

356# # generate replacement strings with the links 

357# str_repl = {} 

358# for name in sorted(example_code_obj): 

359# cobjs = example_code_obj[name] 

360# # possible names from identify_names, which in turn gets 

361# # possibilities from NameFinder.get_mapping 

362# link = type_ = None 

363# for cobj in cobjs: 

364# for modname in (cobj['module_short'], cobj['module']): 

365# this_module = modname.split('.')[0] 

366# cname = cobj['name'] 

367# 

368# # Try doc resolvers first 

369# if this_module in doc_resolvers: 

370# try: 

371# link, type_ = doc_resolvers[this_module].resolve( 

372# cobj, full_fname, return_type=True) 

373# except (HTTPError, URLError) as e: 

374# _handle_http_url_error( 

375# e, msg='resolving %s.%s' % (modname, cname)) 

376# 

377# # next try intersphinx 

378# if this_module == modname == 'builtins': 

379# this_module = 'python' 

380# elif modname in builtin_modules: 

381# this_module = 'python' 

382# if link is None and this_module in intersphinx_inv: 

383# inv = intersphinx_inv[this_module] 

384# if modname == 'builtins': 

385# want = cname 

386# else: 

387# want = '%s.%s' % (modname, cname) 

388# for key, value in inv.items(): 

389# # only python domain 

390# if key.startswith('py') and want in value: 

391# link = value[want][2] 

392# type_ = key 

393# break 

394# 

395# # differentiate classes from instances 

396# is_instance = (type_ is not None and 

397# 'py:class' in type_ and 

398# not cobj['is_class']) 

399# 

400# if link is not None: 

401# # Add CSS classes 

402# name_html = period.join(orig_pattern % part 

403# for part in name.split('.')) 

404# full_function_name = '%s.%s' % (modname, cname) 

405# css_class = ("mkd-glr-backref-module-" + 

406# _sanitize_css_class(modname)) 

407# if type_ is not None: 

408# css_class += (" mkd-glr-backref-type-" + 

409# _sanitize_css_class(type_)) 

410# if is_instance: 

411# css_class += " mkd-glr-backref-instance" 

412# str_repl[name_html] = link_pattern.format( 

413# link=link, title=full_function_name, 

414# css_class=css_class, text=name_html) 

415# break # loop over possible module names 

416# 

417# if link is not None: 

418# break # loop over cobjs 

419# 

420# # do the replacement in the html file 

421# 

422# # ensure greediness 

423# names = sorted(str_repl, key=len, reverse=True) 

424# regex_str = '|'.join(re.escape(name) for name in names) 

425# regex = re.compile(regex_str) 

426# 

427# def substitute_link(match): 

428# return str_repl[match.group()] 

429# 

430# if len(str_repl) > 0: 

431# with codecs.open(full_fname, 'r', 'utf-8') as fid: 

432# lines_in = fid.readlines() 

433# with codecs.open(full_fname, 'w', 'utf-8') as fid: 

434# for line in lines_in: 

435# line_out = regex.sub(substitute_link, line) 

436# fid.write(line_out) 

437# 

438# 

439# def embed_code_links(app, exception): 

440# """Embed hyperlinks to documentation into example code""" 

441# if exception is not None: 

442# return 

443# 

444# gallery_conf = app.config.sphinx_gallery_conf 

445# 

446# # XXX: Whitelist of builders for which it makes sense to embed 

447# # hyperlinks inside the example html. Note that the link embedding 

448# # requires searchindex.js to exist for the links to the local doc 

449# # and there does not seem to be a good way of knowing which 

450# # builders create a searchindex.js. 

451# if app.builder.name not in ['html', 'readthedocs']: 

452# return 

453# 

454# logger.info('embedding documentation hyperlinks...', color='white') 

455# 

456# gallery_dirs = gallery_conf['gallery_dirs'] 

457# if not isinstance(gallery_dirs, list): 

458# gallery_dirs = [gallery_dirs] 

459# 

460# for gallery_dir in gallery_dirs: 

461# _embed_code_links(app, gallery_conf, gallery_dir)