Coverage for odsclient/shortcuts.py: 85%

Shortcuts on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

53 statements  

1# Authors: Sylvain MARIE <sylvain.marie@se.com> 

2# + All contributors to <https://github.com/smarie/python-odsclient> 

3# 

4# License: 3-clause BSD, <https://github.com/smarie/python-odsclient/blob/master/LICENSE> 

5import os 

6from glob import glob 

7from shutil import rmtree 

8 

9try: 

10 # noinspection PyUnresolvedReferences 

11 from typing import Union 

12except ImportError: 

13 pass 

14 

15try: 

16 from pathlib import Path 

17except ImportError: 

18 # do not care: only used for type hinting 

19 pass 

20 

21from requests import Session 

22 

23from odsclient.core import KR_DEFAULT_USERNAME, ODSClient, CACHE_ROOT_FOLDER, baseurl_to_id_str, CacheEntry 

24 

25 

def store_apikey_in_keyring(platform_id='public',  # type: str
                            base_url=None,  # type: str
                            keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                            apikey=None,  # type: str
                            ):
    """
    Save an api key in the OS keyring, through the `keyring` library.

    Shortcut for `ODSClient(...).store_apikey_in_keyring(apikey)`: a throw-away client is created from the
    platform arguments and the call is delegated to it.

    :param platform_id: the ods platform id. It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com. Defaults to `'public'`, i.e. https://public.opendatasoft.com
    :param base_url: an explicit base url, to use instead of the one derived from `platform_id`
    :param keyring_entries_username: keyring entries are identified by a service id and a username. The base url
        is used as the service id; the username can be anything and defaults to `KR_DEFAULT_USERNAME`.
    :param apikey: the api key string to store. When omitted, the user is prompted for it with `getpass()`.
    :return: None
    """
    client_options = dict(platform_id=platform_id,
                          base_url=base_url,
                          keyring_entries_username=keyring_entries_username)
    ODSClient(**client_options).store_apikey_in_keyring(apikey=apikey)

48 

49 

def get_apikey_from_keyring(platform_id='public',  # type: str
                            base_url=None,  # type: str
                            keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                            ):
    """
    Read back an api key previously stored in the OS keyring, through the `keyring` library.

    Shortcut for `ODSClient(...).get_apikey_from_keyring()`.

    :param platform_id: the ods platform id. It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com. Defaults to `'public'`.
    :param base_url: an explicit base url, to use instead of the one derived from `platform_id`
    :param keyring_entries_username: the username part of the keyring entry key (the service id part being the
        base url). Defaults to `KR_DEFAULT_USERNAME`.
    :return: whatever `ODSClient.get_apikey_from_keyring` returns for this entry
    """
    client_options = dict(platform_id=platform_id,
                          base_url=base_url,
                          keyring_entries_username=keyring_entries_username)
    # NOTE(review): `ignore_import_errors=False` presumably makes a missing `keyring` package raise rather than
    # being silently ignored - confirm against ODSClient.
    return ODSClient(**client_options).get_apikey_from_keyring(ignore_import_errors=False)

62 

63 

def remove_apikey_from_keyring(platform_id='public',  # type: str
                               base_url=None,  # type: str
                               keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                               ):
    """
    Delete an api key previously stored in the OS keyring, through the `keyring` library.

    Shortcut for `ODSClient(...).remove_apikey_from_keyring()`.

    :param platform_id: the ods platform id. It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com. Defaults to `'public'`, i.e. https://public.opendatasoft.com
    :param base_url: an explicit base url, to use instead of the one derived from `platform_id`
    :param keyring_entries_username: keyring entries are identified by a service id and a username. The base url
        is used as the service id; the username can be anything and defaults to `KR_DEFAULT_USERNAME`.
    :return: None
    """
    client_options = dict(platform_id=platform_id,
                          base_url=base_url,
                          keyring_entries_username=keyring_entries_username)
    ODSClient(**client_options).remove_apikey_from_keyring()

83 

84 

def get_apikey(platform_id='public',  # type: str
               base_url=None,  # type: str
               apikey_filepath='ods.apikey',  # type: str
               use_keyring=True,  # type: bool
               keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
               ):
    # type: (...) -> str
    """
    Tell which api key an ods client configured with these options would use.

    Shortcut for `ODSClient(...).get_apikey()`.

    :param platform_id: the ods platform id. It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com. Defaults to `'public'`, i.e. https://public.opendatasoft.com
    :param base_url: an explicit base url, to use instead of the one derived from `platform_id`
    :param apikey_filepath: where to look for an api key file on the file system. Such files are optional; other
        (safer) ways to provide the api key exist, see the documentation for details.
    :param use_keyring: whether the `keyring` library should be used to look up an existing api key
        (default `True`). Keys should be stored using `store_apikey_in_keyring()`.
    :param keyring_entries_username: keyring entries are identified by a service id and a username. The base url
        is used as the service id; the username can be anything and defaults to `KR_DEFAULT_USERNAME`.
    :return: the result of `ODSClient.get_apikey()`
    """
    client_options = dict(platform_id=platform_id,
                          base_url=base_url,
                          apikey_filepath=apikey_filepath,
                          use_keyring=use_keyring,
                          keyring_entries_username=keyring_entries_username)
    return ODSClient(**client_options).get_apikey()

113 

114 

def get_whole_dataframe(dataset_id,  # type: str
                        use_labels_for_header=True,  # type: bool
                        tqdm=False,  # type: bool
                        block_size=1024,  # type: int
                        file_cache=False,  # type: bool
                        platform_id='public',  # type: str
                        base_url=None,  # type: str
                        enforce_apikey=False,  # type: bool
                        apikey=None,  # type: str
                        apikey_filepath='ods.apikey',  # type: Union[str, Path]
                        use_keyring=True,  # type: bool
                        keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                        requests_session=None,  # type: Session
                        auto_close_session=None,  # type: bool
                        **other_opts
                        ):
    """
    Download a dataset and return it as a pandas dataframe (pandas must be installed).

    Shortcut for `ODSClient(...).get_whole_dataframe(...)`: the connection-related arguments configure the
    client, the remaining ones are forwarded to the client method.

    :param dataset_id: the id of the dataset to retrieve
    :param use_labels_for_header: forwarded as-is to `ODSClient.get_whole_dataframe`
    :param tqdm: whether a progress bar should be displayed using tqdm (tqdm must then be installed)
    :param block_size: block size (int) used in streaming mode, when to_csv or tqdm is used
    :param file_cache: forwarded as-is to `ODSClient.get_whole_dataframe` (default `False`)
    :param platform_id: the ods platform id. It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com. Defaults to `'public'`, i.e. https://public.opendatasoft.com
    :param base_url: an explicit base url, to use instead of the one derived from `platform_id`
    :param enforce_apikey: whether an error should be raised when no api key can be found at all (neither as
        explicit argument, nor in a file, environment variable or keyring). Default `False`.
    :param apikey: an explicit api key string
    :param apikey_filepath: where to look for an api key file on the file system. Such files are optional; other
        (safer) ways to provide the api key exist, see the documentation for details.
    :param use_keyring: whether the `keyring` library should be used to look up an existing api key
        (default `True`). Keys should be stored using `store_apikey_in_keyring()`.
    :param keyring_entries_username: keyring entries are identified by a service id and a username. The base url
        is used as the service id; the username can be anything and defaults to `KR_DEFAULT_USERNAME`.
    :param requests_session: an optional `Session` (from the `requests` lib) to use. With `None`, a new `Session`
        is created and released when the client is garbage-collected. When providing your own, close it
        yourself or explicitly set `auto_close_session` to `True`.
    :param auto_close_session: whether the session should be closed when the client is garbage-collected. The
        default `None` means "`True` if no custom `requests_session` is passed, else `False`". Setting this to
        `False` can leave sockets open.
    :param other_opts: extra keyword arguments, forwarded to `ODSClient.get_whole_dataframe`
    :return: the result of `ODSClient.get_whole_dataframe`
    """
    client_options = dict(platform_id=platform_id, base_url=base_url, enforce_apikey=enforce_apikey,
                          apikey=apikey, apikey_filepath=apikey_filepath, use_keyring=use_keyring,
                          keyring_entries_username=keyring_entries_username,
                          requests_session=requests_session, auto_close_session=auto_close_session)
    ods = ODSClient(**client_options)
    return ods.get_whole_dataframe(dataset_id=dataset_id,
                                   use_labels_for_header=use_labels_for_header,
                                   tqdm=tqdm,
                                   block_size=block_size,
                                   file_cache=file_cache,
                                   **other_opts)

167 

168 

def clean_cache(dataset_id=None,  # type: str
                # format='csv',  # type: str
                platform_id=None,  # type: str
                base_url=None,  # type: str
                cache_root=None  # type: Union[str, Path]
                ):
    """
    Clean the file cache: a single dataset, a whole platform, or everything.

    - if `dataset_id` is provided, the cached files of that dataset are removed, whatever their format;
    - otherwise, if `platform_id` or `base_url` is provided, the cache folder of that platform is removed;
    - otherwise the entire cache root folder is removed.

    A message is printed for every file or folder that is removed. Folder removals ignore errors.

    :param dataset_id: the id of the dataset to remove from the cache, or `None` (default)
    :param platform_id: the ods platform id whose cache should be cleaned, or `None` (default)
    :param base_url: an explicit platform base url; only looked at when `platform_id` is `None`
    :param cache_root: the cache root folder. `None` (default) means `CACHE_ROOT_FOLDER`.
    :return: None
    """
    if dataset_id is not None:
        # Single dataset: a "*" format yields a glob pattern matching every cached format of this dataset.
        # NOTE(review): `platform_id` defaults to None here whereas `get_cached_dataset_entry` defaults to
        # 'public'; None is forwarded as-is - confirm that ODSClient resolves it as intended.
        entry_pattern = get_cached_dataset_entry(dataset_id, format="*", platform_id=platform_id,
                                                 base_url=base_url, cache_root=cache_root)
        for matching_file in glob(str(entry_pattern.file_path)):
            print("[odsclient] Removing cached dataset entry for %r: %r" % (dataset_id, matching_file))
            os.remove(matching_file)
        return

    root_folder = CACHE_ROOT_FOLDER if cache_root is None else str(cache_root)

    # Work out which platform sub-folder is targeted, if any
    platform_folder = None
    if platform_id is not None:
        platform_folder = platform_id
    elif base_url is not None:
        platform_folder = baseurl_to_id_str(base_url)

    if platform_folder is None:
        # No platform selected: wipe the whole cache
        print("[odsclient] Removing entire cache folder %r" % root_folder)
        rmtree(root_folder, ignore_errors=True)
        return

    # Wipe the cache of the selected platform only
    folder_to_remove = "%s/%s/" % (root_folder, platform_folder)
    print("[odsclient] Removing cache for platform %r: folder %r" % (platform_folder, folder_to_remove))
    rmtree(folder_to_remove, ignore_errors=True)

212 

213 

def get_cached_dataset_entry(dataset_id,  # type: str
                             format='csv',  # type: str
                             platform_id='public',  # type: str
                             base_url=None,  # type: str
                             cache_root=None  # type: Union[str, Path]
                             ):
    # type: (...) -> CacheEntry
    """
    Return the file cache entry describing a dataset on a platform.

    Shortcut for `ODSClient(...).get_cached_dataset_entry(...)`.

    :param dataset_id: the id of the dataset
    :param format: the dataset format (default `'csv'`), forwarded to the client method
    :param platform_id: the ods platform id used to create the client (default `'public'`)
    :param base_url: an explicit base url used to create the client (default `None`)
    :param cache_root: the cache root folder, forwarded to the client method (default `None`)
    :return: the `CacheEntry` returned by `ODSClient.get_cached_dataset_entry`
    """
    ods = ODSClient(platform_id=platform_id, base_url=base_url)
    entry = ods.get_cached_dataset_entry(dataset_id=dataset_id, format=format, cache_root=cache_root)
    return entry

233 

234 

235# noinspection PyShadowingBuiltins 

def get_whole_dataset(dataset_id,  # type: str
                      format='csv',  # type: str
                      timezone=None,  # type: str
                      use_labels_for_header=True,  # type: bool
                      csv_separator=';',  # type: str
                      tqdm=False,  # type: bool
                      to_path=None,  # type: Union[str, Path]
                      file_cache=False,  # type: bool
                      block_size=1024,  # type: int
                      platform_id='public',  # type: str
                      base_url=None,  # type: str
                      enforce_apikey=False,  # type: bool
                      apikey=None,  # type: str
                      apikey_filepath='ods.apikey',  # type: Union[str, Path]
                      use_keyring=True,  # type: bool
                      keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                      requests_session=None,  # type: Session
                      auto_close_session=None,  # type: bool
                      **other_opts
                      ):
    """
    Download a whole dataset, in the requested format.

    Shortcut for `ODSClient(...).get_whole_dataset(...)`: the connection-related arguments configure the client,
    the remaining ones are forwarded to the client method.

    :param dataset_id: the id of the dataset to retrieve
    :param format: the dataset format (default `'csv'`), forwarded to the client method
    :param timezone: forwarded as-is to `ODSClient.get_whole_dataset` (default `None`)
    :param use_labels_for_header: forwarded as-is to `ODSClient.get_whole_dataset` (default `True`)
    :param csv_separator: forwarded as-is to `ODSClient.get_whole_dataset` (default `';'`)
    :param tqdm: whether a progress bar should be displayed using tqdm (tqdm must then be installed)
    :param to_path: the file path where the dataset (csv or other format) should be written. In that case
        nothing is returned.
    :param file_cache: whether the file should be written to the local file cache (default `False`). See
        `get_cached_dataset_entry` for details.
    :param block_size: block size (int) used in streaming mode, when to_csv or tqdm is used
    :param platform_id: the ods platform id. It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com. Defaults to `'public'`, i.e. https://public.opendatasoft.com
    :param base_url: an explicit base url, to use instead of the one derived from `platform_id`
    :param enforce_apikey: whether an error should be raised when no api key can be found at all (neither as
        explicit argument, nor in a file, environment variable or keyring). Default `False`.
    :param apikey: an explicit api key string
    :param apikey_filepath: where to look for an api key file on the file system. Such files are optional; other
        (safer) ways to provide the api key exist, see the documentation for details.
    :param use_keyring: whether the `keyring` library should be used to look up an existing api key
        (default `True`). Keys should be stored using `store_apikey_in_keyring()`.
    :param keyring_entries_username: keyring entries are identified by a service id and a username. The base url
        is used as the service id; the username can be anything and defaults to `KR_DEFAULT_USERNAME`.
    :param requests_session: an optional `Session` (from the `requests` lib) to use. With `None`, a new `Session`
        is created and released when the client is garbage-collected. When providing your own, close it
        yourself or explicitly set `auto_close_session` to `True`.
    :param auto_close_session: whether the session should be closed when the client is garbage-collected. The
        default `None` means "`True` if no custom `requests_session` is passed, else `False`". Setting this to
        `False` can leave sockets open.
    :param other_opts: extra keyword arguments, forwarded to `ODSClient.get_whole_dataset`
    :return: the result of `ODSClient.get_whole_dataset`
    """
    client_options = dict(platform_id=platform_id, base_url=base_url, enforce_apikey=enforce_apikey,
                          apikey=apikey, apikey_filepath=apikey_filepath, use_keyring=use_keyring,
                          keyring_entries_username=keyring_entries_username,
                          requests_session=requests_session, auto_close_session=auto_close_session)
    ods = ODSClient(**client_options)
    return ods.get_whole_dataset(dataset_id=dataset_id,
                                 format=format,
                                 file_cache=file_cache,
                                 timezone=timezone,
                                 use_labels_for_header=use_labels_for_header,
                                 csv_separator=csv_separator,
                                 tqdm=tqdm,
                                 to_path=to_path,
                                 block_size=block_size,
                                 **other_opts)

300 

301 

def push_dataset_realtime(platform_id,  # type: str
                          dataset_id,  # type: str
                          dataset,  # type: Union[str, pandas.DataFrame]
                          push_key,  # type: str
                          format='csv',  # type: str
                          csv_separator=';',  # type: str
                          **other_opts
                          ):
    """
    Push a dataset to a platform. The dataset is either a pandas dataframe or a CSV string including its header.

    Shortcut for `ODSClient(platform_id=...).push_dataset_realtime(...)`.

    :param platform_id: the ods platform id (mandatory). It is used to build the base URL following the pattern
        https://<platform_id>.opendatasoft.com.
    :param dataset_id: the id of the dataset to push to
    :param dataset: the dataset to push: a pandas dataframe, or a CSV string with header included
    :param push_key: the Push Key provided by the API for pushing this dataset. Warning: this key is independent
        from the API key. It can be acquired from the Realtime Push API URL section in ODS.
    :param format: the format of the dataset to be pushed. Can be `pandas` or `csv` (default).
    :param csv_separator: the CSV separator character, in case of a csv dataset input (default `';'`)
    :param other_opts: extra keyword arguments, forwarded to `ODSClient.push_dataset_realtime`
    :returns: HTTP Response status
    """
    ods = ODSClient(platform_id=platform_id)
    response_status = ods.push_dataset_realtime(dataset_id=dataset_id, dataset=dataset, push_key=push_key,
                                                format=format, csv_separator=csv_separator, **other_opts)
    return response_status