1 # Authors: Sylvain MARIE <sylvain.marie@se.com>
2 # + All contributors to <https://github.com/smarie/python-odsclient>
3 #
4 # License: 3-clause BSD, <https://github.com/smarie/python-odsclient/blob/master/LICENSE>
5 import os
6 from glob import glob
7 from shutil import rmtree
8
9 try:
10 # noinspection PyUnresolvedReferences
11 from typing import Union
12 except ImportError:
13 pass
14
15 try:
16 from pathlib import Path
17 except ImportError:
18 # do not care: only used for type hinting
19 pass
20
21 from requests import Session
22
23 from odsclient.core import KR_DEFAULT_USERNAME, ODSClient, CACHE_ROOT_FOLDER, baseurl_to_id_str, CacheEntry
24
25
def store_apikey_in_keyring(platform_id='public',                          # type: str
                            base_url=None,                                 # type: str
                            keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                            apikey=None,                                   # type: str
                            ):
    """
    Shortcut to save an api key in the OS keyring (through the `keyring` lib).

    A temporary `ODSClient` is created for the target platform and the call is delegated to it, so this is
    equivalent to `ODSClient(...).store_apikey_in_keyring(apikey)`.

    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. Default is `'public'` which leads to the base url
        https://public.opendatasoft.com
    :param base_url: an explicit base url to use instead of the one generated from `platform_id`
    :param keyring_entries_username: keyring stores secrets with a key made of a service id and a username. The
        base url is used for the service id, however the user name can be anything. Defaults to
        `KR_DEFAULT_USERNAME`.
    :param apikey: an explicit apikey string. If not provided, `getpass()` will be used to prompt the user for the
        api key
    :return: nothing
    """
    ods = ODSClient(
        platform_id=platform_id,
        base_url=base_url,
        keyring_entries_username=keyring_entries_username,
    )
    ods.store_apikey_in_keyring(apikey=apikey)
48
49
def get_apikey_from_keyring(platform_id='public',                          # type: str
                            base_url=None,                                 # type: str
                            keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                            ):
    """
    Shortcut to read an api key previously saved in the OS keyring (through the `keyring` lib).

    A temporary `ODSClient` is created for the target platform and the lookup is delegated to
    `ODSClient.get_apikey_from_keyring`, called with `ignore_import_errors=False`.

    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. Default is `'public'`.
    :param base_url: an explicit base url to use instead of the one generated from `platform_id`
    :param keyring_entries_username: the username part of the keyring entry key. Defaults to
        `KR_DEFAULT_USERNAME`.
    :return: whatever `ODSClient.get_apikey_from_keyring` returns (presumably the stored api key - see that method)
    """
    ods = ODSClient(
        platform_id=platform_id,
        base_url=base_url,
        keyring_entries_username=keyring_entries_username,
    )
    stored_key = ods.get_apikey_from_keyring(ignore_import_errors=False)
    return stored_key
62
63
def remove_apikey_from_keyring(platform_id='public',                          # type: str
                               base_url=None,                                 # type: str
                               keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                               ):
    """
    Shortcut to delete an api key previously saved in the OS keyring (through the `keyring` lib).

    A temporary `ODSClient` is created for the target platform and the call is delegated to it, so this is
    equivalent to `ODSClient(...).remove_apikey_from_keyring()`.

    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. Default is `'public'` which leads to the base url
        https://public.opendatasoft.com
    :param base_url: an explicit base url to use instead of the one generated from `platform_id`
    :param keyring_entries_username: keyring stores secrets with a key made of a service id and a username. The
        base url is used for the service id, however the user name can be anything. Defaults to
        `KR_DEFAULT_USERNAME`.
    :return: nothing
    """
    ods = ODSClient(
        platform_id=platform_id,
        base_url=base_url,
        keyring_entries_username=keyring_entries_username,
    )
    ods.remove_apikey_from_keyring()
83
84
def get_apikey(platform_id='public',                          # type: str
               base_url=None,                                 # type: str
               apikey_filepath='ods.apikey',                  # type: str
               use_keyring=True,                              # type: bool
               keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
               ):
    # type: (...) -> str
    """
    Shortcut to check which api key would be used by ods clients created with the same options.

    A temporary `ODSClient` is created with the provided options and the lookup is delegated to it, so this is
    equivalent to `ODSClient(...).get_apikey()`.

    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. Default is `'public'` which leads to the base url
        https://public.opendatasoft.com
    :param base_url: an explicit base url to use instead of the one generated from `platform_id`
    :param apikey_filepath: the path that should be used to look for api keys on the file system. Such files are
        optional, other (safer) methods exist to pass the api key, see documentation for details.
    :param use_keyring: an optional boolean (default `True`) specifying whether the `keyring` library should be used
        to lookup existing api keys. Keys should be stored using `store_apikey_in_keyring()`.
    :param keyring_entries_username: keyring stores secrets with a key made of a service id and a username. The
        base url is used for the service id, however the user name can be anything. Defaults to
        `KR_DEFAULT_USERNAME`.
    :return: the result of `ODSClient.get_apikey()`
    """
    client_options = dict(
        platform_id=platform_id,
        base_url=base_url,
        apikey_filepath=apikey_filepath,
        use_keyring=use_keyring,
        keyring_entries_username=keyring_entries_username,
    )
    ods = ODSClient(**client_options)
    return ods.get_apikey()
113
114
def get_whole_dataframe(dataset_id,                                    # type: str
                        use_labels_for_header=True,                    # type: bool
                        tqdm=False,                                    # type: bool
                        block_size=1024,                               # type: int
                        file_cache=False,                              # type: bool
                        platform_id='public',                          # type: str
                        base_url=None,                                 # type: str
                        enforce_apikey=False,                          # type: bool
                        apikey=None,                                   # type: str
                        apikey_filepath='ods.apikey',                  # type: Union[str, Path]
                        use_keyring=True,                              # type: bool
                        keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                        requests_session=None,                         # type: Session
                        auto_close_session=None,                       # type: bool
                        **other_opts
                        ):
    """
    Shortcut method for ODSClient(...).get_whole_dataframe(...)
    Returns a dataset as a pandas dataframe. pandas must be installed.

    The arguments are split in two groups: the connection-related ones are used to create the `ODSClient`, all
    others are forwarded to `ODSClient.get_whole_dataframe`.

    :param dataset_id: the id of the dataset to retrieve
    :param use_labels_for_header: forwarded as is to `ODSClient.get_whole_dataframe` (default `True`)
    :param tqdm: a boolean indicating if a progress bar using tqdm should be displayed. tqdm should be installed
    :param block_size: an int block size used in streaming mode when to_csv or tqdm is used
    :param file_cache: a boolean (default `False`) forwarded as is to `ODSClient.get_whole_dataframe`; presumably
        enables the same local file cache as in `get_whole_dataset` - see `ODSClient` for details.
    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. Default is `'public'` which leads to the base url
        https://public.opendatasoft.com
    :param base_url: an explicit base url to use instead of the one generated from `platform_id`
    :param enforce_apikey: an optional boolean indicating if an error should be raised if no apikey is found at all
        (not in the explicit argument, not in a file, environment variable, nor keyring) (default `False`)
    :param apikey: an explicit api key as a string.
    :param apikey_filepath: the path that should be used to look for api keys on the file system. Such files are
        optional, other (safer) methods exist to pass the api key, see documentation for details.
    :param use_keyring: an optional boolean (default `True`) specifying whether the `keyring` library should be used
        to lookup existing api keys. Keys should be stored using `store_apikey_in_keyring()`.
    :param keyring_entries_username: keyring stores secrets with a key made of a service id and a username. The
        base url is used for the service id, however the user name can be anything. Defaults to
        `KR_DEFAULT_USERNAME`.
    :param requests_session: an optional `Session` object to use (from `requests` lib). If `None` is provided,
        a new `Session` will be used and deleted when the client is garbaged out. If a custom object is provided,
        you should close it yourself or switch `auto_close_session` to `True` explicitly.
    :param auto_close_session: an optional boolean indicating if the client session should be closed when the
        client is garbaged out. By default this is `None` and means "`True` if no custom `requests_session` is
        passed, else `False`"). Turning this to `False` can leave hanging Sockets unclosed.
    :param other_opts: additional keyword arguments, forwarded as is to `ODSClient.get_whole_dataframe`
    :return: the result of `ODSClient.get_whole_dataframe` (the dataset as a pandas dataframe)
    """
    # 1. connection / authentication options: consumed by the client constructor
    client_options = dict(
        platform_id=platform_id,
        base_url=base_url,
        enforce_apikey=enforce_apikey,
        apikey=apikey,
        apikey_filepath=apikey_filepath,
        use_keyring=use_keyring,
        auto_close_session=auto_close_session,
        keyring_entries_username=keyring_entries_username,
        requests_session=requests_session,
    )
    ods = ODSClient(**client_options)

    # 2. download options: consumed by the actual query
    return ods.get_whole_dataframe(
        dataset_id=dataset_id,
        use_labels_for_header=use_labels_for_header,
        tqdm=tqdm,
        block_size=block_size,
        file_cache=file_cache,
        **other_opts
    )
167
168
def clean_cache(dataset_id=None,   # type: str
                platform_id=None,  # type: str
                base_url=None,     # type: str
                cache_root=None    # type: Union[str, Path]
                ):
    """
    Cleans the file cache. A message is printed for each removed item.

    Three scopes are supported:

     - if `dataset_id` is provided, only the cached files of this dataset (any format) are removed. The platform
       is identified by `platform_id` / `base_url` as in `get_cached_dataset_entry`.
     - otherwise, if `platform_id` or `base_url` is provided, the whole cache folder of this platform is removed.
       `platform_id` takes precedence over `base_url`.
     - otherwise the entire cache root folder is removed.

    Folder removals ignore errors (for example a folder that does not exist).

    :param dataset_id: optional id of the single dataset to remove from the cache
    :param platform_id: optional ods platform id
    :param base_url: optional explicit base url, converted to a folder name with `baseurl_to_id_str`
    :param cache_root: optional cache root folder (str or Path). `CACHE_ROOT_FOLDER` is used when `None` and a
        folder has to be removed.
    :return: nothing
    """
    # Scope 1: a single dataset. The "*" format turns the entry file path into a glob pattern.
    if dataset_id is not None:
        entry_pattern = get_cached_dataset_entry(dataset_id, format="*", platform_id=platform_id,
                                                 base_url=base_url, cache_root=cache_root)
        for match in glob(str(entry_pattern.file_path)):
            print("[odsclient] Removing cached dataset entry for %r: %r" % (dataset_id, match))
            os.remove(match)
        return

    root = CACHE_ROOT_FOLDER if cache_root is None else str(cache_root)

    # Resolve the platform folder name, if any: explicit id first, then the id derived from the base url
    pseudo_id = platform_id
    if pseudo_id is None and base_url is not None:
        pseudo_id = baseurl_to_id_str(base_url)

    # Scope 2: a whole platform
    if pseudo_id is not None:
        platform_folder = "%s/%s/" % (root, pseudo_id)
        print("[odsclient] Removing cache for platform %r: folder %r" % (pseudo_id, platform_folder))
        rmtree(platform_folder, ignore_errors=True)
        return

    # Scope 3: everything
    print("[odsclient] Removing entire cache folder %r" % root)
    rmtree(root, ignore_errors=True)
212
213
def get_cached_dataset_entry(dataset_id,            # type: str
                             format='csv',          # type: str
                             platform_id='public',  # type: str
                             base_url=None,         # type: str
                             cache_root=None        # type: Union[str, Path]
                             ):
    # type: (...) -> CacheEntry
    """
    Shortcut method for ODSClient(...).get_cached_dataset_entry(...)

    A temporary `ODSClient` is created from `platform_id` and `base_url` only; the other arguments are forwarded
    to `ODSClient.get_cached_dataset_entry`.

    :param dataset_id: the id of the dataset
    :param format: the dataset format (default `'csv'`)
    :param platform_id: the ods platform id, used to create the client (default `'public'`)
    :param base_url: an explicit base url, used to create the client
    :param cache_root: optional cache root folder (str or Path), forwarded as is
    :return: the `CacheEntry` returned by the client
    """
    ods = ODSClient(platform_id=platform_id, base_url=base_url)
    entry = ods.get_cached_dataset_entry(
        dataset_id=dataset_id,
        format=format,
        cache_root=cache_root,
    )
    return entry
233
234
# noinspection PyShadowingBuiltins
def get_whole_dataset(dataset_id,                                    # type: str
                      format='csv',                                  # type: str
                      timezone=None,                                 # type: str
                      use_labels_for_header=True,                    # type: bool
                      csv_separator=';',                             # type: str
                      tqdm=False,                                    # type: bool
                      to_path=None,                                  # type: Union[str, Path]
                      file_cache=False,                              # type: bool
                      block_size=1024,                               # type: int
                      platform_id='public',                          # type: str
                      base_url=None,                                 # type: str
                      enforce_apikey=False,                          # type: bool
                      apikey=None,                                   # type: str
                      apikey_filepath='ods.apikey',                  # type: Union[str, Path]
                      use_keyring=True,                              # type: bool
                      keyring_entries_username=KR_DEFAULT_USERNAME,  # type: str
                      requests_session=None,                         # type: Session
                      auto_close_session=None,                       # type: bool
                      **other_opts
                      ):
    """
    Shortcut method for ODSClient(...).get_whole_dataset(...)

    The arguments are split in two groups: the connection-related ones are used to create the `ODSClient`, all
    others are forwarded to `ODSClient.get_whole_dataset`.

    :param dataset_id: the id of the dataset to retrieve
    :param format: the format in which the dataset should be retrieved (default `'csv'`)
    :param timezone: forwarded as is to `ODSClient.get_whole_dataset` (default `None`)
    :param use_labels_for_header: forwarded as is to `ODSClient.get_whole_dataset` (default `True`)
    :param csv_separator: forwarded as is to `ODSClient.get_whole_dataset` (default `';'`)
    :param tqdm: a boolean indicating if a progress bar using tqdm should be displayed. tqdm should be installed
    :param to_path: a string indicating the file path where to write the dataset (csv or other format). In that case
        nothing is returned
    :param file_cache: a boolean (default False) indicating whether the file should be written to a local cache
        `.odsclient/<pseudo_platform_id>_<dataset_id>.<format>`. See `get_cached_dataset_entry` for details.
    :param block_size: an int block size used in streaming mode when to_csv or tqdm is used
    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. Default is `'public'` which leads to the base url
        https://public.opendatasoft.com
    :param base_url: an explicit base url to use instead of the one generated from `platform_id`
    :param enforce_apikey: an optional boolean indicating if an error should be raised if no apikey is found at all
        (not in the explicit argument, not in a file, environment variable, nor keyring) (default `False`)
    :param apikey: an explicit api key as a string.
    :param apikey_filepath: the path that should be used to look for api keys on the file system. Such files are
        optional, other (safer) methods exist to pass the api key, see documentation for details.
    :param use_keyring: an optional boolean (default `True`) specifying whether the `keyring` library should be used
        to lookup existing api keys. Keys should be stored using `store_apikey_in_keyring()`.
    :param keyring_entries_username: keyring stores secrets with a key made of a service id and a username. The
        base url is used for the service id, however the user name can be anything. Defaults to
        `KR_DEFAULT_USERNAME`.
    :param requests_session: an optional `Session` object to use (from `requests` lib). If `None` is provided,
        a new `Session` will be used and deleted when the client is garbaged out. If a custom object is provided,
        you should close it yourself or switch `auto_close_session` to `True` explicitly.
    :param auto_close_session: an optional boolean indicating if the client session should be closed when the
        client is garbaged out. By default this is `None` and means "`True` if no custom `requests_session` is
        passed, else `False`"). Turning this to `False` can leave hanging Sockets unclosed.
    :param other_opts: additional keyword arguments, forwarded as is to `ODSClient.get_whole_dataset`
    :return: the result of `ODSClient.get_whole_dataset`
    """
    # 1. connection / authentication options: consumed by the client constructor
    client_options = dict(
        platform_id=platform_id,
        base_url=base_url,
        enforce_apikey=enforce_apikey,
        apikey=apikey,
        apikey_filepath=apikey_filepath,
        use_keyring=use_keyring,
        auto_close_session=auto_close_session,
        keyring_entries_username=keyring_entries_username,
        requests_session=requests_session,
    )
    ods = ODSClient(**client_options)

    # 2. download options: consumed by the actual query
    return ods.get_whole_dataset(
        dataset_id=dataset_id,
        format=format,
        file_cache=file_cache,
        timezone=timezone,
        use_labels_for_header=use_labels_for_header,
        csv_separator=csv_separator,
        tqdm=tqdm,
        to_path=to_path,
        block_size=block_size,
        **other_opts
    )
300
301
# NOTE(review): `pandas` is not imported in this module; it is only named in the type comment below, which linters
# report as an undefined name (F821).
def push_dataset_realtime(platform_id,        # type: str
                          dataset_id,         # type: str
                          dataset,            # type: Union[str, pandas.DataFrame]
                          push_key,           # type: str
                          format='csv',       # type: str
                          csv_separator=';',  # type: str
                          **other_opts
                          ):
    """
    Pushes a Dataset. This function accepts either a Pandas Dataframe or a CSV string with header included.

    Shortcut for `ODSClient(platform_id=...).push_dataset_realtime(...)`: a temporary client is created from
    `platform_id` only, and all other arguments are forwarded to it.

    :param platform_id: the ods platform id to use. This id is used to construct the base URL based on the pattern
        https://<platform_id>.opendatasoft.com. This argument is mandatory here (no default value).
    :param dataset_id: the id of the dataset to push to
    :param dataset: the dataset to push, either a pandas DataFrame or a CSV string with header included (see
        `format`).
    :param push_key: The Push Key provided by the API for pushing this dataset. Warning: This key is independent
        from the API key. It can be acquired from the Realtime Push API URL section in ODS.
    :param format: The format of the dataset to be pushed. Can be `pandas` or `csv`.
    :param csv_separator: CSV separator character in case of a csv dataset input.
    :param other_opts: additional keyword arguments, forwarded as is to `ODSClient.push_dataset_realtime`
    :returns: HTTP Response status
    """
    ods = ODSClient(platform_id=platform_id)
    push_options = dict(
        dataset_id=dataset_id,
        dataset=dataset,
        push_key=push_key,
        format=format,
        csv_separator=csv_separator,
    )
    return ods.push_dataset_realtime(**dict(push_options, **other_opts))