Source code for sentinelsat.products

import os
import shutil
from xml.etree import ElementTree as etree

import sentinelsat
from sentinelsat.sentinel import InvalidChecksumError, _check_scihub_response
from sentinelsat.exceptions import SentinelAPIError


def _xml_to_dataobj_info(element):
    """Collect the download-relevant fields of a manifest ``dataObject`` element.

    Parameters
    ----------
    element : xml.etree.ElementTree.Element
        An element whose tag is ``dataObject``. It must carry ``ID`` and
        ``repID`` attributes and contain a ``byteStream`` child with a
        ``size`` attribute, a ``fileLocation`` child (``href`` attribute)
        and a ``checksum`` child declaring an MD5 checksum.

    Returns
    -------
    dict
        Keys ``id``, ``rep_id``, ``size`` (int), ``href`` and ``md5``.
    """
    assert etree.iselement(element)
    assert element.tag == "dataObject"

    attributes = element.attrib
    info = {"id": attributes["ID"], "rep_id": attributes["repID"]}

    # The byteStream's mimeType attribute is deliberately not extracted.
    byte_stream = element.find("byteStream")
    info["size"] = int(byte_stream.attrib["size"])

    # The fileLocation's locatorType attribute is deliberately not extracted.
    file_location = element.find("byteStream/fileLocation")
    info["href"] = file_location.attrib["href"]

    # Only MD5 checksums are supported.
    checksum = element.find("byteStream/checksum")
    assert checksum.attrib["checksumName"].upper() == "MD5"
    info["md5"] = checksum.text

    return info


class SentinelProductsAPI(sentinelsat.SentinelAPI):
    """Class to connect to Copernicus Open Access Hub, search and download imagery.

    The products node interface allows to filter and download individual product files
    by means of a (optional) *nodefilter* callable function.
    For each file in the product (only excluding the manifest) the *nodefilter* function
    is called to decide if the corresponding file must be downloaded or not.
    The *nodefilter* function has the following signature::

        def nodefilter(node_info: dict) -> bool:
            ...

    The *node_info* parameter is a dictionary containing information like

    * the file *path* within the product (e.g. "./preview/map-overlay.kml")
    * the file size in bytes (int)
    * the file md5

    If the *nodefilter* function returns True the corresponding file is downloaded,
    otherwise the file is not downloaded.

    Parameters
    ----------
    user : string
        username for DataHub
        set to None to use ~/.netrc
    password : string
        password for DataHub
        set to None to use ~/.netrc
    api_url : string, optional
        URL of the DataHub
        defaults to 'https://scihub.copernicus.eu/apihub'
    show_progressbars : bool
        Whether progressbars should be shown or not, e.g. during download. Defaults to True.
    timeout : float or tuple, optional
        How long to wait for DataHub response (in seconds).
        Tuple (connect, read) allowed.

    Attributes
    ----------
    session : requests.Session
        Session to connect to DataHub
    api_url : str
        URL to the DataHub
    page_size : int
        Number of results per query page.
        Current value: 100 (maximum allowed on ApiHub)
    timeout : float or tuple
        How long to wait for DataHub response (in seconds).

    .. versionadded:: 0.15
    """

    def _path_to_url(self, product_info, path, urltype=None):
        """Build the OData URL of a node located inside the product's ``.SAFE`` node.

        Parameters
        ----------
        product_info : dict
            Must provide the ``id`` and ``title`` of the product.
        path : str
            ``/``-separated path of the node relative to the ``.SAFE`` root.
        urltype : str, optional
            ``"value"``, ``"json"`` or ``"full"`` select a predefined URL
            suffix; None adds no suffix; any other value is appended verbatim.

        Returns
        -------
        str
            The node URL. ``self.api_url`` is used as-is as the prefix, so it
            is presumably expected to end with a slash (confirm against the
            parent class).
        """
        if urltype == "value":
            suffix = "/$value"
        elif urltype == "json":
            suffix = "?$format=json"
        elif urltype == "full":
            suffix = "?$format=json&$expand=Attributes"
        elif urltype is None:
            suffix = ""
        else:
            suffix = urltype

        # Every path component becomes its own Nodes('...') segment.
        node_path = "/".join("Nodes('{}')".format(item) for item in path.split("/"))

        return "{api_url}odata/v1/Products('{id}')/Nodes('{title}.SAFE')/{path}{urltype}".format(
            api_url=self.api_url,
            id=product_info["id"],
            title=product_info["title"],
            path=node_path,
            urltype=suffix,
        )

    def _get_manifest(self, product_info, path=None):
        """Return the node info and the raw content of the product manifest.

        If *path* points to an existing file its content is used and nothing
        is downloaded. Otherwise the manifest is fetched from the server, its
        length is checked against the advertised ``ContentLength`` and, when
        *path* is given, the content is written to that file.

        Parameters
        ----------
        product_info : dict
            Product information; copied to form the base of the node info.
        path : str, optional
            Location of the local copy of ``manifest.safe``.

        Returns
        -------
        node_info : dict
            Copy of *product_info* with ``url``, ``node_path`` and ``size``
            set for the manifest and without the ``md5`` entry.
        data : bytes
            Content of the manifest.

        Raises
        ------
        SentinelAPIError
            If the downloaded data length differs from the advertised size.
        """
        node_info = product_info.copy()
        node_info["url"] = self._path_to_url(product_info, "manifest.safe", "value")
        node_info["node_path"] = os.path.join(".", "manifest.safe")
        # The md5 inherited from product_info is not the manifest's own checksum.
        # pop() tolerates product dictionaries that carry no md5 at all.
        node_info.pop("md5", None)

        if path and os.path.exists(path):
            self.logger.info("manifest file already available (%r), skip download", path)
            with open(path, "rb") as fd:
                data = fd.read()
            node_info["size"] = len(data)
            return node_info, data

        # Query the node metadata first to learn the expected size.
        info_url = self._path_to_url(product_info, "manifest.safe", "json")
        response = self.session.get(info_url, auth=self.session.auth)
        _check_scihub_response(response)
        info = response.json()["d"]
        node_info["size"] = int(info["ContentLength"])

        response = self.session.get(node_info["url"], auth=self.session.auth)
        _check_scihub_response(response, test_json=False)
        data = response.content
        if len(data) != node_info["size"]:
            raise SentinelAPIError("File corrupt: data length do not match")

        if path:
            parent_dir = os.path.dirname(path)
            # A bare file name has an empty dirname, which os.makedirs rejects.
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(path, "wb") as fd:
                fd.write(data)

        return node_info, data

    def _dataobj_to_node_info(self, dataobj_info, product_info):
        """Combine a manifest data-object description with the product info.

        Parameters
        ----------
        dataobj_info : dict
            Must provide ``href``, ``size`` and ``md5``.
        product_info : dict
            Product information; copied to form the base of the node info.

        Returns
        -------
        dict
            Copy of *product_info* with ``url``, ``size``, ``md5`` and
            ``node_path`` (the unmodified ``href``) set for the node.
        """
        href = dataobj_info["href"]
        # The URL is built from the path without the leading "./".
        relative_path = href[2:] if href.startswith("./") else href

        node_info = product_info.copy()
        node_info["url"] = self._path_to_url(product_info, relative_path, "value")
        node_info["size"] = dataobj_info["size"]
        node_info["md5"] = dataobj_info["md5"]
        node_info["node_path"] = href
        return node_info

    def _filter_nodes(self, manifest, product_info, nodefilter=None):
        """Select the nodes listed in the manifest that pass *nodefilter*.

        Parameters
        ----------
        manifest : str or file object
            Source of the manifest XML, handed to ``etree.parse``.
        product_info : dict
            Product information used to build each node info.
        nodefilter : callable, optional
            Called with each node info; nodes for which it returns a false
            value are dropped. If None, a ``nodefilter`` attribute of the
            instance is used when one exists; otherwise all nodes are kept.

        Returns
        -------
        dict
            Mapping of ``node_path`` to node info for the selected nodes.
        """
        if nodefilter is None:
            # No "nodefilter" attribute is defined by this class, so look it
            # up defensively instead of failing with AttributeError.
            nodefilter = getattr(self, "nodefilter", None)

        nodes = {}
        xmldoc = etree.parse(manifest)
        data_obj_section_elem = xmldoc.find("dataObjectSection")
        for elem in data_obj_section_elem.iterfind("dataObject"):
            dataobj_info = _xml_to_dataobj_info(elem)
            node_info = self._dataobj_to_node_info(dataobj_info, product_info)
            if nodefilter is not None and not nodefilter(node_info):
                continue
            nodes[node_info["node_path"]] = node_info
        return nodes

    def download(self, id, directory_path=".", checksum=True, nodefilter=None, **kwargs):
        """Download a product.

        Uses the filename on the server for the downloaded files, e.g.
        "S1A_EW_GRDH_1SDH_20141003T003840_20141003T003920_002658_002F54_4DD1.SAFE/manifest.safe".

        Incomplete downloads are continued and complete files are skipped.

        If the manifest is not available locally and the product is not
        online, retrieval from the long term archive is triggered and the
        product info is returned without downloading anything.

        Parameters
        ----------
        id : string
            UUID of the product, e.g. 'a8dd0cfd-613e-45ce-868c-d79177b916ed'
        directory_path : string, optional
            Where the file will be downloaded
        checksum : bool, optional
            If True, verify the downloaded file's integrity by checking its MD5 checksum.
            Throws InvalidChecksumError if the checksum does not match.
            Defaults to True.
        nodefilter : callable, optional
            The *nodefilter* callable used to select which file of each product have to
            be downloaded.
            If *nodefilter* is None then no file filtering is performed and the class
            behaves exactly as :class:`sentinelsat.sentinel.SentinelAPI`.

        Returns
        -------
        product_info : dict
            Dictionary containing the product's info from get_product_info() as well as
            the path on disk.

        Raises
        ------
        InvalidChecksumError
            If the MD5 checksum does not match the checksum on the server.
        """
        if nodefilter is None:
            return sentinelsat.SentinelAPI.download(self, id, directory_path, checksum, **kwargs)

        product_info = self.get_product_odata(id)
        safe_name = product_info["title"] + ".SAFE"
        product_path = os.path.join(directory_path, safe_name)
        product_info["node_path"] = os.path.join(".", safe_name)
        manifest_path = os.path.join(product_path, "manifest.safe")

        if not os.path.exists(manifest_path) and not product_info["Online"]:
            self.logger.warning(
                "Product %s is not online. Triggering retrieval from long term archive.",
                product_info["id"],
            )
            self._trigger_offline_retrieval(product_info["url"])
            return product_info

        manifest_info, _ = self._get_manifest(product_info, manifest_path)
        product_info["nodes"] = {
            manifest_info["node_path"]: manifest_info,
        }

        node_infos = self._filter_nodes(manifest_path, product_info, nodefilter)
        product_info["nodes"].update(node_infos)

        for node_info in node_infos.values():
            node_path = node_info["node_path"]
            path = os.path.join(product_path, os.path.normpath(node_path))
            node_info["path"] = path
            node_info["downloaded_bytes"] = 0

            self.logger.info("Downloading %s node to %s", id, path)
            self.logger.debug("Node URL for %s: %s", id, node_info["url"])

            if os.path.exists(path):
                # We assume that the product node has been downloaded and is complete
                continue

            # Use a temporary file for downloading
            temp_path = path + ".incomplete"

            skip_download = False
            # Set when the leftover temporary file has already passed the MD5
            # check, so the file does not have to be hashed a second time.
            already_verified = False
            if os.path.exists(temp_path):
                temp_size = os.path.getsize(temp_path)
                if temp_size > node_info["size"]:
                    self.logger.warning(
                        "Existing incomplete file %s is larger than the expected final size"
                        " (%s vs %s bytes). Deleting it.",
                        str(temp_path),
                        temp_size,
                        node_info["size"],
                    )
                    os.remove(temp_path)
                elif temp_size == node_info["size"]:
                    if checksum is True and not self._md5_compare(temp_path, node_info["md5"]):
                        # Log a warning since this should never happen
                        self.logger.warning(
                            "Existing incomplete file %s appears to be fully downloaded but "
                            "its checksum is incorrect. Deleting it.",
                            str(temp_path),
                        )
                        os.remove(temp_path)
                    else:
                        skip_download = True
                        already_verified = checksum is True
                else:
                    # continue downloading
                    self.logger.info(
                        "Download will resume from existing incomplete file %s.", temp_path
                    )

            if not skip_download:
                # Store the number of downloaded bytes for unit tests
                os.makedirs(os.path.dirname(temp_path), exist_ok=True)
                node_info["downloaded_bytes"] = self._download(
                    node_info["url"], temp_path, self.session, node_info["size"]
                )

            # Check integrity with MD5 checksum
            if checksum is True and not already_verified:
                if not self._md5_compare(temp_path, node_info["md5"]):
                    os.remove(temp_path)
                    raise InvalidChecksumError("File corrupt: checksums do not match")

            # Download successful, rename the temporary file to its proper name
            shutil.move(temp_path, path)

        return product_info
def make_size_filter(max_size):
    """Build a nodefilter that accepts only files not larger than *max_size*.

    Parameters
    ----------
    max_size : int
        Largest accepted file size; it is compared against the ``size``
        entry of each node info, and the limit itself is accepted.

    Returns
    -------
    callable
        A function taking a *node_info* dictionary and returning True when
        ``node_info["size"]`` does not exceed the limit, False otherwise.

    .. versionadded:: 0.15
    """

    def node_filter(node_info, size=max_size):
        within_limit = node_info["size"] <= size
        return bool(within_limit)

    return node_filter
def make_path_filter(pattern, exclude=False):
    """Generate a nodefilter function to download only files matching the
    specified pattern.

    Matching is case-insensitive: both the ``node_path`` of the node and the
    pattern are lower-cased before they are compared.

    Parameters
    ----------
    pattern : str
        glob pattern for files selection
    exclude : bool, optional
        if set to True then files matching the specified pattern are excluded.
        Default False.

    Returns
    -------
    callable
        A function taking a *node_info* dictionary and returning True if the
        corresponding file has to be downloaded, False otherwise.

    .. versionadded:: 0.15
    """
    import fnmatch

    if exclude:

        def node_filter(node_info, exclude_pattern=pattern):
            # The path is lower-cased, so the pattern must be as well or any
            # upper-case character in it could never match.
            return not fnmatch.fnmatch(node_info["node_path"].lower(), exclude_pattern.lower())

    else:

        def node_filter(node_info, include_pattern=pattern):
            # The path is lower-cased, so the pattern must be as well or any
            # upper-case character in it could never match.
            return bool(fnmatch.fnmatch(node_info["node_path"].lower(), include_pattern.lower()))

    return node_filter
def all_nodes_filter(node_info):
    """Nodefilter that accepts every file of a product.

    Passing this function as *nodefilter* makes it possible to download a
    Sentinel product as a directory rather than as a single zip archive.

    Parameters
    ----------
    node_info : dict
        Information about the node; it is not inspected.

    Returns
    -------
    bool
        Always True.

    .. versionadded:: 0.15
    """
    return True