Source code for kegg_pull.entry_ids

"""
Pulling Lists of KEGG Entry IDs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|Functionality| for pulling lists of KEGG entry IDs from the KEGG REST API.
"""
from . import rest as r
from . import kegg_url as ku


def from_database(database: str, kegg_rest: r.KEGGrest | None = None) -> list[str]:
    """ Pulls the KEGG entry IDs of a given database.

    :param database: The KEGG database to pull the entry IDs from. If equal to "brite", the "br:" prefix is prepended to each entry ID that does not already carry it, such that they succeed if used in downstream use of the KEGG "get" operation (e.g. for the "pull" API module or CLI subcommand).
    :param kegg_rest: The KEGGrest object to request the entry IDs. If None, one is created with the default parameters.
    :return: The list of resulting entry IDs.
    :raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
    """
    entry_ids = _process_response(KEGGurl=ku.ListKEGGurl, kegg_rest=kegg_rest, database=database)

    if database == 'brite':
        # Every pulled ID must survive this step: add the prefix where it is missing and keep
        # already-prefixed IDs untouched. Using the startswith() test as a filter would silently
        # drop entries, and prefixing unconditionally would produce "br:br:...".
        entry_ids = [entry_id if entry_id.startswith('br:') else f'br:{entry_id}' for entry_id in entry_ids]

    return entry_ids
def _process_response(KEGGurl: type[ku.AbstractKEGGurl], kegg_rest: r.KEGGrest | None, **kwargs) -> list[str]:
    """ Makes an entry IDs related request and returns the entry IDs listed in the body of the response.

    :param KEGGurl: The URL class for the request.
    :param kegg_rest: The KEGGrest object to make the request with. If None, one is created with the default parameters.
    :param kwargs: The arguments to pass into the KEGGrest method.
    :return: The list of KEGG entry IDs.
    :raises RuntimeError: Raised if the KEGG response indicates a failure or time out.
    """
    response: r.KEGGresponse = r.request_and_check_error(kegg_rest=kegg_rest, KEGGurl=KEGGurl, **kwargs)
    response_body = response.text_body

    return _parse_entry_ids_string(entry_ids_string=response_body)


def _parse_entry_ids_string(entry_ids_string: str) -> list[str]:
    """ Extracts the entry IDs from text that holds one entry per line.

    :param entry_ids_string: The string containing the entry IDs.
    :return: The list of parsed entry IDs.
    """
    parsed_ids = []

    for line in entry_ids_string.strip().split('\n'):
        # Blank lines carry no entry and are skipped.
        if line.strip() == '':
            continue

        # Only the text before the first tab is the entry ID; anything after it is ignored.
        first_column, _, _ = line.partition('\t')
        parsed_ids.append(first_column.strip())

    return parsed_ids
def from_file(file_path: str) -> list[str]:
    """ Loads KEGG entry IDs that are listed in a file with one entry ID on each line.

    :param file_path: The path to the file containing the entry IDs.
    :return: The list of entry IDs.
    :raises ValueError: Raised if the file is empty or contains nothing but whitespace.
    """
    with open(file_path, 'r') as file:
        entry_ids = file.read()

    # A file holding only blank lines has no entry IDs either, so it is rejected like a
    # zero-length file instead of silently producing an empty list.
    if entry_ids.strip() == '':
        raise ValueError(f'Attempted to load entry IDs from {file_path}. But the file is empty')

    return _parse_entry_ids_string(entry_ids_string=entry_ids)
def from_keywords(database: str, keywords: list[str], kegg_rest: r.KEGGrest | None = None) -> list[str]:
    """ Searches the entries of a KEGG database with keywords and pulls the IDs of the matching entries.

    :param database: The name of the database to pull entry IDs from.
    :param keywords: The keywords to search entries in the database with.
    :param kegg_rest: The KEGGrest object to request the entry IDs. If None, one is created with the default parameters.
    :return: The list of entry IDs.
    :raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
    """
    # The search terms are forwarded untouched to the keywords "find" URL class.
    find_arguments = {'database': database, 'keywords': keywords}

    return _process_response(ku.KeywordsFindKEGGurl, kegg_rest, **find_arguments)
def from_molecular_attribute(
        database: str, formula: str | None = None, exact_mass: float | tuple[float, float] | None = None,
        molecular_weight: int | tuple[int, int] | None = None, kegg_rest: r.KEGGrest | None = None) -> list[str]:
    """ Pulls entry IDs from a KEGG database containing chemical entries based on one (and only one) of three molecular attributes of the entries.

    :param database: The name of the database containing chemical entries.
    :param formula: The chemical formula to search for.
    :param exact_mass: The exact mass of the compound to search for (a single value or a range).
    :param molecular_weight: The molecular weight of the compound to search for (a single value or a range).
    :param kegg_rest: The KEGGrest object to request the entry IDs. If None, one is created with the default parameters.
    :return: The list of entry IDs.
    :raises RuntimeError: Raised if the request to the KEGG REST API fails or times out.
    """
    # All three attributes are forwarded as given (including None); no selection is made here.
    search_arguments = {
        'database': database,
        'formula': formula,
        'exact_mass': exact_mass,
        'molecular_weight': molecular_weight,
    }

    return _process_response(ku.MolecularFindKEGGurl, kegg_rest, **search_arguments)