Source code for cellmaps_hierarchyeval.cellmaps_hierarchyevalcmd

#! /usr/bin/env python
import json
import os
import argparse
import sys
import logging
import logging.config
from cellmaps_utils import logutils
from cellmaps_utils import constants
import cellmaps_hierarchyeval
from cellmaps_hierarchyeval.runner import CellmapshierarchyevalRunner
from cellmaps_hierarchyeval.analysis import OllamaCommandLineGeneSetAgent
from cellmaps_hierarchyeval.analysis import OllamaRestServiceGenesetAgent
from cellmaps_hierarchyeval.analysis import FakeGeneSetAgent

# Module-level logger named after this module
logger = logging.getLogger(__name__)


# Name of the required command line flag that points at the directory
# where the hierarchy was generated; also shown in the program description
HIERARCHYDIR = '--hierarchy_dir'
# Default value for the --ollama flag and for the ``ollama`` parameter of
# get_ollama_geneset_agents()
PATH_TO_OLLAMA = '/usr/local/bin/ollama'


def _parse_arguments(desc, args):
    """
    Builds the command line parser for this tool and parses **args** with it

    Besides the positional ``outdir`` and the required ``--hierarchy_dir``
    flag, the parser accepts enrichment thresholds, network UUIDs,
    EXPERIMENTAL ollama settings, FAIRSCAPE naming values and
    logging controls. Defaults for the enrichment related flags are taken
    from class attributes on
    :py:class:`~cellmaps_hierarchyeval.runner.CellmapshierarchyevalRunner`

    :param desc: description to display on command line
    :type desc: str
    :param args: command line arguments usually :py:func:`sys.argv[1:]`
    :type args: list
    :return: arguments parsed by :py:mod:`argparse`
    :rtype: :py:class:`argparse.Namespace`
    """
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=constants.ArgParseFormatter)
    parser.add_argument('outdir', help='Output directory')
    parser.add_argument(HIERARCHYDIR, required=True,
                        help='Directory where hierarchy was generated')
    parser.add_argument('--max_fdr', type=float, default=CellmapshierarchyevalRunner.MAX_FDR,
                        help='Maximum false discovery rate')
    parser.add_argument('--min_jaccard_index', type=float,
                        default=CellmapshierarchyevalRunner.MIN_JACCARD_INDEX,
                        help='Minimum jaccard index')
    parser.add_argument('--min_comp_size', type=int, default=CellmapshierarchyevalRunner.MIN_COMP_SIZE,
                        help='Minimum term size to consider for enrichment')
    parser.add_argument('--corum', default=CellmapshierarchyevalRunner.CORUM,
                        help='UUID for CORUM network')
    parser.add_argument('--go_cc', default=CellmapshierarchyevalRunner.GO_CC,
                        help='UUID for GO-CC network')
    parser.add_argument('--hpa', default=CellmapshierarchyevalRunner.HPA,
                        help='UUID for HPA network')
    parser.add_argument('--ndex_server', default=CellmapshierarchyevalRunner.NDEX_SERVER,
                        help='NDEx server to use')
    parser.add_argument('--skip_term_enrichment', action='store_true',
                        help='If set, SKIP enrichment against networks set '
                             'via --corum, --go_cc, --hpa')
    # Adjacent string literals are concatenated verbatim, so every fragment
    # below must carry its own separating space
    parser.add_argument('--ollama', default=PATH_TO_OLLAMA,
                        help='Path to ollama command line binary or REST service. '
                             'If value starts with http it is assumed to be a REST '
                             'url and all prompts will be passed to service. For '
                             'REST url the suffix api/generate must be appended. '
                             'Example: http://foo/api/generate '
                             'NOTE: ollama integration with this tool is '
                             'EXPERIMENTAL and interface may be '
                             'changed or removed in the future ')
    parser.add_argument('--ollama_user',
                        help='Username to pass as basic auth to ollama REST '
                             'service')
    parser.add_argument('--ollama_password',
                        help='Password to pass via basic auth to ollama REST '
                             'service')
    parser.add_argument('--ollama_prompts', nargs='+',
                        help='Comma delimited value of format <MODEL NAME> or '
                             '<MODEL NAME>,<PROMPT> '
                             'where <PROMPT> can be path to prompt file or prompt to '
                             'run. For insertion of gene set please include {GENE_SET} '
                             'in prompt and tell LLM to put Process: <name> on first line '
                             'with name assigned to assembly and Confidence Score: <score> '
                             'on 2nd line with confidence in the name given. '
                             'If just <MODEL NAME> is set, then default prompt is used with '
                             'model specified. '
                             'NOTE: if <MODEL NAME> is set to FAKE then a completely fake '
                             'agent will be used. Also note: ollama integration with this '
                             'tool is EXPERIMENTAL and interface may be '
                             'changed or removed in the future ')
    parser.add_argument('--provenance',
                        help='Path to file containing provenance '
                             'information about input files in JSON format. '
                             'This is required if --hierarchy_dir directory '
                             'does not contain ro-crate-metadata.json file.')
    parser.add_argument('--name',
                        help='Name of this run, needed for FAIRSCAPE. If '
                             'unset, name value specified '
                             'in --hierarchy_dir directory or provenance file will be used')
    parser.add_argument('--organization_name',
                        help='Name of organization running this tool, needed '
                             'for FAIRSCAPE. If unset, organization name specified '
                             'in --hierarchy_dir directory or provenance file will be used')
    parser.add_argument('--project_name',
                        help='Name of project running this tool, needed for '
                             'FAIRSCAPE. If unset, project name specified '
                             'in --hierarchy_dir directory or provenance file will be used')
    parser.add_argument('--skip_logging', action='store_true',
                        help='If set, output.log, error.log '
                             'files will not be created')
    parser.add_argument('--logconf', default=None,
                        help='Path to python logging configuration file in '
                             'this format: https://docs.python.org/3/library/'
                             'logging.config.html#logging-config-fileformat '
                             'Setting this overrides -v parameter which uses '
                             'default logger. (default None)')
    # The count starts at 1 (described as ERROR in the help text); each -v
    # adds one. NOTE(review): the count-to-level mapping is presumably
    # applied by logutils.setup_cmd_logging -- confirm there
    parser.add_argument('--verbose', '-v', action='count', default=1,
                        help='Increases verbosity of logger to standard '
                             'error for log messages in this module. Messages are '
                             'output at these python logging levels '
                             '-v = WARNING, -vv = INFO, '
                             '-vvv = DEBUG, -vvvv = NOTSET (default ERROR '
                             'logging)')
    parser.add_argument('--version', action='version',
                        version=('%(prog)s ' +
                                 cellmaps_hierarchyeval.__version__))

    return parser.parse_args(args)


def get_ollama_geneset_agents(ollama=PATH_TO_OLLAMA, ollama_prompts=None,
                              username=None, password=None):
    """
    Turns the values of the ``--ollama_prompts`` flag into geneset agents

    Each entry of **ollama_prompts** is split into a model and prompt via
    :py:func:`get_model_prompt_from_string`. A model named ``fake``
    (case insensitive) yields a ``FakeGeneSetAgent``. Otherwise a REST
    service agent is created when **ollama** starts with ``http`` and a
    command line agent is created when it does not.

    :param ollama: Path to ollama binary or URL of ollama REST service
    :type ollama: str
    :param ollama_prompts: Values in format ``<MODEL>`` or
                           ``<MODEL>,<PROMPT>``
    :type ollama_prompts: list
    :param username: Username handed to the REST service agent
    :type username: str
    :param password: Password handed to the REST service agent
    :type password: str
    :return: One agent per entry in **ollama_prompts** or ``None`` if
             **ollama_prompts** is ``None``
    :rtype: list
    """
    if ollama_prompts is None:
        return None

    is_rest_url = ollama.startswith('http')
    if is_rest_url:
        logger.info('For all agents, using ollama REST service: ' + str(ollama))
        if not ollama.endswith('api/generate'):
            logger.warning(str(ollama) + ' does not end with api/generate and may not work.')

    agents = []
    for prompt_spec in ollama_prompts:
        model, prompt = get_model_prompt_from_string(prompt_spec)

        # The fake agent needs neither an ollama binary nor a REST service
        if model.lower() == 'fake':
            logger.debug('Creating FAKE geneset agent')
            agents.append(FakeGeneSetAgent())
            continue

        logger.debug('Creating ollama geneset agent for model: ' + str(model))
        if is_rest_url:
            agents.append(OllamaRestServiceGenesetAgent(rest_url=ollama,
                                                        username=username,
                                                        password=password,
                                                        model=model,
                                                        prompt=prompt))
        else:
            agents.append(OllamaCommandLineGeneSetAgent(ollama_binary=ollama,
                                                        model=model,
                                                        prompt=prompt))
    return agents
def get_model_prompt_from_string(o_prompt):
    """
    Given argument from ``--ollama_prompts`` flag extract model and
    prompt which can be in following formats:

    ``<MODEL>`` or ``<MODEL>,<PROMPT>``

    Where ``<MODEL>`` will always just be a string, but ``<PROMPT>`` can
    be a string or a path to a file. Only the FIRST comma separates
    model from prompt, so the prompt (or prompt file path) may itself
    contain commas.

    :param o_prompt: argument passed to ``--ollama_prompts``
    :type o_prompt: str
    :return: (model, prompt) where prompt is ``None`` if **o_prompt**
             has no comma, the contents of the file if the text after
             the first comma is a path to an existing file, otherwise
             the text after the first comma as is
    :rtype: tuple
    """
    # partition() splits on the first comma only; splitting on every comma
    # would silently drop everything after a comma inside the prompt
    model, delimiter, raw_prompt = o_prompt.partition(',')
    if not delimiter:
        return model, None

    if os.path.isfile(raw_prompt):
        with open(raw_prompt, 'r') as f:
            return model, f.read()
    return model, raw_prompt
def main(args):
    """
    Main entry point for program

    Parses the command line, sets up logging, loads the optional
    provenance JSON file, creates geneset agents requested via
    ``--ollama_prompts`` and runs
    :py:class:`~cellmaps_hierarchyeval.runner.CellmapshierarchyevalRunner`

    :param args: arguments passed to command line usually
                 :py:func:`sys.argv`; first element is taken as the
                 program name and the rest are parsed as arguments
    :type args: list
    :return: return value of
             :py:meth:`cellmaps_hierarchyeval.runner.CellmapshierarchyevalRunner.run`
             or ``2`` if an exception is raised
    :rtype: int
    """
    desc = """
    Version {version}

    Takes a HiDeF {hierarchy_file} file from {hierarchy_dir} and runs
    enrichment tests for GO, CORUM, and HPA terms.

    Also includes EXPERIMENTAL support for invocation of LLMs via Ollama
    command line or Ollama REST service. To use see --ollama and
    --ollama_prompts flags

    """.format(version=cellmaps_hierarchyeval.__version__,
               hierarchy_file=constants.HIERARCHY_NETWORK_PREFIX,
               hierarchy_dir=HIERARCHYDIR)
    theargs = _parse_arguments(desc, args[1:])
    theargs.program = args[0]
    theargs.version = cellmaps_hierarchyeval.__version__

    try:
        logutils.setup_cmd_logging(theargs)

        # Load provenance inside the try block so a missing or malformed
        # file is logged and reported via the documented return value of 2
        # instead of escaping as an uncaught exception
        json_prov = None
        if theargs.provenance is not None:
            with open(theargs.provenance, 'r') as f:
                json_prov = json.load(f)

        geneset_agents = get_ollama_geneset_agents(ollama=theargs.ollama,
                                                   ollama_prompts=theargs.ollama_prompts,
                                                   username=theargs.ollama_user,
                                                   password=theargs.ollama_password)

        # NOTE(review): theargs.__dict__ includes the value of
        # --ollama_password; confirm the runner does not persist it
        return CellmapshierarchyevalRunner(outdir=theargs.outdir,
                                           max_fdr=theargs.max_fdr,
                                           min_jaccard_index=theargs.min_jaccard_index,
                                           min_comp_size=theargs.min_comp_size,
                                           corum=theargs.corum,
                                           go_cc=theargs.go_cc,
                                           hpa=theargs.hpa,
                                           ndex_server=theargs.ndex_server,
                                           geneset_agents=geneset_agents,
                                           name=theargs.name,
                                           organization_name=theargs.organization_name,
                                           project_name=theargs.project_name,
                                           hierarchy_dir=theargs.hierarchy_dir,
                                           skip_term_enrichment=theargs.skip_term_enrichment,
                                           skip_logging=theargs.skip_logging,
                                           input_data_dict=theargs.__dict__,
                                           provenance=json_prov).run()
    except Exception as e:
        logger.exception('Caught exception: ' + str(e))
        return 2
    finally:
        logging.shutdown()
if __name__ == '__main__':  # pragma: no cover
    # Executed as a script: hand the full argument vector (program name
    # included) to main() and use its return value as the exit status
    sys.exit(main(sys.argv))