# Source code for cellmaps_hierarchyeval.cellmaps_hierarchyevalcmd

#! /usr/bin/env python
import json
import os
import argparse
import sys
import logging
import logging.config
from cellmaps_utils import logutils
from cellmaps_utils import constants
import cellmaps_hierarchyeval
from cellmaps_hierarchyeval.runner import CellmapshierarchyevalRunner
from cellmaps_hierarchyeval.analysis import OllamaCommandLineGeneSetAgent
from cellmaps_hierarchyeval.analysis import OllamaRestServiceGenesetAgent
from cellmaps_hierarchyeval.analysis import FakeGeneSetAgent

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


# Name of the required command line flag that points at the hierarchy
# directory; also interpolated into the program description in main()
HIERARCHYDIR = '--hierarchy_dir'
# Default value of the --ollama flag (path to the ollama command line binary)
PATH_TO_OLLAMA = '/usr/local/bin/ollama'


def _parse_arguments(desc, args):
    """
    Parses command line arguments

    Builds the :py:class:`argparse.ArgumentParser` for this tool, registers
    the positional ``outdir`` argument plus all optional flags (enrichment
    thresholds, NDEx network UUIDs, ollama settings, FAIRSCAPE provenance
    values and logging controls) and parses **args** with it.

    :param desc: description to display on command line
    :type desc: str
    :param args: command line arguments usually :py:func:`sys.argv[1:]`
    :type args: list
    :return: arguments parsed by :py:mod:`argparse`
    :rtype: :py:class:`argparse.Namespace`
    """
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=constants.ArgParseFormatter)
    parser.add_argument('outdir', help='Output directory')
    parser.add_argument(HIERARCHYDIR, required=True,
                        help='Directory where hierarchy was generated')

    # Enrichment thresholds and reference networks; defaults come from
    # class level constants on the runner
    parser.add_argument('--max_fdr', type=float, default=CellmapshierarchyevalRunner.MAX_FDR,
                        help='Maximum false discovery rate')
    parser.add_argument('--min_jaccard_index', type=float,
                        default=CellmapshierarchyevalRunner.MIN_JACCARD_INDEX,
                        help='Minimum jaccard index')
    parser.add_argument('--min_comp_size', type=int, default=CellmapshierarchyevalRunner.MIN_COMP_SIZE,
                        help='Minimum term size to consider for enrichment')
    parser.add_argument('--corum', default=CellmapshierarchyevalRunner.CORUM,
                        help='UUID for CORUM network')
    parser.add_argument('--go_cc', default=CellmapshierarchyevalRunner.GO_CC,
                        help='UUID for GO-CC network')
    parser.add_argument('--hpa', default=CellmapshierarchyevalRunner.HPA,
                        help='UUID for HPA network')
    parser.add_argument('--ndex_server', default=CellmapshierarchyevalRunner.NDEX_SERVER,
                        help='NDEx server to use')
    parser.add_argument('--skip_term_enrichment', action='store_true',
                        help='If set, SKIP enrichment against networks set '
                             'via --corum, --go_cc, --hpa')

    # EXPERIMENTAL ollama (LLM) integration
    parser.add_argument('--ollama', default=PATH_TO_OLLAMA,
                        help='Path to ollama command line binary or REST service. '
                             'If value starts with http it is assumed to be a REST '
                             'url and all prompts will be passed to service. For '
                             'REST url the suffix api/generate must be appended. '
                             'Example: http://foo/api/generate '
                             'NOTE: ollama integration with this tool is '
                             'EXPERIMENTAL and interface may be '
                             'changed or removed in the future ')
    parser.add_argument('--ollama_user',
                        help='Username to pass as basic auth to ollama REST '
                             'service')
    parser.add_argument('--ollama_password',
                        help='Password to pass via basic auth to ollama REST '
                             'service')
    parser.add_argument('--ollama_prompts', nargs='+',
                        help='Comma delimited value of format <MODEL NAME> or '
                             '<MODEL NAME>,<PROMPT> '
                             'where <PROMPT> can be path to prompt file or prompt to '
                             'run. For insertion of gene set please include {GENE_SET} '
                             'in prompt and tell LLM to put Process: <name> on first line '
                             'with name assigned to assembly and Confidence Score: <score> '
                             'on 2nd line with confidence in the name given. '
                             'If just <MODEL NAME> is set, then default prompt is used with '
                             'model specified. '
                             'NOTE: if <MODEL NAME> is set to FAKE then a completely fake '
                             ' agent will be used. Also note: ollama integration with this '
                             'tool is EXPERIMENTAL and interface may be '
                             'changed or removed in the future ')

    # Provenance / FAIRSCAPE registration values
    parser.add_argument('--provenance',
                        help='Path to file containing provenance '
                             'information about input files in JSON format. '
                             'This is required if --hierarchy_dir directory does not contain '
                             'ro-crate-metadata.json file.')
    parser.add_argument('--name',
                        help='Name of this run, needed for FAIRSCAPE. If '
                             'unset, name value from specified '
                             'by --hierarchy_dir directory or provenance file will be used')
    parser.add_argument('--organization_name',
                        help='Name of organization running this tool, needed '
                             'for FAIRSCAPE. If unset, organization name specified '
                             'in --hierarchy_dir directory or provenance file will be used')
    parser.add_argument('--project_name',
                        help='Name of project running this tool, needed for '
                             'FAIRSCAPE. If unset, project name specified '
                             'in --hierarchy_dir directory or provenance file will be used')

    # Logging controls
    parser.add_argument('--skip_logging', action='store_true',
                        help='If set, output.log, error.log '
                             'files will not be created')
    parser.add_argument('--logconf', default=None,
                        help='Path to python logging configuration file in '
                             'this format: https://docs.python.org/3/library/'
                             'logging.config.html#logging-config-fileformat '
                             'Setting this overrides -v parameter which uses '
                             ' default logger. (default None)')
    # The count starts at 1, so each -v given on the command line raises
    # the value from there
    parser.add_argument('--verbose', '-v', action='count', default=1,
                        help='Increases verbosity of logger to standard '
                             'error for log messages in this module. Messages are '
                             'output at these python logging levels '
                             '-v = WARNING, -vv = INFO, '
                             '-vvv = DEBUG, -vvvv = NOTSET (default ERROR '
                             'logging)')
    parser.add_argument('--version', action='version',
                        version=('%(prog)s ' +
                                 cellmaps_hierarchyeval.__version__))

    return parser.parse_args(args)



# [docs]
def get_ollama_geneset_agents(ollama=PATH_TO_OLLAMA, ollama_prompts=None,
                              username=None, password=None):
    """
    Creates one geneset agent for every entry in **ollama_prompts**

    Each entry is split into a model and an optional prompt via
    :py:func:`get_model_prompt_from_string`. A model named ``fake``
    (case insensitive) yields a :py:class:`FakeGeneSetAgent`. Otherwise a
    REST service agent is created when **ollama** starts with ``http``,
    or a command line agent when it does not.

    :param ollama: Path to ollama binary or URL of ollama REST service
    :type ollama: str
    :param ollama_prompts: Values passed to ``--ollama_prompts`` flag, each
                           of format ``<MODEL>`` or ``<MODEL>,<PROMPT>``
    :type ollama_prompts: list
    :param username: Username handed to REST service agents
    :type username: str
    :param password: Password handed to REST service agents
    :type password: str
    :return: Agents in same order as **ollama_prompts** or ``None`` if
             **ollama_prompts** is ``None``
    :rtype: list
    """
    if ollama_prompts is None:
        return None

    # A value starting with http is treated as a REST endpoint shared
    # by every non-fake agent
    is_rest_url = ollama.startswith('http')
    if is_rest_url:
        logger.info('For all agents, using ollama REST service: ' +
                    str(ollama))
        if not ollama.endswith('api/generate'):
            logger.warning(str(ollama) +
                           ' does not end with api/generate and may not work.')

    agents = []
    for prompt_arg in ollama_prompts:
        model, prompt = get_model_prompt_from_string(prompt_arg)

        if model.lower() == 'fake':
            logger.debug('Creating FAKE geneset agent')
            agents.append(FakeGeneSetAgent())
            continue

        logger.debug('Creating ollama geneset agent for model: ' + str(model))
        if is_rest_url:
            agents.append(OllamaRestServiceGenesetAgent(rest_url=ollama,
                                                        username=username,
                                                        password=password,
                                                        model=model,
                                                        prompt=prompt))
            continue

        agents.append(OllamaCommandLineGeneSetAgent(ollama_binary=ollama,
                                                    model=model,
                                                    prompt=prompt))
    return agents




# [docs]
def get_model_prompt_from_string(o_prompt):
    """
    Given argument from --ollama_prompts flag extract
    model and prompt which can be in following formats:

    <MODEL>
    or
    <MODEL>,<PROMPT>

    Where <MODEL> will always just be a string, but <PROMPT>
    can be a string or a path to a file. Only the FIRST comma
    separates model from prompt, so the prompt itself may
    contain commas.

    :param o_prompt: argument passed to --ollama_prompts
    :type o_prompt: str
    :return: (model, prompt) where prompt is ``None`` if **o_prompt**
             has no comma, the contents of the file if the text after
             the first comma is a path to an existing file, otherwise
             the text after the first comma as is
    :rtype: tuple
    """
    # partition splits on the first comma only; splitting on every comma
    # would drop everything after the second comma of a free text prompt
    model, delimiter, raw_prompt = o_prompt.partition(',')
    if not delimiter:
        return model, None

    if os.path.isfile(raw_prompt):
        with open(raw_prompt, 'r') as f:
            return model, f.read()

    return model, raw_prompt




# [docs]
def main(args):
    """
    Main entry point for program

    Parses the command line, sets up logging, optionally loads the
    provenance JSON file given via ``--provenance``, creates geneset
    agents from the ``--ollama*`` flags and invokes the runner.

    :param args: arguments passed to command line usually :py:func:`sys.argv`
                 where the first element is the program name
    :type args: list

    :return: return value of :py:meth:`cellmaps_hierarchyeval.runner.CellmapshierarchyevalRunner.run`
             or ``2`` if an exception is raised, including failure
             to read or parse the provenance file
    :rtype: int
    """
    desc = """
    Version {version}
    Takes a HiDeF {hierarchy_file} file from {hierarchy_dir} and runs
    enrichment tests for GO, CORUM, and HPA terms.

    Also includes EXPERIMENTAL support for invocation of LLMs via Ollama command
    line or Ollama REST service.

    To use see --ollama and --ollama_prompts flags

    """.format(version=cellmaps_hierarchyeval.__version__,
               hierarchy_file=constants.HIERARCHY_NETWORK_PREFIX,
               hierarchy_dir=HIERARCHYDIR)

    # args[0] is the program name, the rest are the actual arguments
    theargs = _parse_arguments(desc, args[1:])
    theargs.program = args[0]
    theargs.version = cellmaps_hierarchyeval.__version__

    try:
        logutils.setup_cmd_logging(theargs)

        # Load provenance inside the try block so a missing or malformed
        # file is logged and reported as exit code 2, like any other error
        json_prov = None
        if theargs.provenance is not None:
            with open(theargs.provenance, 'r') as f:
                json_prov = json.load(f)

        geneset_agents = get_ollama_geneset_agents(ollama=theargs.ollama,
                                                   ollama_prompts=theargs.ollama_prompts,
                                                   username=theargs.ollama_user,
                                                   password=theargs.ollama_password)

        # NOTE(review): theargs.__dict__ holds every parsed option, including
        # --ollama_password; confirm the runner does not persist it
        return CellmapshierarchyevalRunner(outdir=theargs.outdir,
                                           max_fdr=theargs.max_fdr,
                                           min_jaccard_index=theargs.min_jaccard_index,
                                           min_comp_size=theargs.min_comp_size,
                                           corum=theargs.corum,
                                           go_cc=theargs.go_cc,
                                           hpa=theargs.hpa,
                                           ndex_server=theargs.ndex_server,
                                           geneset_agents=geneset_agents,
                                           name=theargs.name,
                                           organization_name=theargs.organization_name,
                                           project_name=theargs.project_name,
                                           hierarchy_dir=theargs.hierarchy_dir,
                                           skip_term_enrichment=theargs.skip_term_enrichment,
                                           skip_logging=theargs.skip_logging,
                                           input_data_dict=theargs.__dict__,
                                           provenance=json_prov).run()
    except Exception as e:
        logger.exception('Caught exception: ' + str(e))
        return 2
    finally:
        # Flush and close all logging handlers before returning
        logging.shutdown()



# When executed as a script, pass the full argument vector (program name
# included) to main() and use its return value as the process exit code
if __name__ == '__main__':  # pragma: no cover
    sys.exit(main(sys.argv))