#!/usr/bin/env python

# Executable script: takes a protein name (or UniProt accession) on the command line and prints its sequence.
# import stuff for making CLI

import os
import argparse

# import functionality to get a sequence
from getSequence.get_sequence import seq_from_name


def protein_name_from_header(header):
    """Return the short protein name embedded in a full UniProt ID line.

    Takes the third '|'-separated field of *header* and keeps the text
    before its first underscore.

    Raises:
        IndexError: if *header* has fewer than three '|'-separated fields.
    """
    return header.split('|')[2].split('_')[0]


def organism_from_header(header):
    """Return the organism named after 'OS=' in *header*, or '' if absent.

    At most the first two whitespace-separated words following 'OS=' are
    kept and joined with an underscore.
    """
    try:
        organism_words = header.split('OS=')[1].split()[:2]
    except IndexError:
        # No 'OS=' marker in the header: the organism is optional, so this
        # is not an error.
        return ''
    return '_'.join(organism_words)


def build_fasta_title(query_name, protein_name, organism, short_uniprot_id,
                      single_word_query):
    """Build the '>'-prefixed title line printed above the sequence.

    Args:
        query_name: the name exactly as the user typed it (words joined
            by single spaces).
        protein_name: short protein name taken from the UniProt ID line.
        organism: organism label, or '' when none was found.
        short_uniprot_id: short form of the UniProt ID.
        single_word_query: True when the user passed exactly one word.

    Returns:
        The title with every space replaced by an underscore.
    """
    if organism == '':
        title = f'>{query_name}_{short_uniprot_id}'
    elif single_word_query:
        title = f'>{organism}_{query_name}_{short_uniprot_id}'
    else:
        # A multi-word query presumably already names the organism, so the
        # protein name parsed from the header is used instead of repeating
        # the query text.
        title = f'>{organism}_{protein_name}_{short_uniprot_id}'
    return title.replace(' ', '_')


def main(argv=None):
    """Parse the command line, look up the sequence and print the result.

    Args:
        argv: argument list to parse; None means use sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description='Get a sequence from a protein name or UniProt accession number.')

    parser.add_argument('name', nargs='+', help='Name of the protein.')

    parser.add_argument('-v', '--verbose', action='store_true', help='Optional. Use this flag to print the full uniprot ID to the terminal.')

    parser.add_argument('-s', '--silent', action='store_true', help='Optional. Use this flag to stop any printed text to the terminal except for the sequence.')

    args = parser.parse_args(argv)

    # nargs='+' guarantees at least one word; several words are treated as
    # one space-separated query.
    final_name = ' '.join(args.name)
    just_protein_name = len(args.name) == 1

    # seq_from_name returns an indexable result: [0] is the full UniProt ID
    # line, [1] the sequence and [2] the short UniProt ID.
    seq_and_name = seq_from_name(final_name)
    full_uniprot_id = seq_and_name[0]
    sequence = seq_and_name[1]

    protein_name = protein_name_from_header(full_uniprot_id)
    organism = organism_from_header(full_uniprot_id)
    short_uniprot_id = seq_and_name[2]

    final_title = build_fasta_title(
        final_name, protein_name, organism, short_uniprot_id,
        just_protein_name,
    )

    # --verbose takes precedence over --silent when both are given.
    if args.verbose:
        print(full_uniprot_id)
    elif not args.silent:
        print(final_title)
    print(sequence)


if __name__ == "__main__":
    main()

