# -*- coding: utf-8 -*-

"""Extract *all* xrefs from the available OBO documents."""

import os
from typing import Iterable, Set, Tuple

import click
import pandas as pd

from pyobo import get_obo_graph_by_prefix, get_obo_graph_by_url
from pyobo.mappings import iterate_xrefs_from_graph
from pyobo.mappings.extract_xrefs import UNHANDLED_NAMESPACES
from pyobo.utils import MissingOboBuild

#: Maps each ontology prefix to the URL its OBO document is downloaded from
OBO = {
    # High quality
    'hp': 'http://purl.obolibrary.org/obo/hp.obo',
    'chebi': 'http://purl.obolibrary.org/obo/chebi.obo',
    'chiro': 'https://raw.githubusercontent.com/obophenotype/chiro/master/chiro.obo',
    'doid': 'http://purl.obolibrary.org/obo/doid.obo',
    'efo': 'http://www.ebi.ac.uk/efo/efo.obo',
    'go': 'http://purl.obolibrary.org/obo/go.obo',
    'obi': 'http://purl.obolibrary.org/obo/obi.obo',
    # Others
    'pr': 'http://purl.obolibrary.org/obo/pr.obo',
    'bto': 'http://purl.obolibrary.org/obo/bto.obo',
    'cl': 'http://purl.obolibrary.org/obo/cl.obo',
    # 'clo' is left out because it is not distributed as OBO
    'cmo': 'http://purl.obolibrary.org/obo/cmo.obo',
    'ecto': 'http://purl.obolibrary.org/obo/ecto.obo',
    'exo': 'http://purl.obolibrary.org/obo/exo.obo',
    'fbbt': 'http://purl.obolibrary.org/obo/fbbt.obo',
    'mondo': 'http://purl.obolibrary.org/obo/mondo.obo',
    'mp': 'http://purl.obolibrary.org/obo/mp.obo',
    'mpath': 'https://raw.githubusercontent.com/PaulNSchofield/mpath/master/mpath.obo',
    'ncit': 'http://purl.obolibrary.org/obo/ncit.obo',
    'pato': 'http://purl.obolibrary.org/obo/pato.obo',
    'peco': 'http://purl.obolibrary.org/obo/peco.obo',
    'pw': 'http://purl.obolibrary.org/obo/pw.obo',
    'symp': 'http://purl.obolibrary.org/obo/symp.obo',
    'to': 'http://purl.obolibrary.org/obo/to.obo',
    'uberon': 'http://purl.obolibrary.org/obo/uberon/basic.obo',
}

#: Prefixes registered without a download URL (every value is ``None``).
#: NOTE(review): presumably these OBO builds are generated locally rather than
#: downloaded - confirm against pyobo.
AUTOGENERATED = dict.fromkeys([
    'hgnc',
    'mirbase',
    'sgd',
    'hgnc.genefamily',
    'mgi',
    'rgd',
])

# Fold the URL-less prefixes into the main table so a single loop covers both
OBO.update(AUTOGENERATED)


def _iterate_all_xrefs() -> Iterable[Tuple[str, str, str, str, str]]:
    """Yield the xrefs from the OBO graph of every prefix in :data:`OBO`.

    :return: An iterable of 5-tuples: the four values produced by
        ``iterate_xrefs_from_graph`` (head namespace, head identifier, xref namespace,
        xref identifier) followed by the prefix whose graph they came from.
    """
    for source_prefix, source_url in OBO.items():
        try:
            obo_graph = get_obo_graph_by_prefix(source_prefix)
        except MissingOboBuild:
            # No build is available under this prefix, so load the graph from its URL.
            # NOTE(review): prefixes merged in from AUTOGENERATED carry ``None`` as their
            # URL - confirm that get_obo_graph_by_url copes with url=None.
            obo_graph = get_obo_graph_by_url(source_prefix, url=source_url)

        # Tag each xref with the prefix of the graph it was extracted from
        yield from (
            (subject_ns, subject_id, target_ns, target_id, source_prefix)
            for subject_ns, subject_id, target_ns, target_id in iterate_xrefs_from_graph(obo_graph)
        )


def _get_xrefs_df() -> pd.DataFrame:
    """Build a dataframe of the distinct xrefs, sorted by all of its columns.

    :return: A dataframe with the columns ``head_ns``, ``head_id``, ``tail_ns``,
        ``tail_id``, and ``source``.
    """
    column_names = ['head_ns', 'head_id', 'tail_ns', 'tail_id', 'source']
    # Going through a set drops xrefs that were yielded more than once
    unique_rows: Set[Tuple[str, str, str, str, str]] = set(_iterate_all_xrefs())
    frame = pd.DataFrame(unique_rows, columns=column_names)
    return frame.sort_values(column_names)


@click.command()
@click.option('--directory', type=click.Path(dir_okay=True, file_okay=False, exists=True), default=os.getcwd())
def main(directory: str) -> None:  # noqa: D202
    """Output the mappings.

    The following files are written into ``directory``:

    - ``xrefs.tsv`` and ``xrefs.tsv.gz``: all xrefs
    - ``xrefs_sample.tsv``: the first rows of the xrefs dataframe
    - ``summary.tsv``: the number of xrefs for each (source, tail namespace) pair
    - ``unmapped.tsv``: the entries of ``UNHANDLED_NAMESPACES``

    :param directory: An existing directory that receives the output files.
        Defaults to the working directory at the time this module was imported.
    """

    def _write_tsv(df: pd.DataFrame, name: str) -> None:
        """Write a dataframe as a tab-separated file called ``name`` in the output directory."""
        df.to_csv(os.path.join(directory, name), sep='\t', index=False)

    xrefs_df = _get_xrefs_df()

    # Export all xrefs, both plain and with a .gz file name
    _write_tsv(xrefs_df, 'xrefs.tsv')
    _write_tsv(xrefs_df, 'xrefs.tsv.gz')

    # Export a sample of xrefs
    _write_tsv(xrefs_df.head(), 'xrefs_sample.tsv')

    # Export a summary dataframe. After the count, the ``head_ns`` column holds the
    # number of rows in each (source, tail_ns) group rather than a namespace.
    summary_df = xrefs_df.groupby(['source', 'tail_ns'])['head_ns'].count().reset_index()
    summary_df = summary_df.sort_values(['head_ns'], ascending=False)
    _write_tsv(summary_df, 'summary.tsv')

    # Export the namespaces that haven't been handled yet.
    # NOTE(review): UNHANDLED_NAMESPACES is presumably filled in as a side effect of
    # extracting the xrefs above - confirm in pyobo.mappings.extract_xrefs.
    unmapped_path = os.path.join(directory, 'unmapped.tsv')
    # Pin the encoding to UTF-8 so this file matches the ones pandas writes and does
    # not depend on (or fail because of) the platform's locale encoding.
    with open(unmapped_path, 'w', encoding='utf-8') as file:
        for namespace, items in sorted(UNHANDLED_NAMESPACES.items()):
            for curie, xref in items:
                print(curie, namespace, xref, file=file, sep='\t')


# Run the click command only when this file is executed as a script, not on import
if __name__ == '__main__':
    main()
