#!/usr/bin/env python

#  Copyright (c) 2016-2019, Broad Institute, Inc. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
#  * Neither the name Broad Institute, Inc. nor the names of its
#    contributors may be used to endorse or promote products derived from
#    this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
#  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
#  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
#  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
#  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

import argparse
import numpy as np
import kmertools
import h5py


# Command-line interface: one output HDF5 path plus one or more kmerized
# strain files to combine.
parser = argparse.ArgumentParser(
    description="Build a pangenome HDF5 file from kmerized strain files."
)
# The output path is mandatory: without it args.output would be None and the
# failure would only surface later, when the output file is opened.
parser.add_argument("-o", "--output", required=True, help="pangenome output hdf5 file")
# NOTE(review): --K is parsed but not read anywhere else in the visible script;
# kept so existing command lines that pass it continue to work.
parser.add_argument("--K", type=int, default=kmertools.DEFAULT_K, help="Kmer size")
parser.add_argument("--fingerprint", "-f", action="store_true", help="use MinHash fingerprints rather than whole genomes")
parser.add_argument('strains', nargs='+', help='kmerized strain hdf5 or npz files')
args = parser.parse_args()

# Running merge of every strain's k-mer set; None until the first strain is read.
panKmerSet = None
# Fingerprint fraction taken from the strains; only assigned when --fingerprint
# is given, and then required to be the same for every strain.
fingerprint_fraction = None

# Output layout: each strain's k-mer set is saved in its own group named after
# the strain, and the merged set is saved at the root of the file.
with h5py.File(args.output, 'w') as h5:
    for s in args.strains:
        name = kmertools.nameFromPath(s)
        print(name)

        kset = kmertools.kmerSetFromFile(s)

        if args.fingerprint:
            # Validate with an explicit raise rather than `assert`, so the
            # check is not stripped when Python runs with -O.
            if (fingerprint_fraction is not None
                    and kset.fingerprint_fraction != fingerprint_fraction):
                raise ValueError(
                    "Fingerprint fraction must be consistent: strain {} has {}, expected {}".format(
                        name, kset.fingerprint_fraction, fingerprint_fraction))
            fingerprint_fraction = kset.fingerprint_fraction
            # From here on the strain is represented by its fingerprint only.
            kset = kset.fingerprintAsKmerSet()

        strainGroup = h5.create_group(name)

        kset.save_hdf5(strainGroup, compress="gzip")

        if panKmerSet is None:
            panKmerSet = kset
        else:
            panKmerSet = panKmerSet.mergeKmerSet(kset)

    # Record the shared fingerprint fraction on the merged set before saving it.
    if fingerprint_fraction is not None:
        panKmerSet.fingerprint_fraction = fingerprint_fraction
    panKmerSet.save_hdf5(h5, compress="gzip")




