Source code for tag.locus

#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2019 Battelle National Biodefense Institute.
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

import tag


class LocusBuffer(object):
    def __init__(self, firstfeature):
        self.buffer = [firstfeature]
        self.seqid = firstfeature.seqid
        self.range = firstfeature.range

    def test(self, feature, minbp=25, minperc=0.25):
        if feature.seqid != self.seqid:
            return False
        insufficient_overlap = not feature.range.overlap_atleast(
            self.range, minbp=minbp, minperc=minperc
        )
        if insufficient_overlap:
            return False
        return True

    def add(self, feature):
        self.buffer.append(feature)
        self.range = self.range.merge(feature.range)


[docs]def loci(*sorted_streams, **kwargs): """Determine feature loci from two or more sorted annotation streams. Rather than simply relying on gene coordinates from a reference annotation, this function will determine coordinates for features of interest by considering an arbitrary number of annotation sources, whether they be references or predictions. This enables efficient and accurate assessment of both sensitivity and specificity at the level of individual nucleotides and entire features. """ featuretype = kwargs['featuretype'] if 'featuretype' in kwargs else None minbp = kwargs['minbp'] if 'minbp' in kwargs else 25 minperc = kwargs['minperc'] if 'minperc' in kwargs else 0.25 locus = None merger = tag.select.merge(*sorted_streams) features = tag.select.features(merger, type=featuretype) for feature in features: if locus is None: locus = LocusBuffer(feature) elif locus.test(feature, minbp=minbp, minperc=minperc) is True: locus.add(feature) else: yield locus.seqid, locus.range, tuple(locus.buffer) locus = LocusBuffer(feature) yield locus.seqid, locus.range, tuple(locus.buffer)
[docs]def pocus(*sorted_streams, **kwargs): """Feature stream for locus parsing. From two or more sorted annotation streams, create a new stream that yields the features from the original streams, additional `locus` features, and separator directives (`###`) between loci. """ delta = kwargs['delta'] if 'delta' in kwargs else 0 for seqid, rng, features in loci(*sorted_streams, **kwargs): locus = tag.Feature( seqid, 'experimental_feature', rng.start - delta, rng.end + delta, source='tag::locus::pocus', ) locus.add_attribute('Note', 'Interval locus') yield locus for feature in features: yield feature yield tag.Directive('###')