Source code for tag.select

#!/usr/bin/env python
#
# -----------------------------------------------------------------------------
# Copyright (C) 2016 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# -----------------------------------------------------------------------------

import heapq
from itertools import chain
import tag


[docs]def features(entrystream, type=None, traverse=False): """ Pull features out of the specified entry stream. :param entrystream: a stream of entries :param type: retrieve only features of the specified type; set to :code:`None` to retrieve all features :param traverse: by default, only top-level features are selected; set to :code:`True` to search each feature graph for the specified feature type """ def _typecheck(feat): if isinstance(type, str): return feat.type == type return feat.type in type for feature in entry_type_filter(entrystream, tag.Feature): if traverse: if type is None: message = 'cannot traverse without a specific feature type' raise ValueError(message) if _typecheck(feature): yield feature else: for subfeature in feature: if _typecheck(subfeature): yield subfeature else: if not type or _typecheck(feature): yield feature
[docs]def window(featurestream, seqid, start=None, end=None, strict=True): """ Pull features out of the designated genomic interval. This function uses 0-based half-open intervals, not the 1-based closed intervals used by GFF3. :param featurestream: a stream of feature entries :param seqid: ID of the sequence from which to select features :param start: start of the genomic interval :param end: end of the genomic interval :param strict: when set to :code:`True`, only features completely contained within the interval are selected; when set to :code:`False`, any feature overlapping the interval is selected """ region = None if start and end: region = tag.Range(start, end) for feature in featurestream: if feature.seqid != seqid: continue if region: if strict: if region.contains(feature._range): yield feature else: if region.overlap(feature._range): yield feature else: yield feature
[docs]def directives(entrystream, type=None): """ Pull directives out of the specified entry stream. :param entrystream: a stream of entries :param type: retrieve only directives of the specified type; set to :code:`None` to retrieve all directives """ for directive in entry_type_filter(entrystream, tag.Directive): if not type or type == directive.type: yield directive
[docs]def sequences(entrystream): """Pull sequences out of the specified entry stream.""" for sequence in entry_type_filter(entrystream, tag.Sequence): yield sequence
[docs]def entry_type_filter(entrystream, entryclass): """ Generic entry filter. :param entrystream: a stream of entries :param entryclass: specify the type of entry upon which to filter """ for entry in entrystream: if isinstance(entry, entryclass): yield entry
[docs]def merge(*sorted_streams): """Efficiently merge sorted annotation streams.""" heap = list() heapq.heapify(heap) streams = chain(sorted_streams) for record in heapq.merge(heap, *streams): yield record