Source code for tag.directive

#!/usr/bin/env python
#
# ------------------------------------------------------------------------------
# Copyright (C) 2015 Daniel Standage <daniel.standage@gmail.com>
#
# This file is part of tag (http://github.com/standage/tag) and is licensed
# under the BSD 3-clause license: see LICENSE.
# ------------------------------------------------------------------------------

import re
from tag.range import Range


# Directive types explicitly declared by the GFF3 specification. Any other
# directive name is treated as application specific (see `Directive.type`).
dirtypes = ['gff-version', 'sequence-region', 'feature-ontology', 'species',
            'attribute-ontology', 'source-ontology', 'genome-build']


class Directive(object):
    """
    Represents a directive from a GFF3 file.

    This class is primarily for error checking and data access. Once created,
    `Directive` objects should be treated as read-only: modify at your peril!
    Also, separator directives (`###`) and the `##FASTA` directive are handled
    directly by parsers and not by this class.

    Directives not explicitly declared in the GFF3 spec are application
    specific: they will be parsed without complaint, but no guarantees can be
    made about accessing their attributes.

    Directives are orderable: `gff-version` directives sort first, then
    `sequence-region` directives (by sequence ID, then by range), then all
    remaining directives (by their raw text). Any directive compares as less
    than an object that is not a `Directive`.

    >>> sr = Directive('##sequence-region chr1 5000 10000')
    >>> sr.type
    'sequence-region'
    >>> sr.seqid
    'chr1'
    >>> gb = Directive('##genome-build BeeBase 4.5')
    >>> gb.type
    'genome-build'
    >>> gb.source
    'BeeBase'
    """

    def __init__(self, data):
        """
        Parse a single directive line.

        :param data: the directive text; must begin with `##`
        :raises AssertionError: if `data` does not begin with `##`, or if it
                                is a `gff-version` directive declaring a
                                version other than 3
        """
        assert data.startswith('##')
        self._rawdata = data

        formatmatch = re.match(r'##gff-version\s+(\d+)', data)
        if formatmatch:
            self.dirtype = 'gff-version'
            self.version = formatmatch.group(1)
            assert self.version == '3', 'Only GFF version 3 is supported'
            return

        # Fields may be separated by any run of whitespace (tabs, multiple
        # spaces), consistent with the other directive patterns below.
        formatmatch = re.match(
            r'##sequence-region\s+(\S+)\s+(\d+)\s+(\d+)', data
        )
        if formatmatch:
            self.dirtype = 'sequence-region'
            self.seqid = formatmatch.group(1)
            # The directive's start coordinate is decremented by one before
            # being stored; `slug` adds it back when reporting.
            self.range = Range(int(formatmatch.group(2)) - 1,
                               int(formatmatch.group(3)))
            return

        formatmatch = re.match(
            r'##((feature|attribute|source)-ontology)\s+(\S+)', data
        )
        if formatmatch:
            self.dirtype = formatmatch.group(1)
            self.uri = formatmatch.group(3)
            return

        formatmatch = re.match(r'##species\s+(\S+)', data)
        if formatmatch:
            self.dirtype = 'species'
            self.uri = formatmatch.group(1)
            return

        formatmatch = re.match(r'##genome-build\s+(\S+)\s+(\S+)', data)
        if formatmatch:
            self.dirtype = 'genome-build'
            self.source = formatmatch.group(1)
            self.build_name = formatmatch.group(2)
            return

        # Fallback: any other `##name [payload]` line. NOTE: a line that
        # names a known directive but does not match its pattern above (e.g.
        # `##sequence-region chr1` with no coordinates) also lands here, so
        # it will have `dirtype`/`data` but none of the type-specific
        # attributes.
        formatmatch = re.match(r'##(\S+)(\s+(.+))*', data)
        assert formatmatch
        self.dirtype = formatmatch.group(1)
        self.data = formatmatch.group(3)
        assert self.dirtype is not None

    @property
    def type(self):
        """The directive type if it is listed in `dirtypes`, else `None`."""
        if self.dirtype in dirtypes:
            return self.dirtype
        return None

    @property
    def slug(self):
        """
        A short label for `sequence-region` directives, `None` otherwise.

        The label reports the start coordinate as given in the directive
        (stored start plus one) together with the stored end.
        """
        if self.type == 'sequence-region':
            return 'sequence {}[{}, {}]'.format(self.seqid,
                                                self.range.start + 1,
                                                self.range.end)
        return None

    def __repr__(self):
        """Return the directive text exactly as it was provided."""
        return self._rawdata

    def _sort_rank(self):
        """Coarse sort position: gff-version, sequence-region, the rest."""
        dtype = self.type
        if dtype == 'gff-version':
            return 0
        if dtype == 'sequence-region':
            return 1
        return 2

    def __lt__(self, other):
        """Strict ordering; see the class docstring for the sort order."""
        if not isinstance(other, Directive):
            return True
        selfrank, otherrank = self._sort_rank(), other._sort_rank()
        if selfrank != otherrank:
            return selfrank < otherrank
        if self.type == 'sequence-region':
            if self.seqid == other.seqid:
                return self.range.__lt__(other.range)
            return self.seqid < other.seqid
        # Same rank and not sequence-region: fall back to the raw text, so
        # that a directive is never strictly less than an identical one.
        return self._rawdata < other._rawdata

    def __le__(self, other):
        """Non-strict counterpart of `__lt__`."""
        if not isinstance(other, Directive):
            return True
        selfrank, otherrank = self._sort_rank(), other._sort_rank()
        if selfrank != otherrank:
            return selfrank < otherrank
        if self.type == 'sequence-region':
            if self.seqid == other.seqid:
                return self.range.__le__(other.range)
            return self.seqid < other.seqid
        return self._rawdata <= other._rawdata

    def __gt__(self, other):
        """Negation of `__le__`."""
        return not self.__le__(other)

    def __ge__(self, other):
        """Negation of `__lt__`."""
        return not self.__lt__(other)