Source code for alhambra.edotparen
import collections
import re
import string
from . import seq as sq
from .seq import _VALID_NTS, revcomp
# Lookup table from each closing symbol to the opening symbol it pairs with:
# every lowercase ASCII letter maps to the same letter in uppercase, and the
# three closing brackets map to their corresponding opening brackets.
edp_closetoopen = dict(zip(string.ascii_lowercase, string.ascii_uppercase))
edp_closetoopen[")"] = "("
edp_closetoopen["]"] = "["
edp_closetoopen["}"] = "{"
def check_edotparen_consistency(expr):
    """Check that the pairing symbols of an extended dot-paren string balance.

    The expression is first passed through ``expand_compact_edotparen`` and
    stripped of all whitespace.  Symbols that are values of
    ``edp_closetoopen`` open a pairing, symbols that are keys close one,
    ``.`` is accepted without pairing, and ``+`` starts a new strand.

    Args:
        expr: the (possibly compact) extended dot-paren string.

    Returns:
        None.  The function only signals problems by raising.

    Raises:
        ValueError: with args ``("Opening not found", symbol, strand, loc)``
            when a closing symbol appears while no matching opening is
            pending; with args ``("Unknown char", symbol, strand, loc)`` for
            any other unrecognised character; or with the per-symbol counter
            as its single argument when openings are still pending at the
            end.  ``strand`` and ``loc`` are both counted from 0.
    """
    expr = re.sub(r"\s+", "", expand_compact_edotparen(expr))
    # Build the opener set once instead of scanning dict values per symbol.
    openers = set(edp_closetoopen.values())
    counts = collections.Counter()
    strand = 0
    strandloc = 0
    for s in expr:
        if s == "+":
            # A strand break is not a position within a strand, so the
            # location counter restarts and is not advanced.
            strand += 1
            strandloc = 0
            continue
        if s in openers:
            counts[s] += 1
        elif s in edp_closetoopen:
            opener = edp_closetoopen[s]
            # Reading a missing Counter key yields 0 (and never KeyError),
            # so an unmatched close must be detected explicitly here.
            if counts[opener] <= 0:
                raise ValueError("Opening not found", s, strand, strandloc)
            counts[opener] -= 1
        elif s != ".":
            raise ValueError("Unknown char", s, strand, strandloc)
        strandloc += 1
    # any() is safe for an expression with no pairing symbols at all, where
    # max() over the empty counter would raise a misleading error.
    if any(pending > 0 for pending in counts.values()):
        raise ValueError(counts)
def check_edotparen_sequence(edotparen, sequence):
    """Check a sequence against an extended dot-paren structure.

    The structure is expanded with ``expand_compact_edotparen``; whitespace is
    removed from both inputs and the sequence is lower-cased.  The two are
    then walked in lockstep: the base under each opening symbol is remembered,
    and the base under each closing symbol must merge (``sq.merge``) with the
    reverse complement (``revcomp``) of the most recently remembered base for
    the matching opening symbol.

    Args:
        edotparen: the (possibly compact) extended dot-paren string.
        sequence: the sequence string, with ``+`` wherever the structure
            has ``+``.

    Returns:
        None.  The function only signals problems by raising.

    Raises:
        ValueError: if the cleaned inputs differ in length; if a closing
            symbol has no pending opening; if the paired bases conflict; if
            the base under ``.`` is not in ``_VALID_NTS``; if a ``+`` in the
            structure is not matched by ``+`` in the sequence; if the
            structure contains an unknown character; or if openings remain
            unclosed at the end (the stacks dict is the single argument).
            Strand and location values in the arguments count from 0.
    """
    expr = re.sub(r"\s+", "", expand_compact_edotparen(edotparen))
    seq = re.sub(r"\s+", "", sequence).lower()
    if len(expr) != len(seq):
        raise ValueError("Unequal lengths")
    # Build the opener set once instead of scanning dict values per symbol.
    openers = set(edp_closetoopen.values())
    stacks = {}
    strand = 0
    strandloc = 0
    for s, v in zip(expr, seq):
        if s == "+":
            # Explicit raise rather than assert: asserts vanish under -O.
            if v != "+":
                raise ValueError("Strand break mismatch", v, strand, strandloc)
            strand += 1
            strandloc = 0
            continue
        if s in openers:
            stacks.setdefault(s, []).append(v)
        elif s in edp_closetoopen:
            pending = stacks.get(edp_closetoopen[s])
            # Covers both "opener never seen" and "all openers already
            # closed"; the latter would otherwise surface as an IndexError.
            if not pending:
                raise ValueError("Opening not found", s, strand, strandloc)
            vv = pending.pop()
            try:
                sq.merge(v, revcomp(vv))
            except sq.MergeConflictError:
                raise ValueError(
                    "{} != WC({}) at strand {} loc {} (both from 0)".format(
                        v, vv, strand, strandloc
                    ),
                    v,
                    vv,
                    strand,
                    strandloc,
                ) from None
        elif s == ".":
            if v not in _VALID_NTS:
                raise ValueError("Invalid nucleotide", v, strand, strandloc)
        else:
            raise ValueError("Unknown char", s, strand, strandloc)
        strandloc += 1
    # A non-empty stack is truthy; any() also copes with a structure that has
    # no pairs at all, where max() over nothing would raise spuriously.
    if any(stacks.values()):
        raise ValueError(stacks)
def expand_compact_edotparen(expr):
    """Expand the run-length counts in a compact extended dot-paren string.

    Every run of digits that is immediately followed by a bracket, an ASCII
    letter or ``.`` is replaced by that symbol repeated that many times.
    All other text is returned unchanged.

    Args:
        expr: the compact string, e.g. ``"3(2.3)"``.

    Returns:
        The expanded string, e.g. ``"(((..)))"``.
    """

    def _repeat(match):
        count, symbol = match.groups()
        return symbol * int(count)

    return re.sub(r"(\d+)([\[\]\(\)\{\}A-Za-z\.])", _repeat, expr)
def prettify_edotparen(expr):
    """Collapse repeated symbols into ``<count><symbol>`` form.

    Any run of two or more identical brackets, ASCII letters or ``.`` is
    replaced by the run length followed by a single copy of the symbol.
    Single symbols and all other characters are left as they are.

    Args:
        expr: an expanded extended dot-paren string, e.g. ``"(((..)))"``.

    Returns:
        The compacted string, e.g. ``"3(2.3)"``.
    """

    def _compress(match):
        run = match.group(1)
        return "{}{}".format(len(run), match.group(2))

    # The regex does the heavy lifting: group 2 captures one symbol and the
    # backreference \2+ extends the match across its consecutive repeats, so
    # group 1 is the whole run.
    return re.sub(r"(([\[\]\(\)\{\}A-Za-z\.])\2+)", _compress, expr)