% Conference paper published in the SEA 2018 proceedings (LIPIcs series, volume 103).
% Typed as inproceedings so that standard styles use the booktitle and editor fields;
% the citation key is unchanged, so existing \cite commands keep working.
@inproceedings{c1accf80eb7f4af081a379ce18f91bf1,
  author    = {Pissis, Solon P. and Retha, Ahmad},
  title     = {Dictionary Matching in Elastic-Degenerate Texts with Applications in Searching {VCF} Files On-line},
  booktitle = {17th International Symposium on Experimental Algorithms (SEA 2018)},
  editor    = {D'Angelo, Gianlorenzo},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {103},
  pages     = {16:1--16:14},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik},
  year      = {2018},
  isbn      = {978-3-95977-070-5},
  doi       = {10.4230/LIPIcs.SEA.2018.16},
  language  = {English},
  abstract  = {An elastic-degenerate string is a sequence of n sets of strings of total length N. It has been introduced to represent multiple sequence alignments of closely-related sequences in a compact form. For a standard pattern of length m, pattern matching in an elastic-degenerate text can be solved on-line in time O(nm^2+N) with pre-processing time and space O(m) (Grossi et al., CPM 2017). A fast bit-vector algorithm requiring time O(N * ceil[m/w]) with pre-processing time and space O(m * ceil[m/w]), where w is the size of the computer word, was also presented. In this paper we consider the same problem for a set of patterns of total length M. A straightforward generalization of the existing bit-vector algorithm would require time O(N * ceil[M/w]) with pre-processing time and space O(M * ceil[M/w]), which is prohibitive in practice. We present a new on-line O(N * ceil[M/w])-time algorithm with pre-processing time and space O(M). We present experimental results using both synthetic and real data demonstrating the performance of the algorithm. We further demonstrate a real application of our algorithm in a pipeline for discovery and verification of minimal absent words (MAWs) in the human genome showing that a significant number of previously discovered MAWs are in fact false-positives when a population's variants are considered.},
}