@inbook{b7417698c87d41f1b069f9194da7ba7f, title = "Finding the Anticover of a String", abstract = "A k-anticover of a string x is a set of pairwise distinct factors of x of equal length k, such that every symbol of x is contained into an occurrence of at least one of those factors. The existence of a k-anticover can be seen as a notion of non-redundancy, which has application in computational biology, where they are associated with various non-regulatory mechanisms. In this paper we address the complexity of the problem of finding a k-anticover of a string x if it exists, showing that the decision problem is NP-complete on general strings for k-3. We also show that the problem admits a polynomial-time solution for k = 2. For unbounded k, we provide an exact exponential algorithm to find a k-anticover of a string of length n (or determine that none exists), which runs in O(min{3 n-k 3 , ( k(k+1) 2 ) n k+1 }) time using polynomial space. 2012 ACM Subject Classification Mathematics of computing ! Combinatorics on words.", keywords = "Anticover, Np-complete, String algorithms, Stringology", author = "Mai Alzamel and Alessio Conte and Shuhei Denzumi and Roberto Grossi and Iliopoulos, {Costas S.} and Kazuhiro Kurita and Kunihiro Wasa", year = "2020", month = jun, day = "1", doi = "10.4230/LIPIcs.CPM.2020.2", language = "English", series = "Leibniz International Proceedings in Informatics, LIPIcs", publisher = "Schloss Dagstuhl- Leibniz-Zentrum fur Informatik GmbH, Dagstuhl Publishing", editor = "Gortz, {Inge Li} and Oren Weimann", booktitle = "31st Annual Symposium on Combinatorial Pattern Matching, CPM 2020", address = "Germany", note = "31st Annual Symposium on Combinatorial Pattern Matching, CPM 2020 ; Conference date: 17-06-2020 Through 19-06-2020", }
@article{64bdcb47fa22482ab953c73d9e822379, title = "GenMap: ultra-fast computation of genome mappability", abstract = "Motivation: Computing the uniqueness of k-mers for each position of a genome while allowing for up to e mismatches is computationally challenging. However, it is crucial for many biological applications such as the design of guide RNA for CRISPR experiments. More formally, the uniqueness or (k, e)-mappability can be described for every position as the reciprocal value of how often this k-mer occurs approximately in the genome, i.e. with up to e mismatches. Results: We present a fast method GenMap to compute the (k, e)-mappability. We extend the mappability algorithm, such that it can also be computed across multiple genomes where a k-mer occurrence is only counted once per genome. This allows for the computation of marker sequences or finding candidates for probe design by identifying approximate k-mers that are unique to a genome or that are present in all genomes. GenMap supports different formats such as binary output, wig and bed files as well as csv files to export the location of all approximate k-mers for each genomic position.", author = "Christopher Pockrandt and Mai Alzamel and Iliopoulos, {Costas S.} and Knut Reinert", year = "2020", month = mar, day = "31", doi = "10.1093/bioinformatics/btaa222", language = "English", volume = "36", pages = "3687--3692", journal = "Bioinformatics (Oxford, England)", issn = "1367-4811", number = "12", }
@article{f849c958ad1840f092ac0b672db7ef3f, title = "Comparing Degenerate Strings", abstract = "Uncertain sequences are compact representations of sets of similar strings. They highlight common segments by collapsing them, and explicitly represent varying segments by listing all possible options. A generalized degenerate string (GD string) is a type of uncertain sequence. Formally, a GD string S is a sequence of n sets of strings of total size N, where the ith set contains strings of the same length ki but this length can vary between different sets. We denote by W the sum of these lengths k0, k1,... , kn-1. Our main result is an (N + M)-time algorithm for deciding whether two GD strings of total sizes N and M, respectively, over an integer alphabet, have a non-empty intersection. This result is based on a combinatorial result of independent interest: although the intersection of two GD strings can be exponential in the total size of the two strings, it can be represented in linear space. We then apply our string comparison tool to devise a simple algorithm for computing all palindromes in S in (min{W, n2}N)-time. We complement this upper bound by showing a similar conditional lower bound for computing maximal palindromes in S. We also show that a result, which is essentially the same as our string comparison linear-time algorithm, can be obtained by employing an automata-based approach.", keywords = "degenerate strings, elastic-degenerate strings, generalized degenerate strings, palindromes, string comparison", author = "Mai Alzamel and Ayad, {Lorraine A.K.} and Giulia Bernardini and Roberto Grossi and Iliopoulos, {Costas S.} and Nadia Pisanti and Pissis, {Solon P.} and Giovanna Rosone", year = "2020", doi = "10.3233/FI-2020-1947", language = "English", volume = "175", pages = "41--58", journal = "FUNDAMENTA INFORMATICAE", issn = "0169-2968", publisher = "IOS Press", number = "1-4", }