Thetazero Pubs
escott8908

Introduction

library(IRanges)

Vector objects

Atomic Vectors

set.seed(0)
lambda <- c(rep(0.001, 4500), seq(0.001, 10, length = 500),  seq(10, 0.001, length = 500))
xVector <- Rle(rpois(1e7, lambda)) # this instantiates an Rle
yVector <- Rle(rpois(1e7, lambda[c(251:length(lambda), 1:250)])) # this instantiates an Rle
length(xVector)
## [1] 10000000
xVector
## integer-Rle of length 10000000 with 1510219 runs
##   Lengths:  780    1  208    1 1599    1 ...    1    5    1   91    1  927
##   Values :    0    1    0    1    0    1 ...    2    0    1    0    1    0
zVector = c(xVector, yVector)

Vector Subsetting

# all of these do the same damn thing.
window(xVector, 4751,4764)
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4
xVector[4751:4764]
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4
xVector[IRanges(4751,4764)]
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4
#play with this stuff
xSnippet = xVector[IRanges(4751,4764)]
xSnippet
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4
rev(xSnippet)
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 4 1 1 1 1 1 1 1 1
##   Values : 4 7 5 7 6 2 6 4 5 6 4
rep(xSnippet, 2)
## integer-Rle of length 28 with 21 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 2 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4 6 5 4 6 2 6 7 5 7 4
subset(xSnippet, xSnippet > 5)
## integer-Rle of length 5 with 2 runs
##   Lengths: 3 2
##   Values : 6 7

Combining Vectors

xSnippet
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4
c(xSnippet, rev(xSnippet))
## integer-Rle of length 28 with 21 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 2 1 4 1 1 1 1 1 1 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4 7 5 7 6 2 6 4 5 6 4
append(xSnippet, xSnippet, after = 3)
## integer-Rle of length 28 with 21 runs
##   Lengths: 1 1 1 1 1 1 1 1 1 1 1 4 1 2 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 5 4 6 2 6 7 5 7 4 6 2 6 7 5 7 4

Looping over Vectors and Vector subsets

xSnippet
## integer-Rle of length 14 with 11 runs
##   Lengths: 1 1 1 1 1 1 1 1 4 1 1
##   Values : 4 6 5 4 6 2 6 7 5 7 4
# aggregate is for statistical summaries.

# but when used with start and with options, it calculates the statistical summaries along
# a sliding window beginning at start with a specific width

# aggregate(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE)


aggregate(xSnippet, start = 1:8, width = 3, FUN = median)
## [1] 5 5 5 4 6 6 6 5
cor(xVector, yVector)
## [1] 0.5739224
shifts = seq(235, 265, by=3)
corrs = shiftApply(shifts, yVector, xVector, FUN = cor)

plot(shifts, corrs)




Run Length Encoding

xRle = Rle(xVector)
yRle = Rle(yVector)
xRle
## integer-Rle of length 10000000 with 1510219 runs
##   Lengths:  780    1  208    1 1599    1 ...    1    5    1   91    1  927
##   Values :    0    1    0    1    0    1 ...    2    0    1    0    1    0
yRle
## integer-Rle of length 10000000 with 1511351 runs
##   Lengths: 1003    1  413    1  896    1 ...    1    3    1  845    1  419
##   Values :    0    1    0    1    0    1 ...    2    0    1    0    1    0
as.vector(object.size(xRle))/object.size(xVector)
## 1 bytes
identical(as.vector(xRle), xVector)
## [1] FALSE
head(runValue(xRle))
## [1] 0 1 0 1 0 1
head(runLength(xRle))
## [1]  780    1  208    1 1599    1
xRle > 0
## logical-Rle of length 10000000 with 197127 runs
##   Lengths:   780     1   208     1  1599 ...     5     1    91     1   927
##   Values : FALSE  TRUE FALSE  TRUE FALSE ... FALSE  TRUE FALSE  TRUE FALSE
xRle + yRle
## integer-Rle of length 10000000 with 1957707 runs
##   Lengths: 780   1 208   1  13   1 413   1 ...   5   1  91   1 507   1 419
##   Values :   0   1   0   1   0   1   0   1 ...   0   1   0   1   0   1   0
xRle > 0 | yRle > 0
## logical-Rle of length 10000000 with 210711 runs
##   Lengths:   780     1   208     1    13 ...    91     1   507     1   419
##   Values : FALSE  TRUE FALSE  TRUE FALSE ... FALSE  TRUE FALSE  TRUE FALSE
sum(xRle > 0 | yRle > 0)
## [1] 2105185
range(xRle)
## [1]  0 26
log1p(xRle)
## numeric-Rle of length 10000000 with 1510219 runs
##   Lengths:               780                 1 ...               927
##   Values :                 0 0.693147180559945 ...                 0
cor(xRle, yRle)
## [1] 0.5739224
shiftApply(249:251, yRle, xRle, FUN = function(x,y) var(x,y)/(sd(x) * sd(y)))
## [1] 0.8519138 0.8517324 0.8517725

Lists

Lists of Atomic Vectors

  • List
  • RleList, SimpleRleList, CompressedRleList
  • IntegerList, SimpleIntegerList, CompressedIntegerList
getClassDef('RleList')
## Virtual Class "RleList" [package "IRanges"]
## 
## Slots:
##                                                       
## Name:      elementType elementMetadata        metadata
## Class:       character DataTableORNULL            list
## 
## Extends: 
## Class "AtomicList", directly
## Class "List", by class "AtomicList", distance 2
## Class "Vector", by class "AtomicList", distance 3
## Class "Annotated", by class "AtomicList", distance 4
## 
## Known Subclasses: "RleViews", "CompressedRleList", "SimpleRleList"
cIntList1 = IntegerList(x=xVector, y=yVector)
cIntList1
## IntegerList of length 2
## [["x"]] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [["y"]] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
sIntList1 = IntegerList(x=xVector, y=yVector, compressed = FALSE)
sIntList1
## IntegerList of length 3
## [["x"]] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [["y"]] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [["compressed"]] 0
xExploded = lapply(xVector[1:5000], function(x) seq_len(x))
cIntList2 = IntegerList(xExploded)
sIntList2 = IntegerList(xExploded, compress=FALSE)
object.size(cIntList2)
## 33080 bytes
object.size(sIntList2)
## 253952 bytes
length(cIntList2)
## [1] 5000
Rle(elementLengths(cIntList2))
## integer-Rle of length 5000 with 427 runs
##   Lengths:  780    1  208    1 1599    1 ...    1    1    1    1    1    1
##   Values :    0    1    0    1    0    1 ...    5   10    9    6    9   12
xRleList <- RleList(xRle, 2L * rev(xRle))
yRleList <- RleList(yRle, 2L * rev(yRle))
xRleList > 0
## RleList of length 2
## [[1]]
## logical-Rle of length 10000000 with 197127 runs
##   Lengths:   780     1   208     1  1599 ...     5     1    91     1   927
##   Values : FALSE  TRUE FALSE  TRUE FALSE ... FALSE  TRUE FALSE  TRUE FALSE
## 
## [[2]]
## logical-Rle of length 10000000 with 197127 runs
##   Lengths:   927     1    91     1     5 ...  1599     1   208     1   780
##   Values : FALSE  TRUE FALSE  TRUE FALSE ... FALSE  TRUE FALSE  TRUE FALSE
xRleList + yRleList
## RleList of length 2
## [[1]]
## integer-Rle of length 10000000 with 1957707 runs
##   Lengths: 780   1 208   1  13   1 413   1 ...   5   1  91   1 507   1 419
##   Values :   0   1   0   1   0   1   0   1 ...   0   1   0   1   0   1   0
## 
## [[2]]
## integer-Rle of length 10000000 with 1957707 runs
##   Lengths: 419   1 507   1  91   1   5   1 ... 413   1  13   1 208   1 780
##   Values :   0   2   0   2   0   2   0   4 ...   0   2   0   2   0   2   0

Vector Ranges

ir1 <- IRanges(start = 1:10, width = 10:1)
ir2 <- IRanges(start = 1:10, end = 11)
ir3 <- IRanges(end = 11, width = 10:1)
identical(ir1, ir2) & identical(ir2, ir3)
## [1] FALSE
ir <- IRanges(c(1, 8, 14, 15, 19, 34, 40), width = c(12, 6, 6, 15, 6, 2, 7))

start(ir)
## [1]  1  8 14 15 19 34 40
end(ir)
## [1] 12 13 19 29 24 35 46
width(ir)
## [1] 12  6  6 15  6  2  7
ir[1:4]
## IRanges of length 4
##     start end width
## [1]     1  12    12
## [2]     8  13     6
## [3]    14  19     6
## [4]    15  29    15
ir[start(ir) <=15]
## IRanges of length 4
##     start end width
## [1]     1  12    12
## [2]     8  13     6
## [3]    14  19     6
## [4]    15  29    15
ir[[1]]
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12
plotRanges <- function(x, xlim = x, main = deparse(substitute(x)),  col = "black", sep = 0.5, ...) {
  height <- 1
  if (is(xlim, "Ranges")) { 
    xlim <- c(min(start(xlim)), max(end(xlim))) 
  }
  bins <- disjointBins(IRanges(start(x), end(x) + 1))
  plot.new()
  plot.window(xlim, c(0, max(bins)*(height + sep)))
  ybottom <- bins * (sep + height) - height
  rect(start(x)-0.5, ybottom, end(x)+0.5, ybottom + height, col = col, ...)
  title(main)
  axis(1)
}

Normality

The reduce function reduces any Ranges object to a NormalIRanges by merging redundant ranges.

reduce(ir)
## IRanges of length 3
##     start end width
## [1]     1  29    29
## [2]    34  35     2
## [3]    40  46     7
plotRanges(reduce(ir))

List of Ranges objects

rl = RangesList(ir, rev(ir))
start(rl)
## IntegerList of length 2
## [[1]] 1 8 14 15 19 34 40
## [[2]] 40 34 19 15 14 8 1

Vector Extraction

irextract = IRanges(start = c(4501, 4901), width = 100)
xRle[irextract]
## integer-Rle of length 200 with 159 runs
##   Lengths: 12  1  1  1  2  1  1  1  1  2 ...  1  1  1  1  1  1  1  1  1  1
##   Values :  0  1  0  2  0  1  0  1  0  1 ... 18  9 12  6  5 10  9  6  9 12
xRle[IRanges(start = c(4501, 4901), width = 100)]
## integer-Rle of length 200 with 159 runs
##   Lengths: 12  1  1  1  2  1  1  1  1  2 ...  1  1  1  1  1  1  1  1  1  1
##   Values :  0  1  0  2  0  1  0  1  0  1 ... 18  9 12  6  5 10  9  6  9 12

Finding Overlapping Ranges

ol = findOverlaps(ir, reduce(ir))
as.matrix(ol)
##      queryHits subjectHits
## [1,]         1           1
## [2,]         2           1
## [3,]         3           1
## [4,]         4           1
## [5,]         5           1
## [6,]         6           2
## [7,]         7           3

Counting OverlappingRanges

cov = coverage(ir)
plotRanges(ir)
cov1 = as.vector(cov)
mat = cbind(seq_along(cov1) - 0.5, cov1)
d = diff(cov1) != 0
mat2 = rbind(cbind(mat[d,1] + 1, mat[d,2]), mat)
mat3 = mat2[order(mat2[,1]),]
lines(mat, col='red', lwd=4)
axis(2)

Finding Neighboring Ranges

query <- IRanges(start = c(1, 12, 46), width=3, names = c('q1', 'q2', 'q3'))
subject <- IRanges(start = c(19, 58, 85), width = 25, names = c('s1', 's2', 's3'))
query
## IRanges of length 3
##     start end width names
## [1]     1   3     3    q1
## [2]    12  14     3    q2
## [3]    46  48     3    q3
subject
## IRanges of length 3
##     start end width names
## [1]    19  43    25    s1
## [2]    58  82    25    s2
## [3]    85 109    25    s3
plotRanges(c(query, subject))

nearest(query, subject)
## [1] 1 1 1
nearest(query, subject, select = 'all')
## Hits object with 3 hits and 0 metadata columns:
##       queryHits subjectHits
##       <integer>   <integer>
##   [1]         1           1
##   [2]         2           1
##   [3]         3           1
##   -------
##   queryLength: 3
##   subjectLength: 3
###############################################################
# Consider some combination of these for identifying regulons #
###############################################################
distanceToNearest(query, subject) 
## Hits object with 3 hits and 1 metadata column:
##       queryHits subjectHits |  distance
##       <integer>   <integer> | <integer>
##   [1]         1           1 |        15
##   [2]         2           1 |         4
##   [3]         3           1 |         2
##   -------
##   queryLength: 3
##   subjectLength: 3
distanceToNearest(query, subject, select = 'all')
## Hits object with 3 hits and 1 metadata column:
##       queryHits subjectHits |  distance
##       <integer>   <integer> | <integer>
##   [1]         1           1 |        15
##   [2]         2           1 |         4
##   [3]         3           1 |         2
##   -------
##   queryLength: 3
##   subjectLength: 3
precede(query, subject)
## [1] 1 1 2
precede(query, subject, select = 'all')
## Hits object with 3 hits and 0 metadata columns:
##       queryHits subjectHits
##       <integer>   <integer>
##   [1]         1           1
##   [2]         2           1
##   [3]         3           2
##   -------
##   queryLength: 3
##   subjectLength: 3

Transforming Ranges

Adjusting starts, ends and widths

ir
## IRanges of length 7
##     start end width
## [1]     1  12    12
## [2]     8  13     6
## [3]    14  19     6
## [4]    15  29    15
## [5]    19  24     6
## [6]    34  35     2
## [7]    40  46     7
shift(ir, 10)
## IRanges of length 7
##     start end width
## [1]    11  22    12
## [2]    18  23     6
## [3]    24  29     6
## [4]    25  39    15
## [5]    29  34     6
## [6]    44  45     2
## [7]    50  56     7
# This damn thing is confusing.  So it narrows each range into a subrange using the start and width.  The start option tells 
# the software where to begin, i.e. 1 means begin on the first coordinate of the range, while a 3 means begin on the 3rd 
# coordinate of the range.  It then creates a subrange that has a width of 'width'.
narrow(ir, start = 1:5, width = 2)
## IRanges of length 7
##     start end width
## [1]     1   2     2
## [2]     9  10     2
## [3]    16  17     2
## [4]    18  19     2
## [5]    23  24     2
## [6]    34  35     2
## [7]    41  42     2
restrict(ir, start=2, end=3)
## IRanges of length 1
##     start end width
## [1]     2   3     2
# I have no idea why this is or would be needed
threebands(ir, start=1:5, width=2)
## $left
## IRanges of length 7
##     start end width
## [1]     1   0     0
## [2]     8   8     1
## [3]    14  15     2
## [4]    15  17     3
## [5]    19  22     4
## [6]    34  33     0
## [7]    40  40     1
## 
## $middle
## IRanges of length 7
##     start end width
## [1]     1   2     2
## [2]     9  10     2
## [3]    16  17     2
## [4]    18  19     2
## [5]    23  24     2
## [6]    34  35     2
## [7]    41  42     2
## 
## $right
## IRanges of length 7
##     start end width
## [1]     3  12    10
## [2]    11  13     3
## [3]    18  19     2
## [4]    20  29    10
## [5]    25  24     0
## [6]    36  35     0
## [7]    43  46     4
ir + seq_len(length(ir))
## IRanges of length 7
##     start end width
## [1]     0  13    14
## [2]     6  15    10
## [3]    11  22    12
## [4]    11  33    23
## [5]    14  29    16
## [6]    28  41    14
## [7]    33  53    21
ir * -2
## IRanges of length 7
##     start end width
## [1]    -5  18    24
## [2]     5  16    12
## [3]    11  22    12
## [4]     7  36    30
## [5]    16  27    12
## [6]    33  36     4
## [7]    36  49    14

Making ranges disjoint

disjoin(ir)
## IRanges of length 10
##      start end width
## [1]      1   7     7
## [2]      8  12     5
## [3]     13  13     1
## [4]     14  14     1
## [5]     15  18     4
## [6]     19  19     1
## [7]     20  24     5
## [8]     25  29     5
## [9]     34  35     2
## [10]    40  46     7
plotRanges(disjoin(ir))




Other transformations

ir
## IRanges of length 7
##     start end width
## [1]     1  12    12
## [2]     8  13     6
## [3]    14  19     6
## [4]    15  29    15
## [5]    19  24     6
## [6]    34  35     2
## [7]    40  46     7
reflect(ir, IRanges(start(ir), width=width(ir)*2))
## IRanges of length 7
##     start end width
## [1]    13  24    12
## [2]    14  19     6
## [3]    20  25     6
## [4]    30  44    15
## [5]    25  30     6
## [6]    36  37     2
## [7]    47  53     7
flank(ir, width=seq_len(length(ir)))
## IRanges of length 7
##     start end width
## [1]     0   0     1
## [2]     6   7     2
## [3]    11  13     3
## [4]    11  14     4
## [5]    14  18     5
## [6]    28  33     6
## [7]    33  39     7

Set Operations

# intersect(), union(), setdiff()
# pintersect(), punion(), psetdiff()

gaps(ir, start = 1, end = 50)
## IRanges of length 3
##     start end width
## [1]    30  33     4
## [2]    36  39     4
## [3]    47  50     4
plotRanges(gaps(ir, start = 1, end = 50), c(1,50))

Vector Veiws

Creating Views

xViews <- Views(xRle, xRle >= 1)
xViews
## Views on a 10000000-length Rle subject
## 
## views:
##           start     end width
##     [1]     781     781     1 [1]
##     [2]     990     990     1 [1]
##     [3]    2590    2590     1 [1]
##     [4]    3474    3474     1 [1]
##     [5]    4513    4513     1 [1]
##     [6]    4515    4515     1 [2]
##     [7]    4518    4518     1 [1]
##     [8]    4520    4520     1 [1]
##     [9]    4522    4523     2 [1 1]
##     ...     ...     ...   ... ...
## [98555] 9998946 9998949     4 [8 1 1 1]
## [98556] 9998951 9998953     3 [2 1 3]
## [98557] 9998955 9998956     2 [2 2]
## [98558] 9998958 9998960     3 [2 2 1]
## [98559] 9998962 9998962     1 [1]
## [98560] 9998968 9998968     1 [1]
## [98561] 9998972 9998975     4 [1 1 1 2]
## [98562] 9998981 9998981     1 [1]
## [98563] 9999073 9999073     1 [1]
xViews <- slice(xRle, 1)
xViewsList <- slice(xRleList, 1)

Aggregating Views

head(viewSums(xViews))
## [1] 1 1 1 1 1 2
viewSums(xViewsList)
## IntegerList of length 2
## [[1]] 1 1 1 1 1 2 1 1 2 3 1 6 1 3 4 ... 11 12 6 37 10 8 11 6 4 5 1 1 5 1 1
## [[2]] 2 2 10 2 2 10 8 12 22 16 20 74 12 24 ... 2 12 2 6 4 2 2 4 2 2 2 2 2
head(viewMaxs(xViews))
## [1] 1 1 1 1 1 2
viewMaxs(xViewsList)
## IntegerList of length 2
## [[1]] 1 1 1 1 1 2 1 1 1 2 1 2 1 2 3 1 ... 4 3 5 2 5 6 2 8 3 2 2 1 1 2 1 1
## [[2]] 2 2 4 2 2 4 4 6 16 4 12 10 4 10 6 ... 6 4 2 4 2 4 2 2 2 4 2 2 2 2 2
Copyright © 2016 thetazero.com All Rights Reserved. Privacy Policy