@@ -34,7 +34,7 @@ setGeneric(".powerLaw", function(tag.counts, fitInRange = c(10, 1000), alpha = 1
34
34
standardGeneric(" .powerLaw" )})
35
35
36
36
setMethod (".powerLaw ", "numeric", function (tag.counts, fitInRange, alpha, T) {
37
- fit.coef <- .fit.power.law.to.reverse.cumulative( values = tag.counts , val.range = fitInRange )
37
+ fit.coef <- fitPowerLaw( tag.counts , fitInRange )
38
38
.normalize.to.reference.power.law.distribution(values = tag.counts , lin.reg.coef = fit.coef , alpha = alpha , T = T )
39
39
})
40
40
@@ -50,60 +50,108 @@ setMethod(".powerLaw", "DataFrame", function (tag.counts, fitInRange, alpha, T)
50
50
DataFrame(sapply(tag.counts , function (X ) .powerLaw(X , fitInRange , alpha , T )))
51
51
})
52
52
53
- # ' @name .fit. power. law.to.reverse.cumulative
53
+ # ' Fit a power law to the CAGE data
54
54
# '
55
55
# ' Function that fits power-law distribution to reverse cumulative of given
56
- # ' values - fitting is done using only the range specified in val. range
56
+ # ' values. Fitting is done using only a specified range.
57
57
# '
58
- # ' @return Two coefficients describing fitted power-law distribution (a = slope
59
- # ' in the logX-logY plot (where X=signal, Y=number of sites that have >= of given
60
- # ' signal), b = intercept in the logX-logY plot)
58
+ # ' @param x The object containing the data.
59
+ # ' @param fitInRange The range in which to fit the power law.
60
+ # '
61
+ # ' @return Returns the slope (`plSlope`) and the intercept (`plInt`) of the
62
+ # ' fitted power-law distribution. If the input is a single sample, function
63
+ # ' returns a numeric vector. If the input is multiple samples, the function
64
+ # ' returns a `DataFrame`. If the input is a whole _CAGEexp_ object, the
65
+ # ' function returns the object with information added to its `colData`.
61
66
# '
62
- # ' @importFrom data.table data.table setkeyv setnames
63
67
# ' @importFrom stats coefficients lm
64
- # ' @noRd
68
+ # '
69
+ # ' @author Vanja Haberle
70
+ # ' @author Charles Plessy
71
+ # '
72
+ # ' @export
65
73
66
- .fit.power.law.to.reverse.cumulative <- function (values , val.range = c(10 , 1000 )) {
74
+ setGeneric ("fitPowerLaw ", function(x, fitInRange = c(10, 1000)) {
75
+ standardGeneric(" fitPowerLaw" )})
67
76
68
- # using data.table package
69
- v <- data.table(num = 1 , nr_tags = values )
70
- num <- nr_tags <- NULL # Keep R CMD check happy.
71
- v <- v [, sum(num ), by = nr_tags ]
72
- setkeyv(v , " nr_tags" )
73
- v $ V1 <- rev(cumsum(rev(v $ V1 )))
74
- setnames(v , c(' nr_tags' , ' reverse_cumulative' ))
75
-
76
- # check if range values are > 0
77
- if (any(val.range < 1 )){
78
- stop(paste(" The range of values for fitting the power law" ,
79
- " arg 'fitInRange', expects integers > 0." ))
80
- }
81
-
82
- v <- v [nr_tags > = min(val.range ) & nr_tags < = max(val.range )]
83
-
84
- # check if specified range values have at least 1 entry in v
85
- if (nrow(v ) < 1 ){
86
- stop(paste(" Selected range for fitting the power law does not contain any" ,
87
- " tag count values. Consider changing/increasing 'fitInRange'." ))
88
- }
89
- # v <- aggregate(values, by = list(values), FUN = length)
90
- # v$x <- rev(cumsum(rev(v$x)))
91
- # colnames(v) <- c('nr_tags', 'reverse_cumulative')
92
- # v <- subset(v, nr_tags >= min(val.range) & nr_tags <= max(val.range))
93
-
94
- lin.m <- lm(log(reverse_cumulative ) ~ log(nr_tags ), data = v )
95
-
96
- a <- coefficients(lin.m )[2 ]
97
- b <- coefficients(lin.m )[1 ]
98
-
99
- # check if specified range values have >1 entries in v
100
- if (is.na(a ) && b == 0 ){
101
- stop(paste(" Selected range for fitting the power law does not" ,
102
- " contain enough values. Consider changing/increasing 'fitInRange'." ))
103
- }
104
- return (c(a , b ))
105
- }
77
+ # ' @rdname fitPowerLaw
78
+ # ' @examples
79
+ # ' exampleCAGEexp |> fitPowerLaw() |> colData()
106
80
81
+ setMethod ("fitPowerLaw ", "CAGEexp", function (x, fitInRange) {
82
+ DF <- fitPowerLaw(CTSStagCountDF(x ), fitInRange )
83
+ colData(x ) <- cbind(colData(x ), DF )
84
+ x
85
+ })
86
+
87
+ # ' @rdname fitPowerLaw
88
+ # ' @examples
89
+ # ' exampleCAGEexp |> CTSStagCountDF() |> fitPowerLaw()
90
+
91
+ setMethod ("fitPowerLaw ", "DataFrame", function (x, fitInRange) {
92
+ m <- sapply(x , fitPowerLaw , fitInRange )
93
+ DF <- as(t(m ), " DataFrame" )
94
+ rownames(DF ) <- names(x )
95
+ DF
96
+ })
97
+
98
+ # ' @rdname fitPowerLaw
99
+ # ' @examples
100
+ # ' exampleCAGEexp |> CTSStagCountGR('all') |> fitPowerLaw()
101
+
102
+ setMethod ("fitPowerLaw ", "GRangesList", function (x, fitInRange) {
103
+ m <- sapply(x , fitPowerLaw , fitInRange )
104
+ DF <- as(t(m ), " DataFrame" )
105
+ rownames(DF ) <- names(x )
106
+ DF
107
+ })
108
+
109
+ # ' @rdname fitPowerLaw
110
+ # ' @examples
111
+ # ' exampleCAGEexp |> CTSStagCountGR(1) |> fitPowerLaw()
112
+
113
+ setMethod ("fitPowerLaw ", "GRanges", function (x, fitInRange) {
114
+ fitPowerLaw(score(x ), fitInRange )
115
+ })
116
+
117
+ # ' @rdname fitPowerLaw
118
+ # ' @examples
119
+ # ' exampleCAGEexp |> CTSStagCountGR(1) |> score() |> fitPowerLaw()
120
+
121
+ setMethod ("fitPowerLaw ", "Rle", function (x, fitInRange) {
122
+ .fit.power.law.to.reverse.cumulative(x , fitInRange )
123
+ })
124
+
125
+ # ' @rdname fitPowerLaw
126
+ # ' @examples
127
+ # ' exampleCAGEexp |> CTSStagCountGR(1) |> score() |> decode() |> fitPowerLaw()
128
+
129
+ setMethod ("fitPowerLaw ", "numeric", function(x, fitInRange) {
130
+ .fit.power.law.to.reverse.cumulative(x , fitInRange )
131
+ })
132
+
133
+ .fit.power.law.to.reverse.cumulative <- function (values , val.range = c(10 , 1000 )) {
134
+ # Using Rle
135
+ v <- sort(Rle(values ), decr = TRUE )
136
+ nr_tags <- rev(runValue(v ))
137
+ reverse_cumulative <- v | > runLength() | > cumsum() | > rev()
138
+
139
+ inRange <- nr_tags > = min(val.range ) & nr_tags < = max(val.range )
140
+
141
+ lin.m <- lm(log(reverse_cumulative [inRange ]) ~ log(nr_tags [inRange ]))
142
+
143
+ a <- coefficients(lin.m )[2 ]
144
+ names(a ) <- ' plSlope'
145
+ b <- coefficients(lin.m )[1 ]
146
+ names(b ) <- ' plInt'
147
+
148
+ # check if specified range values have >1 entries in v
149
+ if (is.na(a ) && b == 0 ){
150
+ stop(paste(" Selected range for fitting the power law does not" ,
151
+ " contain enough values. Consider changing/increasing 'fitInRange'." ))
152
+ }
153
+ c(a , b )
154
+ }
107
155
108
156
# ' .normalize.to.reference.power.law.distribution
109
157
# '
0 commit comments