Chapter 2 Box Problems
## Box A
Problem a Let’s call PGI-2a ‘a’ and PGI-2b ‘b’
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 phenotypes <- c("aa" ,"ab" ,"bb" ) observed <- c(35 ,19 ,3 ) total <- sum(observed) Freq.a <- (2 *35 + 19 )/(2 *total) Freq.b <- (19 + 2 *3 )/(2 *total) expected <- c((Freq.a^2 )*total, 2 *(Freq.a*Freq.b)*total, (Freq.b^2 )*total) expected <- round(expected, 2 ) data.frame( cbind( phenotypes, observed, expected ))
Problem b
Set up a data.frame to hold the Drosophila mobility and inversion data
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 aGpdh <- c( rep("F" ,4 ), rep("S" ,4 )) amy <- rep(c("F" ,"F" ,"S" ,"S" ), 2 ) NS <- rep( c("non-NS" ,"NS" ), 4 ) counts <- c(726 ,90 ,111 ,1 ,172 ,32 ,26 ,0 ) results <- data.frame( cbind(aGpdh, amy, NS, counts), stringsAsFactors=FALSE ) results$counts <- as.numeric(results$counts) results
1 2 3 4 5 6 7 8 9 10 11 12 total <- sum(results$counts) total S.table <- results[ results$aGpdh=="S" ,] S.freq <- sum(S.table$counts)/total S.freq
1 2 3 F.table <- results[ results$aGpdh=="F" ,] F.freq <- sum(F.table$counts)/total F.freq
1 2 3 4 5 S.table <- results[ results$amy=="S" ,] S.freq <- sum(S.table$counts)/total S.freq
1 2 3 F.table <- results[ results$amy=="F" ,] F.freq <- sum(F.table$counts)/total F.freq
1 2 3 4 5 NS.table <- results[ results$NS=="NS" ,] NS.freq <- sum(NS.table$counts)/total NS.freq
1 2 3 nonNS.table <- results[ results$NS=="non-NS" ,] nonNS.freq <- sum(nonNS.table$counts)/total nonNS.freq
## Box C
Problem A
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 m <- 4 i <- c(0 ,1 ,2 ,3 ,m) s <- c( 0.5 , 0.5 , 0.5 , 0.5 , 0 ) b <- c(0 , 1 , 1.5 , 1 , 0 ) ce <- vector("numeric" , 5 ) ce
1 2 3 4 5 6 7 8 9 lambda <- 1 ce[1 ] <- lambda ce[2 ] <- s[1 ]*lambda^(-1 ) ce[3 ] <- s[1 ]*s[2 ]*lambda^(-2 ) ce[4 ] <- s[1 ]*s[2 ]*s[3 ]*lambda^(-3 ) ce[5 ] <- s[1 ]*s[2 ]*s[3 ]*s[4 ]*lambda^(-4 ) ce
Problem b
$$N_{t^{*}} = N_0\lambda^{t^{*}}=2N_0$$
$$N_0\lambda^{t^{*}}=2N_0$$
$$\lambda^{t^{*}}=2$$
$${t^{*}}\ln(\lambda)=\ln(2)$$
$$\ln(\lambda)=\frac{\ln(2)}{t^{*}}$$
$$\lambda=e^{(\frac{\ln(2)}{t^{*}})}$$
1 2 3 4 5 t.star <- 173 lambda <- exp(log(2 )/t.star) lambda
1 2 3 t.star <- 19.8 lambda <- exp(log(2 )/t.star) lambda
## Box D ##
Problem A Using the table I extract coefficients fromt the “Offspring Frequency” Aa column and apply to the “Frequency of Mating” column to obtain the frequency of heterozygotes in the next generation, Q’:
$$Q’ = \frac{1}{2}(2PQ) + 2PR + \frac{1}{2}Q^2 + \frac{1}{2}(2QR)$$
$$ = PQ + 2PR + \frac{1}{2}Q^2 + QR$$
Factor out Q/2 + R
$$= 2P(\frac{Q}{2} + R) + Q(\frac{Q}{2}+R)$$ $$= (2P + Q)(\frac{Q}{2} + R)$$ $$= 2(P + \frac{Q}{2})( \frac{Q}{2} + R)$$ Given that p = P + Q/2 and q= Q/2+R = 2pq
## Box E ##
Problem A 1 2 3 4 5 6 7 8 9 10 11 total <- 2060 O <- 702 /total A <- 862 /total B <- 365 /total AB <- 131 /total p <- 1 -sqrt(B+O) q <- 1 -sqrt(A+O) r <- sqrt(O) p
Problem B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 theta <- 1 -p-q-r theta p.final <- p*(1 +theta/2 ) q.final <- q*(1 +theta/2 ) r.final <- (r+theta/2 )*(1 +theta/2 ) p.final q.final r.final p.final+q.final+r.final
Problem C 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 O.expected <- r.final^2 O.expected*total A.expected <- p.final^2 + 2 *p.final*r.final A.expected*total B.expected <- q.final^2 + 2 *q.final*r.final B.expected*total AB.expected <- 2 *p.final*q.final AB.expected*total O.expected + A.expected + B.expected + AB.expected
Problem D 1 2 3 4 observed <- c(O,A,B,AB) expected <- c(O.expected,A.expected,B.expected,AB.expected) chisq.test( observed, p = expected, correct=FALSE )
1 2 chisq.test( observed, p = expected, rescale.p=TRUE , correct=FALSE )
1 2 observed <- c(701 ,862 ,365 ,131 ) chisq.test( observed, p = expected, rescale.p=TRUE , correct=FALSE )
Though the \(X^2\) value is correct I cannot modify the degrees of freedom using chisq.test. Try a manual maximum liklihood instead.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 n.A <- 862 n.AB <- 131 n.B <- 365 n.OO<- 702 N <- sum( n.A, n.AB, n.B, n.OO) p.a <- 0.33 p.b <- 0.33 p.o <- 0.34 num.iter <- 6 results <- data.frame( matrix( nrow=num.iter, ncol=10 )) names(results) <- c("iter" ,"Naa" ,"Nao" ,"Nbb" ,"Nbo" ,"Nab" ,"Noo" ,"p.a" ,"p.b" ,"p.o" ) for ( i in 1 :num.iter){ Naa <- n.A*(p.a^2 /(p.a^2 +2 *p.a*p.o)) Nao <- n.A*((2 *p.a*p.o)/(p.a^2 +2 *p.a*p.o)) Nbb <- n.B*(p.b^2 /(p.b^2 +2 *p.b*p.o)) Nbo <- n.B*((2 *p.b*p.o)/(p.b^2 +2 *p.b*p.o)) Nab <- n.AB Noo <- n.OO p.a <- (2 *Naa + Nao + Nab)/(2 *N) p.b <- (2 *Nbb + Nbo + Nab)/(2 *N) p.o <- (2 *Noo + Nao + Nbo)/(2 *N) results[i,] <- c(i,Naa,Nao,Nbb,Nbo,Nab,Noo,p.a,p.b,p.o) } results
Converges to 3 significant figures after about 4 iterations.
## Box F ##
Problem A Given:
$$m_n = f_{n-1}$$
$$f_n = \frac{1}{2}(m_{n-1} + f_{n-1})$$
Then:
$$f_n - m_n = \frac{1}{2}(m_{n-1} + f_{n-1}) - f_{n-1}$$
$$f_n - m_n = \frac{1}{2}(m_{n-1} + f_{n-1} - 2f_{n-1})$$
$$f_n - m_n = \frac{1}{2}(m_{n-1} - f_{n-1})$$
$$f_n - m_n = -\frac{1}{2}(f_{n-1} - m_{n-1})$$
Problem B Given the expression for the current generation:
$$\frac{2}{3}(f_n) + \frac{1}{3}(m_n)$$
Substitute in:
$$m_{n} = f_{n-1}$$
$$f_n = \frac{1}{2}(m_{n-1} + f_{n-1})$$
to get:
$$\frac{2}{3}[\frac{1}{2}(m_{n-1}+f_{n-1})]+\frac{1}{3}(f_{n-1})$$
$$\frac{1}{3}(m_{n-1}+f_{n-1})+\frac{1}{3}(f_{n-1})$$
$$\frac{1}{3}m_{n-1}+\frac{2}{3}(f_{n-1})$$
Which is the expression for the previous generation - the same expression as the current generation.
Problem C Set up a vector to handle the frequencies, noting that the vector index will be one off from the generation.
1 2 3 4 5 6 7 8 9 10 11 12 13 m <- vector( mode="numeric" , length = 7 ) f <- vector( mode="numeric" , length = 7 ) m[1 ] <- 0.2 f[1 ] <- 0.8 for ( i in 2 :7 ){ m[i] <- f[i-1 ] f[i] <- 0.5 *(m[i-1 ] + f[i-1 ]) } results <- data.frame( cbind(m, f)) results
1 2 3 4 p <- ((2 /3 )*f[1 ] + (1 /3 )*m[1 ]) q <- 1 -p p; q
## Box G ##
Problem A
A1 allele frequency $$p_1 = P_{11} + P_{12}$$
A2 allel frequency $$p_2 = P_{21} + P_{22}$$
B1 allele frequency $$q1 = P_{11} + P_{21}$$
B2 allel frequency $$q2 = P_{12} + P_{22}$$
disequilibrium parameter $$D = P_{11}*P_{22} - P_{12}*P_{21}$$
Show that $P_{11} = p_1q_1 + D$
Substitute for $p_1, q_1, D$
$$P_{11} = (P_{11} + P_{12})(P_{11} + p_{21}) + (P_{11}*P_{22} - P_{12}*p_{21})$$
$$P_{11} = P_{11}*P_{11} + P_{11}*p_{21} + P_{12}*P_{11} + P_{12}*p_{21} + P_{11}*P_{22} - P_{12}*p_{21}$$
$$P_{11} = P_{11}*P_{11} + P_{11}*p_{21} + P_{12}*P_{11} + P_{11}*P_{22}$$
$$P_{11} = P_{11}*(P_{11} + p_{21} + P_{12} + P_{22})$$
Noting that $$P_{11} + P_{21} + P_{12} + P_{22} = 1$$
$$P_{11} = P_{11}*1$$
Problem D 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 aGpdh <- c( rep("F" ,4 ), rep("S" ,4 )) amy <- rep(c("F" ,"F" ,"S" ,"S" ), 2 ) NS <- rep( c("non-NS" ,"NS" ), 4 ) counts <- c(726 ,90 ,111 ,1 ,172 ,32 ,26 ,0 ) results <- data.frame( cbind(aGpdh, amy, NS, counts), stringsAsFactors=FALSE ) results.w <- reshape( results, idvar= c("amy" ,"NS" ), v.names="counts" , timevar = "aGpdh" , direction="wide" ) results.w$counts.sum <- as.numeric(results.w$counts.F) + as.numeric(results.w$counts.S) results.w
1 2 3 total <- sum(results.w$counts.sum) total
1 2 3 4 5 6 7 8 9 10 S.table <- results.w[ results.w$amy=="S" ,] S.freq <- sum(S.table$counts.sum)/total S.freq F.table <- results.w[ results.w$amy=="F" ,] F.freq <- sum(F.table$counts.sum)/total F.freq
1 2 3 4 5 NS.table <- results.w[ results.w$NS=="NS" ,] NS.freq <- sum(NS.table$counts.sum)/total NS.freq
1 2 3 nonNS.table <- results.w[ results.w$NS=="non-NS" ,] nonNS.freq <- sum(nonNS.table$counts.sum)/total nonNS.freq
1 2 3 4 5 results.w[ results.w$amy=="F" & results.w$NS=="non-NS" ,]
1 2 D <- results.w[ results.w$amy=="F" & results.w$NS=="non-NS" , "counts.sum" ]/total - F.freq*nonNS.freq D
1 2 3 4 rho <- D/sqrt(S.freq*F.freq*NS.freq*nonNS.freq ) rho
1 2 Chi.square <- rho^2 *total Chi.square
1 2 observed <- results.w[, "counts.sum" ] observed
1 2 3 4 5 expected <- c( (nonNS.freq*F.freq/total), (NS.freq*F.freq/total), (nonNS.freq*S.freq/total), (NS.freq*S.freq/total)) expected
1 chisq.test( observed, p = expected, rescale.p=TRUE , correct=FALSE )
Problem E This is the same as E only now we use aGpdh instead of amy
1 2 3 4 5 6 7 8 results.w <- reshape( results, idvar= c("aGpdh" ,"NS" ), v.names="counts" , timevar = "amy" , direction="wide" ) results.w$counts.sum <- as.numeric(results.w$counts.F) + as.numeric(results.w$counts.S) results.w
1 2 3 total <- sum(results.w$counts.sum) total
1 2 3 4 5 S.table <- results.w[ results.w$aGpdh=="S" ,] S.freq <- sum(S.table$counts.sum)/total S.freq
1 2 3 F.table <- results.w[ results.w$aGpdh=="F" ,] F.freq <- sum(F.table$counts.sum)/total F.freq
1 2 3 4 5 NS.table <- results.w[ results.w$NS=="NS" ,] NS.freq <- sum(NS.table$counts.sum)/total NS.freq
1 2 3 nonNS.table <- results.w[ results.w$NS=="non-NS" ,] nonNS.freq <- sum(nonNS.table$counts.sum)/total nonNS.freq
1 2 3 4 5 6 results.w[ results.w$aGpdh=="F" & results.w$NS=="non-NS" ,]
1 2 D <- results.w[ results.w$aGpdh=="F" & results.w$NS=="non-NS" , "counts.sum" ]/total - F.freq*nonNS.freq D
1 2 3 4 rho <- D/sqrt(S.freq*F.freq*NS.freq*nonNS.freq ) rho
1 2 Chi.square <- rho^2 *total Chi.square
1 2 observed <- results.w[, "counts.sum" ] observed
1 2 3 4 5 expected <- c( (nonNS.freq*F.freq/total), (NS.freq*F.freq/total), (nonNS.freq*S.freq/total), (NS.freq*S.freq/total)) expected
1 chisq.test( observed, p = expected, rescale.p=TRUE , correct=FALSE )
Note that the degrees of freedom of 3 used by R is inappropriate. For 1 degree of freedon (4 - 1 (sample size) -1 (estimating p1) -1 (estimating p2) = 1 ) you must read a p ~0.07 off a chi square table. Do not reject the null hypothesis ( independence or linkage equilibrium) and so conclude linkage equilibrium.
## Box H ##
Problem A For an autosomal gene the paths are:
GC
A E: $(\frac{1}{2})^4*(1+1) = \frac{1}{16}*2 = \frac{8}{64}$
GD
A E: $(\frac{1}{2})^4*(1+1) = \frac{1}{16}*2 = \frac{8}{64}$
GD
B E: $(\frac{1}{2})^4*(1+\frac{1}{4}) = \frac{1}{16}*\frac{5}{4} = \frac{5}{64}$
Total: $\frac{8}{64} + \frac{8}{64} + \frac{5}{64} = \frac{21}{64}$
Problem B For a sex linked gene:
GCA E: CAE are male so this path is not included GDA E: AE are male so this path is not included
GD
B E: $(\frac{1}{2})^{3}*(1+\frac{1}{4}) = \frac{1}{8}*\frac{5}{4} = \frac{5}{32}$
Total: $\displaystyle \frac{5}{32}$