2016-04-24 244 views
0

我有一個名爲 mydf 的數據框,其中包含混合樣本比較的效率。對於 sampleA 和 sampleB 的每一種混合,都有七個不同的效率列。我想把這七種效率繪製出來,看看在哪個效率水平上數值與前幾列相比大幅下降。如何在 R 中繪製曲線的漸近線?

# Example data: six sample_A / sample_B hybrids, each with seven efficiency
# readings (efficiency, efficiency2, ..., efficiency7).
# Both sample columns are factors sharing the same eight levels; only the first
# four levels are actually used by the integer codes below.
mydf <- structure(
  list(
    sample_A = structure(
      c(1L, 2L, 2L, 2L, 3L, 4L),
      levels = c(
        "2568", "2669", "2670", "2671", "2946",
        "LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"
      ),
      class = "factor"
    ),
    sample_B = structure(
      c(1L, 2L, 3L, 4L, 3L, 4L),
      levels = c(
        "2568", "2669", "2670", "2671", "2946",
        "LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"
      ),
      class = "factor"
    ),
    efficiency = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986),
    efficiency2 = c(1, 0.944, 0.395, 0.402, 0.96, 0.966),
    efficiency3 = c(0.9, 0.844, 0.295, 0.302, 0.86, 0.866),
    efficiency4 = c(0.32, 0.264, -0.285, -0.278, 0.28, 0.286),
    efficiency5 = c(
      0.02, -0.0360000000000001, -0.585, -0.578,
      -0.0200000000000001, -0.0140000000000001
    ),
    efficiency6 = c(0.12, 0.0639999999999999, -0.485, -0.478, 0.08, 0.086),
    efficiency7 = c(0.02, -0.036, -0.585, -0.578, -0.02, -0.014)
  ),
  names = c(
    "sample_A", "sample_B", "efficiency", "efficiency2", "efficiency3",
    "efficiency4", "efficiency5", "efficiency6", "efficiency7"
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
+0

`asymptote(mydf)` 不能用嗎? – rawr

+0

@rawr不,它不。 – MAPK

回答

4

這裏是繪製你的數據的一種方法:

# Plot every hybrid's efficiency measurements as a spline-smoothed curve.
# For each curve the script additionally marks:
#   * with an X (pch = 4): the spline point inside the plateau window where two
#     consecutive spline values differ least (the flattest spot before the drop);
#   * with a dashed line: the straight line through the spline point where the
#     step to the next spline point is most negative inside the drop window,
#     i.e. an approximate tangent at the steepest descent.

# Example data: one row per sample_A / sample_B hybrid, seven efficiency columns.
mydf <- structure(
  list(
    sample_A = structure(
      c(1L, 2L, 2L, 2L, 3L, 4L),
      levels = c(
        '2568', '2669', '2670', '2671', '2946',
        'LPH-001-10_AK1', 'LPH-001-12_AK2', 'LPH-001-9'
      ),
      class = 'factor'
    ),
    sample_B = structure(
      c(1L, 2L, 3L, 4L, 3L, 4L),
      levels = c(
        '2568', '2669', '2670', '2671', '2946',
        'LPH-001-10_AK1', 'LPH-001-12_AK2', 'LPH-001-9'
      ),
      class = 'factor'
    ),
    efficiency = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986),
    efficiency2 = c(1, 0.944, 0.395, 0.402, 0.96, 0.966),
    efficiency3 = c(0.9, 0.844, 0.295, 0.302, 0.86, 0.866),
    efficiency4 = c(0.32, 0.264, -0.285, -0.278, 0.28, 0.286),
    efficiency5 = c(
      0.02, -0.0360000000000001, -0.585, -0.578,
      -0.0200000000000001, -0.0140000000000001
    ),
    efficiency6 = c(0.12, 0.0639999999999999, -0.485, -0.478, 0.08, 0.086),
    efficiency7 = c(0.02, -0.036, -0.585, -0.578, -0.02, -0.014)
  ),
  names = c(
    'sample_A', 'sample_B', 'efficiency', 'efficiency2', 'efficiency3',
    'efficiency4', 'efficiency5', 'efficiency6', 'efficiency7'
  ),
  row.names = c(NA, 6L),
  class = 'data.frame'
)

# Column indices of all efficiency measurements (names starting with
# "efficiency"); their order in the data frame is the x-axis order.
effCis <- grep('^efficiency', names(mydf))
if (length(effCis) == 0L) {
  stop("mydf has no columns whose names start with 'efficiency'", call. = FALSE)
}

# Axis ranges: x spans the measurement index, y is the data range widened
# outwards to the nearest multiple of 0.1 so the grid lines land on round values.
xlim <- c(1, length(effCis))
ylim <- range(mydf[, effCis], na.rm = TRUE)
ylim[1L] <- floor(ylim[1L] / 0.1) * 0.1
ylim[2L] <- ceiling(ylim[2L] / 0.1) * 0.1
xticks <- seq_along(effCis)
yticks <- seq(ylim[1L], ylim[2L], 0.1)

# Empty canvas with a hand-drawn grid, a bold zero line and custom axes.
plot(
  NA,
  xlim = xlim, ylim = ylim,
  xlab = 'measurement', ylab = 'efficiency',
  xaxs = 'i', yaxs = 'i', axes = FALSE
)
abline(v = xticks, col = 'lightgrey')
abline(h = yticks, col = 'lightgrey')
abline(h = 0, lwd = 2)
axis(1L, xticks, xticks, font = 2L, cex.axis = 0.7)
axis(2L, yticks, sprintf('%.1f', yticks), las = 1L, font = 2L, cex.axis = 0.7)

# One colour per hybrid (row of mydf). The palette is recycled with rep_len()
# so that a data frame with a row count other than six still gets a colour for
# every row instead of failing inside data.frame().
hybridPalette <- c('red', 'green', 'blue', 'gold', 'cyan', 'magenta')
hybrid.col <- data.frame(
  hybrid = seq_len(nrow(mydf)),
  col = rep_len(hybridPalette, nrow(mydf)),
  stringsAsFactors = FALSE
)

# Number of points at which each spline is evaluated.
splineN <- 200L
# Open x-intervals (in measurement-index units) searched for the flattest
# point and for the steepest descent, respectively.
plateauWin <- c(2, 3)
dropWin <- c(2, 5)

for (ri in seq_len(nrow(hybrid.col))) {
  hybrid <- hybrid.col$hybrid[ri]
  col <- hybrid.col$col[ri]
  x <- xticks
  y <- c(as.matrix(mydf[hybrid, effCis]))

  # Raw measurements, then the interpolating spline through them.
  points(x, y, pch = 16L, col = col, xpd = NA)
  sp <- spline(x, y, n = splineN)
  lines(sp$x, sp$y, col = col, lwd = 2, xpd = NA)

  # Flattest spot inside the plateau window: smallest absolute change between
  # consecutive spline values.
  localwin <- which(sp$x > plateauWin[1L] & sp$x < plateauWin[2L])
  tp <- which.min(abs(diff(sp$y[localwin])))
  if (length(tp) > 0L) {
    points(sp$x[localwin[tp]], sp$y[localwin[tp]], col = col, pch = 4L)
  }

  # Steepest descent inside the drop window: most negative change between
  # consecutive spline values. The slope is taken from that pair of points and
  # the dashed line is drawn through the first of them.
  localwin <- which(sp$x > dropWin[1L] & sp$x < dropWin[2L])
  tp <- which.min(diff(sp$y[localwin]))
  if (length(tp) > 0L) {
    seg <- localwin[seq(tp, length.out = 2L)]
    m <- diff(sp$y[seg]) / diff(sp$x[seg])
    if (is.finite(m)) {
      abline(sp$y[localwin[tp]] - m * sp$x[localwin[tp]], m, col = col, lty = 2L)
    }
  }
}

# Legend labelled "sample_A/sample_B" per hybrid. The position (5.5, 0.95) is
# in data coordinates chosen for the example data above.
legend(
  5.5, 0.95,
  paste0(mydf$sample_A, '/', mydf$sample_B),
  fill = hybrid.col$col, cex = 0.7, title = 'hybrid'
)

plot


我不能百分之百確定你所說的「漸近線」是什麼意思。我最初以爲你也許想要曲線在開始下降之前的局部最大值,這就是爲什麼我用點(符號 X,即 pch=4L)標記了局部最大值。但後來我意識到,你的意思也許是沿着下降段的切線,所以我又添加了在斜率最陡處與曲線相切的直線。

這是漸近線的定義:

a straight line approached by a given curve as one of the variables in the equation of the curve approaches infinity.

我不認爲這個定義在這裏適用;繪製這些數據並不涉及任何趨於無窮的量。我想你想要的是局部最大值或切線。

+0

@bgoldst 非常感謝,我沒想到會得到這麼好的答案。 – MAPK

+0

我的數據框遇到了問題。它有 7346 行,其中一些單元格是 `NA`。我已經用 `0` 替換了這些 `NA`,按理說應該解決了這個問題,但我仍然得到這個錯誤:`Error in xy.coords(x, y) : 'x' and 'y' lengths differ`。有什麼建議嗎?謝謝 – MAPK

+0

這是mydata,請指出它有什麼問題:https://www.dropbox.com/s/qwcb3nxww0371c0/test.csv?dl=0 – MAPK