预处理
taxtab <- read.table('SRS014476-Supragingival_plaque_profile.txt', sep='\t', stringsAsFactors=FALSE)
second_last <- function(x) ifelse(length(x)>1, x[length(x)-1], NA)
taxtab <- taxtab[-grep('t__.*unclassified', taxtab$V1), ]
tax_split <- strsplit(taxtab$V1, '\\|')
target <- sapply(tax_split, tail, n=1)
mapping <- data.frame(id=0:(length(target)-1), row.names=target)
target_id <- mapping$id
source <- sapply(tax_split, second_last)
source_id <- mapping[source,]
value <- taxtab$V2
links <- data.frame(source=source_id, target=target_id, value)
links <- links[!is.na(links$source), ]
nodes <- data.frame(do.call(rbind, strsplit(target,'__')))
colnames(nodes)=c('tax', 'name')
nodes$tax <- factor(as.character(nodes$tax), levels=c('k','p','c','o','f','g','s'))
nodes$colors <- rainbow(length(levels(nodes$tax)))[as.numeric(nodes$tax)]
library(plotly)
#
#
#
#
#
#
#
#
#
#
#
#
packageVersion('plotly')
#
p <- plot_ly(
type = "sankey",
domain = c(
x = c(0,1),
y = c(0,1)
),
orientation = "h",
valueformat = ".0f",
valuesuffix = "TWh",
width = 800, height = 200,
node = list(
label = nodes$name,
color = nodes$colors,
pad = 40,
thickness = 15,
line = list(
color = "black",
width = 0.5
)
),
link = list(
source = links$source,
target = links$target,
value = links$value,
label = links$value
)
) %>%
layout(
title = "Sankey Diagram of Relative Abundances",
font = list(
size = 7
),
xaxis = list(showgrid = F, zeroline = F,tickmode='array',
tickvals=c(-1,0.25,1.375,2.5,3.625,4.75, 5.875),
tickfont=list(size=10),
ticktext=c('Kindom','Phylum','Class','Order','Family','Genus','Species')),
yaxis = list(showgrid = F, zeroline = F, showticklabels =F)
)
p