桑基图(Sankey diagram)II

使用plotly

预处理

taxtab <- read.table('SRS014476-Supragingival_plaque_profile.txt', sep='\t', stringsAsFactors=FALSE)

second_last <- function(x) ifelse(length(x)>1, x[length(x)-1], NA)

taxtab <- taxtab[-grep('t__.*unclassified', taxtab$V1), ]   ## remove strain level
 
tax_split <- strsplit(taxtab$V1, '\\|')                ## split into different taxonomy levels
target <- sapply(tax_split, tail, n=1)                 ## target node
mapping <- data.frame(id=0:(length(target)-1), row.names=target)
target_id <- mapping$id
source <- sapply(tax_split, second_last)               ## source node
source_id <- mapping[source,]
value <- taxtab$V2                                     ## width of flow

links <- data.frame(source=source_id, target=target_id, value) ## links
links <- links[!is.na(links$source), ]                 ## root of the tree has no parent (source)
nodes <- data.frame(do.call(rbind, strsplit(target,'__')))
colnames(nodes)=c('tax', 'name')
nodes$tax <- factor(as.character(nodes$tax), levels=c('k','p','c','o','f','g','s'))
nodes$colors <- rainbow(length(levels(nodes$tax)))[as.numeric(nodes$tax)]
##links$colors <- nodes[links$source+1,3]
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
packageVersion('plotly')
## [1] '4.7.1'
p <- plot_ly(
    type = "sankey",
    domain = c(
      x =  c(0,1),
      y =  c(0,1)
    ),
    orientation = "h",
    valueformat = ".0f",
    valuesuffix = "TWh",
  width = 800, height = 200,
    node = list(
      label = nodes$name,
      color = nodes$colors,
      pad = 40,
      thickness = 15,
      line = list(
        color = "black",
        width = 0.5
      )
    ),

    link = list(
      source = links$source,
      target = links$target,
      value =  links$value,
      label =  links$value
    )
  ) %>% 
  layout(
    title = "Sankey Diagram of Relative Abundances",
    font = list(
      size = 7
    ),
    xaxis = list(showgrid = F, zeroline = F,tickmode='array',
                 tickvals=c(-1,0.25,1.375,2.5,3.625,4.75, 5.875),
                 tickfont=list(size=10),
                 ticktext=c('Kindom','Phylum','Class','Order','Family','Genus','Species')),
    yaxis = list(showgrid = F, zeroline = F, showticklabels =F)
)

p

Related

comments powered by Disqus