简介
curatedMetagenomicData(https://waldronlab.github.io/curatedMetagenomicData/)的目标是用标准化的流程(MetaPhlAn2、HUMAnN2)分析已发表的宏基因组数据,并建立一个统一的数据集合。目前已收录6000余个样本,并在持续扩充当中。样本涵盖糖尿病、肥胖症、IBD等多种疾病,采样部位包括皮肤、口腔、粪便等多个身体部位。
完整帮助信息
安装
通过Bioconductor安装最新版本(useDevel()会切换到Bioconductor开发版)
## try http:// if https:// URLs are not supported
source("https://bioconductor.org/biocLite.R")
useDevel()
biocLite("curatedMetagenomicData")
基本使用
加载包
suppressPackageStartupMessages(library(curatedMetagenomicData))
查看metadata
combined_metadata
## # A tibble: 6,058 x 80
## dataset_name sampleID subjectID body_site antibiotics_current_use
## <chr> <chr> <chr> <chr> <chr>
## 1 AsnicarF_2017 MV_FEI1_t1Q14 MV_FEI1 stool <NA>
## 2 AsnicarF_2017 MV_FEI2_t1Q14 MV_FEI2 stool <NA>
## 3 AsnicarF_2017 MV_FEI3_t1Q14 MV_FEI3 stool <NA>
## 4 AsnicarF_2017 MV_FEI4_t1Q14 MV_FEI4 stool <NA>
## 5 AsnicarF_2017 MV_FEI4_t2Q15 MV_FEI4 stool <NA>
## 6 AsnicarF_2017 MV_FEI5_t1Q14 MV_FEI5 stool <NA>
## 7 AsnicarF_2017 MV_FEI5_t2Q14 MV_FEI5 stool <NA>
## 8 AsnicarF_2017 MV_FEI5_t3Q15 MV_FEI5 stool <NA>
## 9 AsnicarF_2017 MV_FEM1_t1Q14 MV_FEM1 stool <NA>
## 10 AsnicarF_2017 MV_FEM2_t1Q14 MV_FEM2 stool <NA>
## # ... with 6,048 more rows, and 75 more variables: study_condition <chr>,
## # disease <chr>, age <int>, infant_age <int>, age_category <chr>,
## # gender <chr>, country <chr>, non_westernized <chr>,
## # sequencing_platform <chr>, DNA_extraction_kit <chr>, PMID <chr>,
## # number_reads <int>, number_bases <dbl>, minimum_read_length <int>,
## # median_read_length <dbl>, pregnant <chr>, lactating <chr>,
## # NCBI_accession <chr>, BMI <dbl>, antibiotics_family <chr>,
## # momeducat <int>, alcohol <chr>, `flg-genotype` <chr>,
## # disease_subtype <chr>, hdl <dbl>, triglycerides <dbl>, hba1c <dbl>,
## # ldl <dbl>, tnm <chr>, body_subsite <chr>, visit_number <int>,
## # days_from_first_collection <int>, `c-peptide` <dbl>, family <int>,
## # cholesterol <dbl>, glucose <dbl>, mumps <chr>, adiponectin <dbl>,
## # `insulin(cat)` <chr>, `fgf-19` <dbl>, hscrp <dbl>, leptin <dbl>,
## # glutamate_decarboxylase_2_antibody <dbl>, creatinine <dbl>,
## # `il-1` <dbl>, cd163 <dbl>, `glp-1` <dbl>, hitchip_probe_class <chr>,
## # hitchip_probe_number <int>, protein_intake <dbl>,
## # days_after_onset <int>, stec_count <chr>, shigatoxin_2_elisa <chr>,
## # stool_texture <chr>, ferm_milk_prod_consumer <chr>,
## # mgs_richness <dbl>, location <chr>, dyastolic_p <dbl>,
## # systolic_p <dbl>, prothrombin_time <int>, creatine <dbl>, inr <dbl>,
## # ctp <int>, albumine <dbl>, bilubirin <dbl>, smoker <chr>,
## # ever_smoker <chr>, birth_control_pil <chr>, hla_drb12 <int>,
## # hla_dqa12 <int>, hla_dqa11 <int>, hla_drb11 <int>,
## # start_solidfood <int>, ajcc <chr>, fobt <chr>
查看metadata的列名(变量名)
colnames(combined_metadata)
## [1] "dataset_name"
## [2] "sampleID"
## [3] "subjectID"
## [4] "body_site"
## [5] "antibiotics_current_use"
## [6] "study_condition"
## [7] "disease"
## [8] "age"
## [9] "infant_age"
## [10] "age_category"
## [11] "gender"
## [12] "country"
## [13] "non_westernized"
## [14] "sequencing_platform"
## [15] "DNA_extraction_kit"
## [16] "PMID"
## [17] "number_reads"
## [18] "number_bases"
## [19] "minimum_read_length"
## [20] "median_read_length"
## [21] "pregnant"
## [22] "lactating"
## [23] "NCBI_accession"
## [24] "BMI"
## [25] "antibiotics_family"
## [26] "momeducat"
## [27] "alcohol"
## [28] "flg-genotype"
## [29] "disease_subtype"
## [30] "hdl"
## [31] "triglycerides"
## [32] "hba1c"
## [33] "ldl"
## [34] "tnm"
## [35] "body_subsite"
## [36] "visit_number"
## [37] "days_from_first_collection"
## [38] "c-peptide"
## [39] "family"
## [40] "cholesterol"
## [41] "glucose"
## [42] "mumps"
## [43] "adiponectin"
## [44] "insulin(cat)"
## [45] "fgf-19"
## [46] "hscrp"
## [47] "leptin"
## [48] "glutamate_decarboxylase_2_antibody"
## [49] "creatinine"
## [50] "il-1"
## [51] "cd163"
## [52] "glp-1"
## [53] "hitchip_probe_class"
## [54] "hitchip_probe_number"
## [55] "protein_intake"
## [56] "days_after_onset"
## [57] "stec_count"
## [58] "shigatoxin_2_elisa"
## [59] "stool_texture"
## [60] "ferm_milk_prod_consumer"
## [61] "mgs_richness"
## [62] "location"
## [63] "dyastolic_p"
## [64] "systolic_p"
## [65] "prothrombin_time"
## [66] "creatine"
## [67] "inr"
## [68] "ctp"
## [69] "albumine"
## [70] "bilubirin"
## [71] "smoker"
## [72] "ever_smoker"
## [73] "birth_control_pil"
## [74] "hla_drb12"
## [75] "hla_dqa12"
## [76] "hla_dqa11"
## [77] "hla_drb11"
## [78] "start_solidfood"
## [79] "ajcc"
## [80] "fobt"
获取人体各部位样本概况
table(combined_metadata$body_site)
##
## milk nasalcavity oralcavity skin stool vagina
## 8 91 678 466 4810 5
获取包含的数据集
table(combined_metadata$dataset_name)
##
## AsnicarF_2017 BritoIL_2016 Castro-NallarE_2015
## 24 312 32
## ChngKR_2016 FengQ_2015 HanniganGD_2017
## 78 154 82
## Heitz-BuschartA_2016 HMP_2012 KarlssonFH_2013
## 53 749 145
## LeChatelierE_2013 LiJ_2014 LiuW_2016
## 292 260 110
## LomanNJ_2013 NielsenHB_2014 Obregon-TitoAJ_2015
## 43 396 58
## OhJ_2014 QinJ_2012 QinN_2014
## 291 363 237
## RampelliS_2015 RaymondF_2016 SchirmerM_2016
## 38 72 471
## TettAJ_2016 VatanenT_2016 VincentC_2016
## 97 785 229
## VogtmannE_2016 XieH_2016 YuJ_2015
## 110 250 128
## ZellerG_2014
## 199
下载HMP 2012(HMP I)的MetaPhlAn2分析结果
dat <- curatedMetagenomicData("HMP_2012.metaphlan_bugs_list.*", dryrun=FALSE)
## Working on HMP_2012.metaphlan_bugs_list.nasalcavity
## Warning in strptime(x, fmt, tz = "GMT"): unknown timezone 'zone/tz/2017c.
## 1.0/zoneinfo/Asia/Singapore'
## snapshotDate(): 2017-10-30
## see ?curatedMetagenomicData and browseVignettes('curatedMetagenomicData') for documentation
## loading from cache '/Users/mayuan//.ExperimentHub/424'
## Working on HMP_2012.metaphlan_bugs_list.oralcavity
## snapshotDate(): 2017-10-30
## see ?curatedMetagenomicData and browseVignettes('curatedMetagenomicData') for documentation
## loading from cache '/Users/mayuan//.ExperimentHub/425'
## Working on HMP_2012.metaphlan_bugs_list.stool
## snapshotDate(): 2017-10-30
## see ?curatedMetagenomicData and browseVignettes('curatedMetagenomicData') for documentation
## loading from cache '/Users/mayuan//.ExperimentHub/426'
## Working on HMP_2012.metaphlan_bugs_list.vagina
## snapshotDate(): 2017-10-30
## see ?curatedMetagenomicData and browseVignettes('curatedMetagenomicData') for documentation
## loading from cache '/Users/mayuan//.ExperimentHub/427'
查看HMP I粪便数据的元数据
head( pData(dat[[3]]) )
## subjectID body_site body_subsite
## SRS056519 HMP_2012_765094712 stool stool
## SRS016335 HMP_2012_765074482 stool stool
## SRS011061 HMP_2012_158458797 stool stool
## SRS016267 HMP_2012_764669880 stool stool
## SRS053214 HMP_2012_159753524 stool stool
## SRS013521 HMP_2012_159227541 stool stool
## antibiotics_current_use study_condition disease age age_category
## SRS056519 <NA> control healthy 19 schoolage
## SRS016335 <NA> control healthy 20 adult
## SRS011061 <NA> control healthy 30 adult
## SRS016267 <NA> control healthy 29 adult
## SRS053214 <NA> control healthy 23 adult
## SRS013521 <NA> control healthy 22 adult
## gender visit_number BMI country non_westernized
## SRS056519 male NA NA USA no
## SRS016335 male NA NA USA no
## SRS011061 female NA NA USA no
## SRS016267 male NA NA USA no
## SRS053214 female NA NA USA no
## SRS013521 female NA NA USA no
## DNA_extraction_kit number_reads number_bases minimum_read_length
## SRS056519 Qiagen 125076707 12506449555 60
## SRS016335 Qiagen 125043127 11938455408 60
## SRS011061 Qiagen 90085554 8028157196 60
## SRS016267 Qiagen 103068446 9820433068 60
## SRS053214 Qiagen 103639606 9687833906 60
## SRS013521 Qiagen 115013916 10925100631 60
## median_read_length NCBI_accession
## SRS056519 100 SRS056519
## SRS016335 100 SRS016335
## SRS011061 90 SRS011061
## SRS016267 100 SRS016267
## SRS053214 95 SRS053214
## SRS013521 95 SRS013521
查看MetaPhlAn2相对丰度表
hmp_stool_metaphlan <- exprs(dat[[3]])
hmp_stool_metaphlan[1:10, 1:10]
## SRS056519 SRS016335
## k__Viruses 0.00090 0.00179
## k__Bacteria 99.99910 99.96451
## k__Viruses|p__Viruses_noname 0.00090 0.00179
## k__Bacteria|p__Actinobacteria 0.25980 0.13760
## k__Bacteria|p__Firmicutes 9.52434 18.67835
## k__Bacteria|p__Proteobacteria 2.27579 2.32088
## k__Bacteria|p__Bacteroidetes 87.93779 77.40923
## k__Viruses|p__Viruses_noname|c__Viruses_noname 0.00090 0.00179
## k__Bacteria|p__Actinobacteria|c__Actinobacteria 0.25980 0.13760
## k__Bacteria|p__Firmicutes|c__Bacilli 0.01382 0.02164
## SRS011061 SRS016267
## k__Viruses 0.00000 0.00000
## k__Bacteria 100.00000 100.00000
## k__Viruses|p__Viruses_noname 0.00000 0.00000
## k__Bacteria|p__Actinobacteria 0.04200 0.11105
## k__Bacteria|p__Firmicutes 5.59309 3.98890
## k__Bacteria|p__Proteobacteria 2.98345 0.55210
## k__Bacteria|p__Bacteroidetes 91.07961 95.28808
## k__Viruses|p__Viruses_noname|c__Viruses_noname 0.00000 0.00000
## k__Bacteria|p__Actinobacteria|c__Actinobacteria 0.04200 0.11105
## k__Bacteria|p__Firmicutes|c__Bacilli 0.04196 0.00000
## SRS053214 SRS013521
## k__Viruses 0.00000 0.00882
## k__Bacteria 100.00000 99.99118
## k__Viruses|p__Viruses_noname 0.00000 0.00882
## k__Bacteria|p__Actinobacteria 0.46158 0.08515
## k__Bacteria|p__Firmicutes 16.03493 1.73242
## k__Bacteria|p__Proteobacteria 0.23830 0.49311
## k__Bacteria|p__Bacteroidetes 80.04580 97.68049
## k__Viruses|p__Viruses_noname|c__Viruses_noname 0.00000 0.00882
## k__Bacteria|p__Actinobacteria|c__Actinobacteria 0.46158 0.08515
## k__Bacteria|p__Firmicutes|c__Bacilli 0.01578 0.02924
## SRS018817 SRS017247
## k__Viruses 0.00000 0.00000
## k__Bacteria 100.00000 100.00000
## k__Viruses|p__Viruses_noname 0.00000 0.00000
## k__Bacteria|p__Actinobacteria 0.08518 0.15577
## k__Bacteria|p__Firmicutes 11.52659 4.35400
## k__Bacteria|p__Proteobacteria 0.22363 0.21245
## k__Bacteria|p__Bacteroidetes 87.99725 95.08394
## k__Viruses|p__Viruses_noname|c__Viruses_noname 0.00000 0.00000
## k__Bacteria|p__Actinobacteria|c__Actinobacteria 0.08518 0.15577
## k__Bacteria|p__Firmicutes|c__Bacilli 0.00443 0.01766
## SRS019968 SRS064557
## k__Viruses 0.00077 0.00194
## k__Bacteria 99.99923 99.99806
## k__Viruses|p__Viruses_noname 0.00077 0.00194
## k__Bacteria|p__Actinobacteria 0.16129 0.03814
## k__Bacteria|p__Firmicutes 16.11446 2.69009
## k__Bacteria|p__Proteobacteria 0.17522 1.50628
## k__Bacteria|p__Bacteroidetes 82.54661 95.76355
## k__Viruses|p__Viruses_noname|c__Viruses_noname 0.00077 0.00194
## k__Bacteria|p__Actinobacteria|c__Actinobacteria 0.16129 0.03814
## k__Bacteria|p__Firmicutes|c__Bacilli 0.01099 0.01147