-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathFig6.R
137 lines (100 loc) · 6.44 KB
/
Fig6.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# This script shows how to reproduce the results in Figure 6 of Gupta et. al (Nature Communications, 2020)
# Author: Vinod K. Gupta, PhD
unavailable_pkg <- setdiff(c("vegan","ggplot2","ggpubr","rcompanion"),
rownames(installed.packages()))
install.packages(unavailable_pkg, repos = "http://cran.us.r-project.org")
library(vegan)
library(ggplot2)
library(ggpubr)
library(rcompanion)
# Functions---------------------------------------------
parse_file_list <- function(data_file){
data_list <- scan(data_file, what="", sep="\n")
return (data_list)
}
# I/O
Final_microbiome_data_4347_file <- "Final_metadata_4347.csv"
validation_abundance_file <- "validation_abundance.csv"
validation_meta_file <- "validation_metadata.csv"
study_wise_file <- "study_wise_data.txt"
MH_species_file <- "./MH_species.txt"
MN_species_file <- "./MN_species.txt"
fig6a_out <- "Fig6a.pdf"
fig6b_out <- "Fig6b.pdf"
Final_microbiome_data_4347 <- read.csv(Final_microbiome_data_4347_file, sep = ",",
header = T,row.names = 1,check.names = F)
fig6 <- read.csv(validation_abundance_file, sep = ",", header = TRUE,row.names = 1,check.names = F)
fig6_metadata <- read.csv(validation_meta_file, sep = ",", header = TRUE,row.names = 1,check.names = F)
# GMHI calculation for validation datasets
H_species <- parse_file_list(MH_species_file) # from Table 2
NH_species <- parse_file_list(MN_species_file) # from Table 2
# Processing of meta-data from validation cohort
fig6_1 <- fig6[-grep('unclassified', row.names(fig6)),] # unclassified species removed
fig6_2 <- fig6_1[-grep('virus', row.names(fig6_1)),] # viruses removed
fig6_3<-sweep(fig6_2,2,colSums(fig6_2),`/`) # re-normalization to get relative abundances after removing unclassified and viruses
fig6_4<-(fig6_3)*100
fig6_4[fig6_4 < 0.001] <- 0
# Healthy normalization constant calculated based on training dataset
# Nonhealthy normalization constant calculated based on training dataset
H_constant <- 7
NH_constant <- 31
# Extracting H-plus species
# Extracting H-minus species
validation_H <- fig6_4[row.names(fig6_4) %in% H_species, ]
validation_NH <- fig6_4[row.names(fig6_4) %in% NH_species, ]
alpha <- function(x){sum((log(x[x>0]))*(x[x>0]))*(-1)}
H_shannon <- apply((validation_H/100), 2, alpha)
NH_shannon <- apply((validation_NH/100), 2, alpha)
H_sig_count <- apply(validation_H, 2, function(i) (sum(i > 0)))
NH_sig_count <- apply(validation_NH, 2, function(i) (sum(i > 0)))
H_GMHI <- ((H_sig_count/H_constant)*H_shannon)
NH_GMHI <- ((NH_sig_count/NH_constant)*NH_shannon)
GMHI <- data.frame(log10((H_GMHI+0.00001)/(NH_GMHI+0.00001)))
colnames(GMHI) <- c("GMHI")
# Shannon diversity calculation for validation cohort
alpha_shannon_fig6 <- data.frame(diversity(fig6_4, 2, index="shannon"))
fig6_alpha_GMHI_data <- merge(as.data.frame(GMHI),as.data.frame(alpha_shannon_fig6), by='row.names', all=F)
colnames(fig6_alpha_GMHI_data) <- c("Sample_id","GMHI","alpha")
row.names(fig6_alpha_GMHI_data) <- fig6_alpha_GMHI_data$Sample_id
fig6_alpha_GMHI_data1 <- merge(as.data.frame(fig6_alpha_GMHI_data),
as.data.frame(fig6_metadata), by='row.names', all=F)
# Remove obesity, overweight and underweight samples
fig6_alpha_GMHI_data_final <- fig6_alpha_GMHI_data1[-grep(':OB|:OW|:UW', fig6_alpha_GMHI_data1$Phenotype_all),]
# healthy_nonhealthy_validation_sample_counts
H_Un_count <- data.frame(table(fig6_alpha_GMHI_data_final$Phenotype))
# phenotype_wise_validation_sample_counts
All_phenotype_counts <- data.frame(table(fig6_alpha_GMHI_data_final$Phenotype_all))
fig6_alpha_GMHI_data_final$Phenotype <- gsub(x = fig6_alpha_GMHI_data_final$Phenotype,
pattern = "Unhealthy", replacement = "Nonhealthy")
# Figure 6a (GMHI for combined phenotype as Healthy vs Nonhealthy)
pdf(fig6a_out)
p_gmhi_violin <- ggplot(fig6_alpha_GMHI_data_final, aes(x=Phenotype, y=GMHI, fill=Phenotype)) +
geom_violin(trim=FALSE)+geom_boxplot(width=0.1,fill="white")
p_gmhi_violin+rremove("legend")+theme(axis.text=element_text(size=14,face="bold"),
axis.title=element_text(size=14,face="bold"))+
scale_color_manual(values=c('steelblue','orange2'))+
stat_compare_means(label = "p.format",method = "wilcox.test",label.x.npc = "middle")+
scale_fill_manual(values=c('steelblue','orange2'))+
labs(x = "",y="Gut Microbiome Health Index (GMHI)",title = "Fig6a")+
theme_bw()+theme(panel.grid = element_blank())+rremove("legend")
dev.off()
cliffDelta(data =fig6_alpha_GMHI_data_final,GMHI~Phenotype)
# For re-ordering purposes
fig6_alpha_GMHI_data_final$Phenotype_all <- factor(fig6_alpha_GMHI_data_final$Phenotype_all,
levels=c('Healthy_P','N-1:Healthy','N-8:Healthy','N-5:AS','N-7:Colorectal adenoma',
'N-1:OB','N-8:CRC','N-1:OW','N-7: Italy:CRC','N-7: Japan:CRC',
'N-1:UW','CD','Liver cirrhosis','N-4:NAFLD','N-7:OB','N-7:OW','N-7:UW',
'N-8:OB','N-8:OW','N-8:UW','Rheumatoid arthritis'))
# Figure 6b (GMHI for all study-wise phenotypes)
pdf(fig6b_out)
fig6_GMHI_boxplot <- ggboxplot(fig6_alpha_GMHI_data_final, x = "Phenotype_all", y = "GMHI",
color = "Phenotype_all",
add = "jitter",
short.panel.labs = FALSE,outlier.size=0,xlab = "", ylab = "Gut Microbiome Health Index (GMHI)",title = "Fig6b")
# fig6_GMHI_boxplot +rremove("legend")+theme(axis.text=element_text(size=14,face="bold"),axis.title=element_text(size=14,face="bold"))+scale_color_manual(values=c("#FB8072","steelblue","orange2","steelblue","orange2","orange2","#BEBADA","#BEBADA","#FFFFB3","#BEBADA","steelblue","#D9D9D9"))
# Significant_GMHI_H1 <- compare_means(GMHI ~ Phenotype_all, data = fig6_alpha_GMHI_data_final, p.adjust.method = "fdr",symnum.args = list(cutpoints = c(0,0.01,1), symbols = c("*","ns")),ref.group = "Healthy_P")
# Significant_GMHI_H2 <- compare_means(GMHI ~ Phenotype_all, data = fig6_alpha_GMHI_data_final, p.adjust.method = "fdr",symnum.args = list(cutpoints = c(0,0.01,1), symbols = c("*","ns")),ref.group = "N-1:Healthy")
# Significant_GMHI_H3 <- compare_means(GMHI ~ Phenotype_all, data = fig6_alpha_GMHI_data_final, p.adjust.method = "fdr",symnum.args = list(cutpoints = c(0,0.01,1), symbols = c("*","ns")),ref.group = "N-8:Healthy")
fig6_GMHI_boxplot
dev.off()
# End