Steroid Data Analysis

Pauli Tikka

2026-04-14

Introduction

# Welcome to the 'steroid data analysis' webpage! a

# The procedures and explanations to make all the analysis and plots are in their individual chapters below. 
# These methods could be also easily applied to other types of data sets and metabolites than 'steroids' and their respective metadata per se. 
# In addition, there is a small 'disclaimer' also at the end of this webpage to emphasize that this site is mainly for educational purposes.
# Please let me know if you have any questions. For that, use the 'following' email: patati at the university of Turku

Loading Required R Packages

# Set library paths if needed
# .libPaths(c("C:/Program Files/R/R-4.4.1/library", .libPaths()))
# Fyi: adding echo=FALSE here (at the R markdown header) does not show this code block.

# List of libraries to load (alphabetically sorted)
packages <- c("bigsnpr", "binilib", "brickster", "car", "censReg", "circlize", "ComplexHeatmap", "correlation", 
              "corrplot", "daiR", "datarium", "dmetar", "dplyr", "effsize", "extrafont", "forcats", "fs", "FSA", 
              "ggcorrplot", "ggforce", "ggforestplot", "ggplot2", "ggplotify", "ggpubr", "ggsankey", "ggsankeyfier", 
              "ggh4x", "ggtext", "glmnet", "grid", "Hmisc", "hrbrthemes", "igraph", "insight", "lavaan", "lmtest", 
              "lme4", "lsr", "magick", "magrittr", "Maaslin2", "mdatools", "mediation", "meta", "mgcv", "mlma", 
              "MOFA2", "pheatmap", "PerformanceAnalytics", "pathviewr", "plyr", "plotrix", "ppcor", "prettydoc", 
              "psych", "quantreg", "qpgraph", "ragg", "RColorBrewer", "rcompanion", "readxl", "remotes", "reshape2", 
              "rgl", "rmarkdown", "rmdformats", "rstatix", "scales", "scater", "scatterplot3d", "sjPlot", "stringr", 
              "superb", "tibble", "tidyverse", "tint", "tufte", "viridis","WGCNA", "xlsx")

# Load all libraries
.libPaths(c("C:/Program Files/R/R-4.4.1/library", .libPaths()))
# invisible(lapply(packages, library, character.only = TRUE))

# invisible(lapply(packages, function(pkg) {
#   if (!requireNamespace(pkg, quietly = TRUE)) {
#     install.packages(pkg)
#   }
#   library(pkg, character.only = TRUE)
# }))
# 
library(scales)
invisible(lapply(setdiff(packages, "scales"), library, character.only = TRUE))



# Note: Do not load 'forestplot' as it conflicts with 'ggforestplot'
# You may need to load both rmarkdown and rmdformats and install some dependencies (if prompted) to get this loading done.
# If done together with Github, this maybe good to know: https://stackoverflow.com/questions/11384928/change-git-repository-directory-location

# Install packages if not already installed
# renv::install() # Installs from the basic R repository
# if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install(c("ComplexHeatmap", "DESeq2", "dmetar", "fgsea", "ggforestplot", "ggsankey", "limma", "Maaslin2", "metagenomeSeq", "MOFA2", "qpgraph", "scater", "scRNAseq", "sevenbridges"))
# remotes::install_github(c("davidsjoberg/ggsankey", "fossbert/binilib", "MathiasHarrer/dmetar", "mattflor/chorddiag", "NightingaleHealth/ggforestplot"))
# devtools::install_github("mattflor/chorddiag") # Alternative installation method

#Fyi: Run Current Chunk:
# Ctrl + Alt + C (Windows/Linux)
# Run All Chunks Above:
# Ctrl + Alt + P (Windows/Linux)
# Run All Chunks Below:
# Ctrl + Alt + N (Windows/Linux)
# Run All Chunks in Document:
# Ctrl + Alt + R (Windows/Linux)

Importing Data and Metadata

# First set your data folder:
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
# or:"C:/Users/patati/Desktop/Turku/R" #check the wd with: here::here() #or getwd()
load("thereal_v2.RData") #This is so to say real data, and it is not available here (at the site).

# It is easier to load the ready stiched data in one go with .RData file than one by one as below, but
# for educational purposes I put some examples what may need to be done to get your data in ok form 
# for later purposes
setwd("C:/Users/patati/Desktop/Turku/R")
NonAlcoholicFattyLiverDisease=read_excel("NAFLD_SteroidStudy.xlsx",sheet = "LFAT_steroidsDATA") # This is partly auxiliary
columnNames=colnames(NonAlcoholicFattyLiverDisease); NonAlcoholicFattyLiverDisease=data.frame(NonAlcoholicFattyLiverDisease)

#The names of the steroid steroidGroups need to be imported early on:
steroidGroups=read.csv("groups_17823.csv", header = TRUE, sep=";")
steroidGroups=steroidGroups[,c('Group','Abbreviation')]
steroidGroups=steroidGroups[steroidGroups[,'Abbreviation']!='F',]
steroidGroups=steroidGroups[order(steroidGroups[,'Group']),]
steroidGroups[,'Abbreviation'][steroidGroups[,'Abbreviation']=='17aOH-P4']='17a-OHP4'
groupValues=steroidGroups

steroidNames=read_excel("NAFLD_SteroidStudy_for groups.xlsx",sheet = "Steroid name abbreviations") 
# This is partly auxiliary
groups2=data.frame(steroidNames)[,1:4]; g1=read.csv("groups_17823.csv", header = TRUE, sep=";")
groups2=cbind(g1[,'Group'], groups2[,c('Abbreviation','Abbreviation_old','Name')])
groups2=groups2[groups2[,'Abbreviation']!='FF',]; colnames(groups2)[1]="Group";
groups2=groups2[order(groups2[,'Group']),]
groups2[,'Abbreviation'][groups2[,'Abbreviation']=='17aOH-P4']='17a-OHP4'
groups2[,'Group'][groups2[,'Abbreviation_old']=='DOC']='Mineralocorticoids'
# head(groups2)


#P4 was found from elsewhere to have the following characteristics:
NonAlcoholicFattyLiverDisease[,'P4'] = as.numeric(NonAlcoholicFattyLiverDisease[,'P4'])
# NonAlcoholicFattyLiverDisease[,'P4'][is.na(NonAlcoholicFattyLiverDisease[,'P4'])] = 22557.3330346846 
# median(NonAlcoholicFattyLiverDisease[,'P4'], na.rm=TRUE) 
NonAlcoholicFattyLiverDisease[,5:7][NonAlcoholicFattyLiverDisease[,5:7]==0.01]=0; colnames(NonAlcoholicFattyLiverDisease)=columnNames
MetabolicAssociatedLiverDisease=read_excel("Combined.Matrix.For.Pauli.2023.10.17.Excel.Formatv2.xlsx") # This is the main file
columnNames=colnames(MetabolicAssociatedLiverDisease); MetabolicAssociatedLiverDisease=data.frame(MetabolicAssociatedLiverDisease); colnames(MetabolicAssociatedLiverDisease)=columnNames   
# All kinds of tricks are needed for getting the right data format
rownames(MetabolicAssociatedLiverDisease)=MetabolicAssociatedLiverDisease[,1]
MetabolicAssociatedLiverDisease[,'P4'] = as.numeric(MetabolicAssociatedLiverDisease[,'P4']) 
#The same comment as above
MetabolicAssociatedLiverDisease[,'P4'][is.na(MetabolicAssociatedLiverDisease[,'P4'])] = 22557.3330346846 
evaluationCriteria=c('Grade(0-3)', 'Stage(0-4)','Necroinflammation')
MetabolicAssociatedLiverDisease[,evaluationCriteria][MetabolicAssociatedLiverDisease[,evaluationCriteria]==0.01]=0; 
targetData=c('11-KDHT','AN','DHT','17a-OHP5','E2','P5','DOC')
valueList=c(103,252,51,200,26.5,253,10); valueListAdjusted=c(100,250,50,200,25,250,10)
for (i in 1:7) {MetabolicAssociatedLiverDisease[,targetData][i][MetabolicAssociatedLiverDisease[,targetData][i]==valueList[i]]=valueListAdjusted[i]} 

# These (E) are ok as per lab:
menopauseMarkers=read.csv('E_tikka231023.csv',header=TRUE, sep=";")
menopauseMarkersPatients=rownames(MetabolicAssociatedLiverDisease[MetabolicAssociatedLiverDisease[,'E']==106000,]) 
patientNumbers=menopauseMarkers[which(menopauseMarkers[,1] %in% menopauseMarkersPatients),'patient.number']
markerValues=menopauseMarkers[which(menopauseMarkers[,1] %in% menopauseMarkersPatients),'E']
MetabolicAssociatedLiverDisease[as.character(patientNumbers),'E']=markerValues
# These (11-KA4) will perhaps change in the lab (sometime after 24.10.23):
marker11KA4=read.csv('11KA4_tikka231023.csv',header=TRUE, sep=";")
# marker11KA4[,1][c(1:5,9)];MetabolicAssociatedLiverDisease[as.character(marker11KA4[,1][c(1:5,9)]),'11-KA4'] 
#These were denoted with 'big interference'
MetabolicAssociatedLiverDisease[as.character(marker11KA4[,1][c(1:5,9)]),'11-KA4'] = NA 
#Alternatively: median(MetabolicAssociatedLiverDisease[!rownames(MetabolicAssociatedLiverDisease) %in% as.character(marker11KA4[,1][c(1:5,9)]),'11-KA4'])
BMI_ordered_MASLD=MetabolicAssociatedLiverDisease[order(MetabolicAssociatedLiverDisease[,'BMI']),'BMI']
BMI_ordered_NAFLD=NonAlcoholicFattyLiverDisease[order(NonAlcoholicFattyLiverDisease[,'BMI']),'BMI']
uniqueBMIValues=unique(BMI_ordered_NAFLD[! BMI_ordered_NAFLD %in% BMI_ordered_MASLD])
NonAlcoholicFattyLiverDisease=NonAlcoholicFattyLiverDisease[order(NonAlcoholicFattyLiverDisease[,'BMI']),] 
NonAlcoholicFattyLiverDisease=NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[,'BMI']!=uniqueBMIValues,]
MetabolicAssociatedLiverDisease=MetabolicAssociatedLiverDisease[order(MetabolicAssociatedLiverDisease[,'BMI']),]
#https://appsilon.com/imputation-in-r/ #https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
# New data import withouth changing the conames: https://readxl.tidyverse.org/articles/column-names.html
bileAcidsLiverData=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "Liver_BA",.name_repair = "minimal")); row.names(bileAcidsLiverData)=bileAcidsLiverData[,1]
PFASSerumData=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "PFAS_serum",.name_repair = "minimal")); rownames(PFASSerumData)=as.vector(unlist(PFASSerumData[,1]))
serumBileAcidsData=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "Serum_BA",.name_repair = "minimal"));rownames(serumBileAcidsData)=as.vector(unlist(serumBileAcidsData[,1]))
C4Data=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "C4",.name_repair = "minimal")); rownames(C4Data)=as.vector(unlist(C4Data[,1]))
clinicalData=data.frame(read_excel("Matching clinical data_all.xlsx",sheet = "Sheet1",.name_repair = "minimal")); rownames(clinicalData)=as.vector(unlist(clinicalData[,1]));
#https://www.analyticsvidhya.com/blog/2021/06/hypothesis-testing-parametric-and-non-parametric-tests-in-statistics/
MetabolicAssociatedLiverDisease[1:2,2:20] #or head(MetabolicAssociatedLiverDisease);
##                    E     11-KT 17a-OHP5        S        B  11b-OHA4 11-KDHT
## 24145190413 47616.43 1013.1342 5023.952 754.5581 8580.997 13735.454     100
## 2459090909  44356.73  935.1387 1111.426 569.7927 6167.239  4709.126     100
##               11-KA4      DHEA    T/Epi-T  17aOH-P4       AN      DHT       A4
## 24145190413 1833.115 12456.910   912.9862 1095.8051 250.0000  50.0000 3065.905
## 2459090909  1004.484  5890.108 10091.7241  864.5884 574.9827 471.7821 1683.676
##                  DOC       P5        P4        A       E2
## 24145190413 58.15483 2375.196 270.70746 516.4204 864.3368
## 2459090909  53.65145 1583.961  85.07423 752.5202  60.0817
# The below ordering needs to be changed...
bileAcidsLiverData=bileAcidsLiverData[as.character(MetabolicAssociatedLiverDisease$PatientNumber),];#bileAcidsLiverData[1:3,2:10] #https://stackoverflow.com/questions/54264980/r-how-to-set-row-names-attribute-as-numeric-from-character 
# I did otherway around
serumBileAcidsData=serumBileAcidsData[as.character(MetabolicAssociatedLiverDisease$PatientNumber),];#serumBileAcidsData[1:3,2:10]
clinicalData=clinicalData[as.character(MetabolicAssociatedLiverDisease$PatientNumber),];#clinicalData[1:3,2:10] 
# Many of these variables are irrelevant/not-used here... so not opening e.g. uniqueBMIValues, they would exhaust this file
C4Data=C4Data[as.character(MetabolicAssociatedLiverDisease$PatientNumber),];#C4Data[1:3,]
PFASSerumData=PFASSerumData[as.character(MetabolicAssociatedLiverDisease$PatientNumber),];#PFASSerumData[1:3,2:10]

# Menopause markers:
menopause=read_excel("Putative_metabolic_markers_menopause.xlsx",sheet='menopause markers',.name_repair = "minimal"); # rownames(clinicalData)=as.vector(unlist(clinicalData[,1]));
menopause=menopause[8:dim(menopause)[1],]; menopause=menopause[,-15]; menopause[2,2:14]=menopause[1,2:14]; menopause=data.frame(menopause); menopause[2,13:14]=c('v1','v2'); #dim(menopause)
colnames(menopause)=c('row_names',menopause[2,2:dim(menopause)[2]]); menopause=menopause[3:dim(menopause)[1],];rownames(menopause)=as.vector(unlist(menopause[,1]));
menopause=menopause[as.character(MetabolicAssociatedLiverDisease$PatientNumber),]
colnames(PFASSerumData)[colnames(PFASSerumData)=='PFHxA.1']='PFHxA_Branched'
PFASSerumData=PFASSerumData[,colnames(PFASSerumData)!='Benzylparaben.1']
PFASSerumData[PFASSerumData[,'Benzylparaben']>10,'Benzylparaben']=NA 

Jeihou=data.frame(read_excel("Copy of BA_liverfat_RawData.xls",.name_repair = "minimal")); row.names(Jeihou)=Jeihou[,1];Jeihou=Jeihou[as.character(MetabolicAssociatedLiverDisease$PatientNumber),]
u=Jeihou[Jeihou[,'GHDGA']=='<LLOQ',1]; a=u[!is.na(u)]; b=rownames(bileAcidsLiverData[bileAcidsLiverData[,'GHDGA']==1,]);
uu=Jeihou[Jeihou[,'GHDGA']=='No Result',1]; aa=uu[!is.na(uu)]; 
bileAcidsLiverData[as.character(a),'GHDGA']=min(bileAcidsLiverData[,'GHDGA'],na.rm=TRUE)/2
AuxBileAcid=bileAcidsLiverData[bileAcidsLiverData[,'GHDGA']==1,1] 
bileAcidsLiverData[as.character(AuxBileAcid),'GHDGA']=NA
#https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
mat=bileAcidsLiverData[,c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')]
mat[!mat>1]=10000
mat[mat==2]=10000 #Colmins did not work so I used (i.e. colmins did not work so I used):
hip=do.call(pmin, lapply(1:nrow(mat), function(i)mat[i,])) #https://stackoverflow.com/questions/13676878/fastest-way-to-get-min-from-every-column-in-a-matrix
hou=c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')
for (i in 1:5) {bileAcidsLiverData[bileAcidsLiverData[,hou[i]]==1,hou[i]]=hip[i]}
for (i in 1:5) {bileAcidsLiverData[bileAcidsLiverData[,hou[i]]==2,hou[i]]=hip[i]}

# An imputation for the missing values:
C4Data[is.na(C4Data[,2]),2]=median(C4Data[!is.na(C4Data[,2]),2]) #assuming that these were not below quantitation and replacing with median
#https://www.geeksforgeeks.org/performing-logarithmic-computations-in-r-programming-log-log10-log1p-and-log2-functions/
#https://stackoverflow.com/questions/50476717/i-want-to-align-match-two-unequal-columns

#Matching two unequal columns.. match the names of one original column (dat2) to ones that are missing (dat1 with to other) #Not sure if this should be this difficult...
CombinedData=cbind(MetabolicAssociatedLiverDisease[,1],NonAlcoholicFattyLiverDisease[,2:7],clinicalData[,'HOMA.IR'],
                   MetabolicAssociatedLiverDisease[,colnames(NonAlcoholicFattyLiverDisease[,8:27])],
                   bileAcidsLiverData[,2:dim(bileAcidsLiverData)[2]],C4Data[,2:dim(C4Data)[2]],
                   serumBileAcidsData[,2:dim(serumBileAcidsData)[2]],
                   PFASSerumData[,(2:(dim(PFASSerumData)[2]))], MetabolicAssociatedLiverDisease[,'PFAS']);
colnames(CombinedData)[colnames(CombinedData)=='C4Data[, 2:dim(C4Data)[2]]']='C4Data';colnames(CombinedData)[colnames(CombinedData)=='clinicalData[, \"HOMA.IR\"]']='HOMA-IR'
colnames(CombinedData)[colnames(CombinedData)=='MetabolicAssociatedLiverDisease[, \"PFAS\"]']='PFAS';
colnames(CombinedData)[colnames(CombinedData)=="MetabolicAssociatedLiverDisease[, 1]" ]='PatientNumber';#colnames(CombinedData)#
rownames(CombinedData)=unlist(bileAcidsLiverData[,1]); 
RelevantColumns=colnames(CombinedData)[!colnames(CombinedData) %in% c( "Benzylparaben" ,"Methylparaben")] 

# Not sure when it is the best time to take not needed variables away, perhaps at the very end?
CombinedData=CombinedData[,RelevantColumns]
# Here I add the lipids. In the future, I need to divide all the steroidGroups in their own components 
# e.g. dataframe called 'lipids' so that adding uniqueBMIValues will be more straightforward:
CombinedData=cbind(CombinedData,MetabolicAssociatedLiverDisease[,(dim(MetabolicAssociatedLiverDisease)[2]-13):dim(MetabolicAssociatedLiverDisease)[2]]) 
# hupo=match(   colnames(CombinedData)[colnames(CombinedData) %in% groups2[,3]], groups2[,3] ) # do ni; https://www.geeksforgeeks.org/how-to-find-index-of-element-in-vector-in-r/
# tvauxe=CombinedData
# colnames(CombinedData)[colnames(CombinedData) %in% groups2[,3]]=groups2[hupo,2]

# CombinedData_v2
# 
# vec1=colnames(serumBileAcidsData)
# vec2=colnames(bileAcidsLiverData)
# 
# common_elements <- intersect(vec1, vec2)

colnames(CombinedData)[colnames(CombinedData)=='C4Data']='C4'

#.1:set on seerumia

# The basic preprocessing is just the below lines:
tve=CombinedData[,2:dim(CombinedData)[2]]; tve[tve == 0] <- NA; #Almost all variables are here
HalfImputedData <- tve %>% mutate(replace(., is.na(.), min(., na.rm = T)/2)) #https://mdatools.com/docs/preprocessing--autoscaling.html
Log2TransformedData <- log2(HalfImputedData);
AutoScaledData <- prep.autoscale(as.matrix(Log2TransformedData), center = TRUE, scale = TRUE);  #https://svkucheryavski.gitbooks.io/mdatools/content/preprocessing/text.html
AllData=cbind(CombinedData[,1],AutoScaledData); 

# Changing the column names needs to have separate variables for each type of variable (contaminant, steroid, etc.)
x1=colnames(AllData[,c(1:8)]); v2=dim(NonAlcoholicFattyLiverDisease)[2]+1
x2=colnames(AllData[,9:v2]);v3=(dim(bileAcidsLiverData)[2]+v2);x3=colnames(AllData[,(v2+1):(v3)]);v4=(dim(serumBileAcidsData)[2])+v3
x4=colnames(AllData[,(v3+1):(v4-1)]);x5=colnames(AllData[,(v4):(dim(AllData)[2])]); 
x3 <- paste(x3, "_L", sep="") #https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
x4=gsub("(-[0-9]*)*.1", "", x4) #https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
x4 <- paste(x4, "_S", sep="")# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
x5a=x5[1:9]
x6=x5[10:length(x5)] #Dividing to lipids
x5=x5a  #Making sure that PFAS are separate
nm = c(x1,x2,x3,x4,x5,x6); nm=c('PatientNumber','Gender','AGE','BMI','Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR',nm[9:length(nm)])
colnames(AllData)=nm; #AllData[1:5,1:30]; #NonAlcoholicFattyLiverDisease[1:2,1:28];
colnames(AllData)[colnames(AllData)=='MetabolicAssociatedLiverDisease[, \"PFAS\"]']='PFAS';
# This (deletion) is good to do after all the previous:
x5=x5[x5!='PFAS'];x5=x5[x5!='Perfluorodecyl.ethanoic.acid']; x6=x6[x6!='Total_TG'] # x1;x2;x3;x4;x5;
AllData=AllData[,!colnames(AllData) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]

# In case you would need just the logged values:
tv_half_log22=cbind(CombinedData[,1],Log2TransformedData);
x1=colnames(tv_half_log22[,c(1:8)]); v2=dim(NonAlcoholicFattyLiverDisease)[2]+1
x2=colnames(tv_half_log22[,9:v2]);v3=(dim(bileAcidsLiverData)[2]+v2);
x3=colnames(tv_half_log22[,(v2+1):(v3)]);v4=(dim(serumBileAcidsData)[2])+v3
x3=x3[c(length(x3),1:(length(x3)-1))]
x4=colnames(tv_half_log22[,(v3+1):(v4-1)]);
x5=colnames(tv_half_log22[,(v4):(dim(tv_half_log22)[2])]);
x3 <- paste(x3, "_L", sep="") 
#https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
x4=gsub("(-[0-9]*)*.1", "", x4) #https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
x4 <- paste(x4, "_S", sep="")# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
x5a=x5[1:9]
x6=x5[10:length(x5)] #dividing to lipids
x5=x5a  #making sure that PFAS are separate
nm = c(x1,x2,x3,x4,x5,x6); nm=c('PatientNumber','Gender','AGE','BMI','Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR',nm[9:length(nm)])
colnames(tv_half_log22)=nm; #tv_half_log22[1:5,1:30]; #NonAlcoholicFattyLiverDisease[1:2,1:28];
colnames(tv_half_log22)[colnames(tv_half_log22)=='MetabolicAssociatedLiverDisease[, \"PFAS\"]']='PFAS';
# This (deletion) is good to do after all the previous:
x5=x5[x5!='PFAS'];x5=x5[x5!='Perfluorodecyl.ethanoic.acid']; x6=x6[x6!='Total_TG'] # x1;x2;x3;x4;x5;
tv_half_log22=tv_half_log22[,!colnames(tv_half_log22) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]

# This needs to be done early on:
colnames(CombinedData)[colnames(CombinedData)=='17aOH-P4']='17a-OHP4'
colnames(tv_half_log22)[colnames(tv_half_log22)=='17aOH-P4']='17a-OHP4'
colnames(AllData)[colnames(AllData)=='17aOH-P4']='17a-OHP4'

AllData=AllData[,!colnames(AllData) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
AllData=AllData[,!colnames(AllData) %in% x4]
# In case you would need nonscaled covariates and scaled/logged all other variables:
CovariatesScaledData=AllData
CovariatesNonScaledData=cbind(CombinedData[,1:8],AllData[,9:dim(AllData)[2]])
LogCovariatesScaledData=tv_half_log22
LogCovariatesNonScaledData=cbind(CombinedData[,1:8],tv_half_log22[,9:dim(tv_half_log22)[2]])
colnames(CovariatesNonScaledData)[1:8]=colnames(AllData)[1:8]
colnames(LogCovariatesNonScaledData)[1:8]=colnames(AllData)[1:8]
# This is needed occasionally:
CurrentData=CovariatesScaledData 
# https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
# https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub # https://rdrr.io/bioc/qpgraph/man/qpNrr.html

hupo=match(   colnames(CurrentData)[colnames(CurrentData) %in% groups2[,3]], groups2[,3] ) 
# do ni; https://www.geeksforgeeks.org/how-to-find-index-of-element-in-vector-in-r/
tvauxe=CurrentData
colnames(CurrentData)[colnames(CurrentData) %in% groups2[,3]]=groups2[hupo,2]

# This needs to be done also soon, to gather all the treatment etc. variable names separately...: 
TreatmentVariables=colnames(AllData)[52:58];
MediatorVariables=colnames(AllData)[9:28];
OutcomeVariables=colnames(AllData)[c(29:51,59:71)]; ##https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/

OutcomeVariables=OutcomeVariables[!OutcomeVariables %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
OutcomeVariables=OutcomeVariables[! OutcomeVariables %in% x4] #https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
MediatorVariables[MediatorVariables=="17aOH-P4"]="17a-OHP4"
TreatmentVariables=TreatmentVariables[!TreatmentVariables %in% c('Perfluorodecyl.ethanoic.acid')]

colnames(CovariatesScaledData)[colnames(CovariatesScaledData)=='C4Data_L']='C4_L'
colnames(CurrentData)[colnames(CurrentData)=='C4Data_L']='C4_L'
colnames(tvauxe)[colnames(tvauxe)=='C4Data_L']='C4_L'
colnames(AllData)[colnames(AllData)=='C4Data_L']='C4_L'
colnames(NonAlcoholicFattyLiverDisease)[colnames(NonAlcoholicFattyLiverDisease)=='C4Data_L']='C4_L'
x3[x3=='C4Data_L']='C4_L'

# Let's introduce some randomization. First, let us define the range of the random numbers:
# min_val <- 0.83; max_val <- 1.17
# 
# # Then a function for the randomization:
# multiply_with_random <- function(df, cols, min_val, max_val) {
#   for (col in cols) {
#     if (col %in% names(df)) {
#       df[[col]] <- df[[col]] * runif(nrow(df), min = min_val, max = max_val)
#     } else {
#       warning(paste("Column", col, "does not exist in the data frame"))}}
#   return(df)}
# 
# CovariatesScaledData=multiply_with_random(CovariatesScaledData, 3:(dim(CovariatesScaledData)[2]), min_val, max_val)
# CurrentData=multiply_with_random(CurrentData, 3:(dim(CurrentData)[2]), min_val, max_val)
# tvauxe=multiply_with_random(tvauxe, 3:(dim(tvauxe)[2]), min_val, max_val)
# AllData=multiply_with_random(AllData, 3:(dim(AllData)[2]), min_val, max_val)

# Just in case:
# save.image('thereal_v2.RData')
# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")

# Got some help for commenting and variable namings from Copilot to above code. 
# Also multiply_with_random was done together with Copilot.

Setting Global Variables

options(scipen = 999) # Disable scientific notation
# rm(list = ls()) # Clear workspace; this should not be if you have the load above
thedate <- strftime(Sys.Date(), "%d%m%y") #Do not take the old date from the load...
date <- paste0('tikka', thedate) # Customize this as needed
knitr::opts_knit$set(root.dir = 'C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis') #Is this working? Yes, but in knitr:https://forum.posit.co/t/setting-working-directory-in-rmarkdown/70849

# Example installation commands
# remotes::install_github("fossbert/binilib", force=TRUE)
# install.packages(c('tidyverse', 'tibble'))
# if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install("Maaslin2")
# devtools::install_github("davidsjoberg/ggforestplot")
# remotes::install_version("insight", version = "0.20.5", repos = "http://cran.us.r-project.org", force=TRUE)
# font_import() # Import fonts if not already done
# loadfonts(device = "win") # Load fonts for Windows
# renv::status() # Check renv status
# library(rmarkdown); render("path/to/file.Rmd") # Render R Markdown document
# remove.packages("DelayedArray")
# BiocManager::install("DelayedArray")
# install.packages("Require") # Install 'Require' package
# If the installation to the project file is problematic, do it to the general folder and add it to the .libPaths(), as above library import.

Making Boxplots

#https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
#https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender


windowsFonts(Arial = windowsFont("Arial"))

CreateBoxplots <- function(tvt, Group, OutcomeVariables, Out, oute, other) {
  # Filter data based on gender
  tvt <- tvt %>%
    filter(if (Group == 'Male') Gender == 2 else if (Group == 'Female') Gender == 1 else TRUE)
  
  # Prepare data for plotting
  Steroid <- rep(colnames(tvt[, 9:28]), each = nrow(tvt))
  data2 <- rep('Control', nrow(tvt))
  num <- ifelse(OutcomeVariables == 'HOMA-IR', 1.5, min(tvt[[OutcomeVariables]]))
  data2[tvt[[OutcomeVariables]] > num] <- 'Case'
  TreatmentVariables <- data2
  Concentration <- as.vector(unlist(tvt[, 9:28]))
  data <- data.frame(Steroid, TreatmentVariables, Concentration)
  data$Group <- 0 
  
  # Correct steroid names if the level exists
  if ("17aOH-P4" %in% levels(data$Steroid)) {
    data <- data %>%
      mutate(Steroid = fct_recode(Steroid, '17a-OHP4' = '17aOH-P4'))
  }
  
  
  groups3[groups3[,1]=='Estrogens',1]  ='Estrog.' #''
  
  # Assign steroidGroups
  rownames(groups3) <- 1:20
  for (i in seq_len(nrow(groups3))) {
    data[data$Steroid %in% groups3$Abbreviation[i], 'Group'] <- groups3$Group[i]
  }
  
  # Set plot title
  title <- paste(Out, "'s Effect on Concentrations of Steroids in ", Group, sep = "")
  
  # Define legend labels
  e1 <- ifelse(num == 1.5, paste('Case (>', num, ')', sep = ""), paste('Case (>', 0, ')', sep = ""))
  e2 <- ifelse(num == 1.5, paste('Control (<=', num, ')', sep = ""), paste('Control (=', 0, ')', sep = ""))
  
  # Remove rows with NA concentrations
  data <- data %>% filter(!is.na(Concentration))
  
  # Create a named vector for mapping (this and the below line was as per Copilot)
  abbreviation_map <- setNames(groups3$Abbreviation_old, groups3$Abbreviation)
  # Map the Steroid values to Abbreviation_old values
  data$Abbreviation_old <- abbreviation_map[data$Steroid]
  colnames(data)[1]='BigName'
  colnames(data)[5]='Steroid'
  data=data[,c(5,2,3,4)]

  # Create boxplot
# Create boxplot
# Create boxplot
# p <- ggplot(data, aes(x = Steroid, y = Concentration, fill = TreatmentVariables)) +
#   geom_boxplot(notch = FALSE, notchwidth = 0.5, outlier.shape = 1, outlier.size = 2, coef = 1.5, colour = "") + #grey50
#   theme_classic2() +
#   theme(
#     axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2, size = 16),
#     axis.text = element_text(color = "black"),
#     panel.grid.minor = element_blank(),
#     text = element_text(size = 16, family = "Times New Roman"),
#     axis.title = element_text(size = 16),
#     plot.title = element_text(size = 16),
#     legend.text = element_text(size = 16),
#     legend.title = element_text(size = 16)
#   ) +
#   labs(x = "Steroids", y = "Log2 of Picomolar Concentrations", title = title) +
#   scale_fill_manual(values = c("orange", "blue"), name = oute, labels = c(e1, e2)) +
#   facet_grid(~Group, scales = "free_x", space = "free") +
#   stat_compare_means(
#     hide.ns = TRUE, label = "p.signif", method = "wilcox.test",
#     symnum.args = list(
#       cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
#       symbols = c("****", "***", "**", "*", "ns")
#     ),
#     size = 8, paired = FALSE, label.y = 15.5
#   )

library(ggplot2)
library(ggpubr)  # for stat_compare_means
library(ggthemes)  # for theme_classic2
library(extrafont)  # if needed for Times New Roman

# p <- ggplot(data, aes(x = Steroid, y = Concentration, fill = TreatmentVariables)) +
#   geom_boxplot(
#     notch = FALSE, notchwidth = 0.5,
#     outlier.shape = 1, outlier.size = 2,
#     coef = 1.5 
#     # colour = ""
#   ) +
#   theme_classic2(base_family = "Times New Roman") +
#   theme(
#     axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2, size = 16, family = "Times New Roman"),
#     axis.text = element_text(color = "black", family = "Times New Roman"),
#     axis.title.x = element_text(size = 16, face = "bold", family = "Times New Roman"),
#     axis.title.y = element_text(size = 16, face = "bold", family = "Times New Roman"),
#     plot.title = element_text(size = 16, face = "bold", family = "Times New Roman"),
#     legend.text = element_text(size = 16, family = "Times New Roman"),
#     legend.title = element_text(size = 16, face = "bold", family = "Times New Roman"),
#     strip.text = element_text(size = 16, face = "bold", family = "Times New Roman"),
#     strip.background = element_rect(fill = "lightblue", color = "black"),  # customize color here
#     panel.grid.minor = element_blank()
#   ) +
#   labs(
#     x = "Steroids",
#     y = "Log2 of Picomolar Concentrations",
#     title = title
#   ) +
#   scale_fill_manual(
#     values = c("orange", "blue"),
#     name = oute,
#     labels = c(e1, e2)
#   ) +
#   facet_grid(~Group, scales = "free_x", space = "free") +
#   stat_compare_means(
#     hide.ns = TRUE, label = "p.signif", method = "wilcox.test",
#     symnum.args = list(
#       cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
#       symbols = c("****", "***", "**", "*", "ns")
#     ),
#     size = 8, paired = FALSE, label.y = 15.5
#   )


p <- ggplot(data, aes(x = Steroid, y = Concentration, fill = TreatmentVariables)) +
  geom_boxplot(
    notch = FALSE,
    outlier.shape = 1,
    outlier.size = 2,
    coef = 1.5
  ) +
  theme_classic2(base_family = "Arial") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.95, vjust = 0.2, size = 16),
    axis.text.y = element_text(size = 16),
    axis.title = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 16, face = "bold"),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16, face = "bold"),
    strip.text = element_text(size = 16, face = "bold"),
    strip.background = element_rect(fill = "lightblue", color = "black")
  ) +
  labs(
    x = "Steroids",
    y = "Log2 of Picomolar Concentrations",
    title = title
  ) +
  scale_fill_manual(
    values = c("orange", "blue"),
    name = oute,
    labels = c(e1, e2)
  ) +
  facet_grid(~Group, scales = "free_x", space = "free") +
  stat_compare_means(
    hide.ns = TRUE,
    label = "p.signif",
    method = "wilcox.test",symnum.args = list(
      cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
      symbols = c("****", "***", "**", "*", "ns")
    ),
    size = 8,paired = FALSE, label.y = 15.3,
  )


# Define output path
path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
fs::dir_create(path)  # Ensure directory exists

# === Extract statistics table (same tests used in stat_compare_means) ===
stats_tbl <- ggpubr::compare_means(
  formula = Concentration ~ TreatmentVariables,
  group.by = "Steroid",
  data = data,
  method = "wilcox.test"
)

# Define statistics output path
stats_file <- fs::path(path, paste0(Group, "_", Out, "_stats.csv"))

# Save stats table
write.csv(stats_tbl, stats_file, row.names = FALSE)

message("✅ Stats table saved: ", stats_file)


# === WILCOXON TEST + SAVE CSV ===
library(rstatix)

stats_tbl <- data %>%
  group_by(Steroid) %>%
  wilcox_test(Concentration ~ TreatmentVariables, paired = FALSE) %>%
  adjust_pvalue(method = "BH") %>%       # FDR correction
  add_significance("p.adj")              # significance stars

stats_file <- fs::path(path, paste0(Group, "_", Out, "_wilcox_stats.csv"))
write.csv(stats_tbl, stats_file, row.names = FALSE)

message("✅ Wilcoxon stats saved: ", stats_file)

# Save as PNG
pngfile <- fs::path(path, paste0(Group, Out, "BoxB", ".png"))
try({
  ragg::agg_png(filename = pngfile, width = 60, height = 36, units = "cm", res = 300, scaling = 2)
  print(p)
  dev.off()
  if (file.exists(pngfile)) {
    message("✅ PNG saved successfully: ", pngfile)
  } else {
    warning("❌ PNG file not found after saving.")
  }
}, silent = FALSE)

# Save as JPEG
jpgfile <- fs::path(path, paste0(Group, Out, "BoxB", ".jpg"))
try({
  if (requireNamespace("ragg", quietly = TRUE) && "agg_jpeg" %in% getNamespaceExports("ragg")) {
    ragg::agg_jpeg(filename = jpgfile, width = 60, height = 36, units = "cm", res = 300, scaling = 2, quality = 95)
  } else {
    grDevices::jpeg(filename = jpgfile, width = 60, height = 36, units = "cm", res = 300, quality = 95)
  }
  print(p)
  dev.off()
  if (file.exists(jpgfile)) {
    message("✅ JPEG saved successfully: ", jpgfile)
  } else {
    warning("❌ JPEG file not found after saving.")
  }
}, silent = FALSE)


# ===== SAVE REAL SVG (VECTOR) =====
svgfile <- fs::path(path, paste0(Group, Out, "BoxB", ".svg"))

svglite::svglite(
  file = svgfile,
  width = 60 / 2.54,   # cm → inches
  height = 36 / 2.54
)
print(p)
dev.off()

if (file.exists(svgfile)) {
  message("✅ SVG saved successfully: ", svgfile)
} else {
  warning("❌ SVG file not found after saving.")
}


# Convert PNG to PDF
# pdffile <- fs::path(path, paste0(Group, Out, "BoxB", ".pdf"))
# try({
#   daiR::image_to_pdf(files = as.character(pngfile), pdf_name = as.character(pdffile))
#   if (file.exists(pdffile)) {
#     message("✅ PDF created successfully: ", pdffile)
#   } else {
#     warning("❌ PDF file not found after conversion.")
#   }
# }, silent = FALSE)
# 
# # Optionally include PNG in document (e.g., RMarkdown)
# if (file.exists(pngfile)) {
#   knitr::include_graphics(pngfile)
# }

# ==== REAL PDF (VECTOR) ====
pdffile <- fs::path(path, paste0(Group, Out, "BoxB.pdf"))
cairo_pdf(pdffile, width = 60/2.54, height = 36/2.54)
print(p)
dev.off()


}

# Example usage
LogCovariatesNonScaledData[, '11-KA4'][LogCovariatesNonScaledData[, '11-KA4'] ==
                                         min(LogCovariatesNonScaledData[, '11-KA4'])] <- median(LogCovariatesNonScaledData[, '11-KA4'])
# other <- '261124'
ie <- LogCovariatesNonScaledData#

# This is partly auxiliary
# setwd("C:/Users/patati/Desktop/Turku/R")
# or:"C:/Users/patati/Desktop/Turku/R" 
# groups2=data.frame(steroidNames)[,1:4]; g1=read.csv("C:/Users/patati/Desktop/Turku/R/groups_17823.csv", header = TRUE, sep=";")
# groups2=cbind(g1[,'Group'], groups2[,c('Abbreviation','Abbreviation_old','Name')])
# groups2=groups2[groups2[,'Abbreviation']!='FF',]; colnames(groups2)[1]="Group";
# groups2=groups2[order(groups2[,'Group']),]
# groups2[,'Group'][groups2[,'Abbreviation_old']=='DOC']='Mineralocorticoids'
# 
# groups2[,'Abbreviation'][groups2[,'Abbreviation']=='17aOH-P4']='17a-OHP4'
# #The Further Abbreviations:
groups3=groups2
groups3[groups3[,1]=='Glucocorticoids',1]  ='Glucoc.' #'Glucoc.' G.C.'
groups3[groups3[,1]=='Mineralocorticoids',1]  ='Mineralc.' #'Miner.c.' M.C.
#groups2[groups2[,1]=='Progestogens',1]  ='P.G.' #''
groups2[groups2[,1]=='Estrogens',1]  ='Estrog.' #''

setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
hupo <- match(colnames(ie)[colnames(ie) %in% groups2[, 3]], groups2[, 3])
colnames(ie)[colnames(ie) %in% groups2[, 3]] <- groups2[hupo, 2]
windowsFonts(A = windowsFont("Calibri (Body)"))
knitr::opts_knit$set(root.dir = 'C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis')

# The significance levels are: '****<0.001', '***<0.01', '**<0.05', '*<0.1'
OutcomeVariables='Steatosis Grade';Out='Steatosis'; oute='Steatosis Grade';num=0;Group='All';
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);
## png 
##   2
Group='Female';
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Male';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other)
## png 
##   2
OutcomeVariables='Fibrosis Stage';Out='Fibrosis'; oute='Fibrosis Stage';num=0;Group='All';
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Female';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Male';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other) 
## png 
##   2
# https://www.elsevier.es/en-revista-annals-hepatology-16-articulo-assessment-hepatic-fibrosis-necroinflammation-among-S1665268119314590 #So it is in grade
OutcomeVariables='Necroinflammation';Out='Necroinflammation'; oute='Necroinflammation Grade';num=0;Group='All';
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Female';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Male';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other)
## png 
##   2
OutcomeVariables='HOMA-IR';Out='HOMA-IR'; oute='HOMA-IR';num=1.5;Group='All';
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Female';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other);Group='Male';
## png 
##   2
CreateBoxplots(ie,Group,OutcomeVariables,Out,oute,other)
## png 
##   2

Making Forest Plots

# Define the NonAlcoholicFattyLiverDisease dataset by selecting the first 28 columns from CombinedData
NonAlcoholicFattyLiverDisease <- CombinedData[, 1:28]
# Convert specific columns to binary values using vectorized operations
cols_to_binary <- c(5, 6, 7)
NonAlcoholicFattyLiverDisease[, cols_to_binary] <- (NonAlcoholicFattyLiverDisease[, cols_to_binary] > 0) * 1
# Convert column 8 to binary based on the threshold of 1.5
NonAlcoholicFattyLiverDisease[, 8] <- (NonAlcoholicFattyLiverDisease[, 8] > 1.5) * 1
# Clean column names to remove special characters and make uniqueBMIValues consistent
patterns <- c("-", "/", "11", "17", "#")
replacements <- c(".", ".", "X11", "X17", ".") #?
# Ensure patterns and replacements are correctly paired
if (length(patterns) == length(replacements)) {
  for (i in seq_along(patterns)) {
    colnames(NonAlcoholicFattyLiverDisease) <- gsub(patterns[i], replacements[i], colnames(NonAlcoholicFattyLiverDisease))}} else {
      stop("Patterns and replacements vectors must be of the same length.")}

#' CalculateErrors
#' Builds ratio (case/control) with log CI, maps groups via groups2, creates forest plot, saves images.
#' @param NonAlcoholicFattyLiverDisease data.frame
#' @param OutcomeVariables character; binary outcome column in NAFLD data (0 vs >0)
#' @param Group character; "Male" | "Female" | "All"
#' @param name character; output file stem (e.g., "MyPlot")
#' @param ordera character vector of abbreviations to order rows; if NULL, inferred from groups2
#' @param oute (kept for compatibility; unused in this function)
#' @param first logical; when Group=="All" and first==TRUE, reverse order
#' @param e character; reference analyte used for ratio construction (kept from your code)
#' @param xlim numeric length-2 or NULL; x axis limits (log scale). If NULL, inferred from data
#' @param groups2 data.frame with columns: Group, Abbreviation, Abbreviation_old, Name
#' @return ordera vector used for the y order

CalculateErrors <- function(NonAlcoholicFattyLiverDisease,
                            OutcomeVariables,
                            Group,
                            name,
                            ordera = NULL,
                            oute = NULL,
                            first = FALSE,
                            e,
                            xlim = NULL,
                            groups2) {
  
  ## ---- Safety checks ----
  stopifnot(is.data.frame(NonAlcoholicFattyLiverDisease))
  if (!OutcomeVariables %in% colnames(NonAlcoholicFattyLiverDisease)) {
    stop("OutcomeVariables='", OutcomeVariables, "' not found in data.")
  }
  if (!'SEX.1F.2M' %in% colnames(NonAlcoholicFattyLiverDisease)) {
    stop("'SEX.1F.2M' column not found in data.")
  }
  needed_g2 <- c("Group", "Abbreviation", "Abbreviation_old", "Name")
  if (!all(needed_g2 %in% colnames(groups2))) {
    stop("groups2 must contain columns: ", paste(needed_g2, collapse = ", "))
  }
  
  ## ---- Helper: name normalization used for matching ----
  norm_key <- function(x) {
    x <- toupper(as.character(x))
    x <- gsub("^X(?=[0-9])", "", x, perl = TRUE) # drop leading X before digits
    gsub("[^A-Z0-9]", "", x)                     # strip non-alphanumerics
  }
  
  ## ---- 1) Filter by Group ----
  NAFLDo <- switch(Group,
                   "Male"   = NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 'SEX.1F.2M'] == 2, ],
                   "Female" = NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 'SEX.1F.2M'] == 1, ],
                   "All"    = NonAlcoholicFattyLiverDisease,
                   stop("Group must be one of 'Male', 'Female', 'All'."))
  if (nrow(NAFLDo) == 0) stop("No rows after filtering by Group = '", Group, "'.")
  
  ## ---- 2) Find analyte (steroid) columns dynamically via groups2 ----
  keys   <- unique(na.omit(c(groups2$Abbreviation, groups2$Abbreviation_old, groups2$Name)))
  keys_n <- norm_key(keys)
  cn     <- colnames(NAFLDo)
  cn_n   <- norm_key(cn)
  
  match_idx    <- which(cn_n %in% keys_n)
  measure_cols <- cn[match_idx]
  # Exclude non-measure columns
  measure_cols <- setdiff(measure_cols, c(OutcomeVariables, "SEX.1F.2M"))
  # Keep numeric
  measure_cols <- measure_cols[sapply(measure_cols, function(v) is.numeric(NAFLDo[[v]]))]
  
  if (length(measure_cols) == 0) {
    stop(
      "Could not find any analyte columns by matching groups2 to your data.\n",
      "Check that your data column names correspond to groups2 Abbreviation/Abbreviation_old/Name."
    )
  }
  
  ## ---- 3) Split by outcome and summarize ----
  sample_data <- vector("list", 2)
  n0 <- n1 <- 0
  
  for (i in 1:2) {
    SG0 <- if (i == 1) {
      NAFLDo[NAFLDo[[OutcomeVariables]] == 0, , drop = FALSE]
    } else {
      NAFLDo[NAFLDo[[OutcomeVariables]] > 0, , drop = FALSE]
    }
    if (i == 1) n0 <- nrow(SG0) else n1 <- nrow(SG0)
    if (nrow(SG0) == 0) stop("Outcome split ", i, " has zero rows. Check '", OutcomeVariables, "' coding.")
    
    means <- sapply(measure_cols, function(v) median(SG0[[v]], na.rm = TRUE))
    sds   <- sapply(measure_cols, function(v) sd(SG0[[v]],     na.rm = TRUE))
    
    sample_data[[i]] <- data.frame(
      study  = measure_cols,
      index  = measure_cols,
      result = as.numeric(means),
      error  = as.numeric(sds),
      stringsAsFactors = FALSE
    )
  }
  
  # Merge (control arm i=1 with case arm i=2) by analyte name
  df <- merge(sample_data[[1]], sample_data[[2]], by = c("study", "index"), suffixes = c("", ".1"))
  
  ## ---- 4) Wilcoxon p-values per analyte (vector interface, avoids formula pitfalls) ----
  ov <- NAFLDo[[OutcomeVariables]]
  ctrl_rows <- ov == 0
  case_rows <- ov > 0
  
  ps <- sapply(measure_cols, function(v) {
    x <- NAFLDo[ctrl_rows, v]
    y <- NAFLDo[case_rows, v]
    x <- x[is.finite(x)]
    y <- y[is.finite(y)]
    if (length(x) < 1 || length(y) < 1) return(NA_real_)
    tryCatch(wilcox.test(x, y, exact = FALSE)$p.value, error = function(e) NA_real_)
  })
  names(ps) <- measure_cols
  ## ---- 5) Ratio/log values & cleanup ----
  # Keep your 'e' reference ratio (not used downstream, but retained)
  suppressWarnings({
    a <- tryCatch(
      df[df[, "study"] == e, 'result.1'] / df[df[, "study"] == e, 'result'],
      error = function(e) NA_real_
    )
  })
  
  ResComb <- data.frame(log(df$result.1 / df$result))
  ResComb$result   <- ResComb[, 1]
  ResComb$name_raw <- df$study
  ResComb          <- ResComb[, c("result", "name_raw")]
  
  # Pretty names for plotting (match your prior cleanup)
  ResComb$name <- ResComb$name_raw
  ResComb$name <- gsub("\\.", "-", ResComb$name)
  ResComb$name <- sub("^X11", "11", ResComb$name)
  ResComb$name <- sub("^X17", "17", ResComb$name)
  ResComb$name[ResComb$name == "T-Epi-T"] <- "T/Epi-T"
  
  # Attach p‑values by original raw colname
  ResComb$pval <- ps[match(ResComb$name_raw, names(ps))]
  
  ## ---- 6) Error / CI on ratio (as in your code) ----
  ResComb$result_pure <- df$result.1 / df$result
  ResComb$error <- (abs((1 / df$result) * df$error.1) +
                      abs((df$result.1 / (df$result^2)) * df$error)) / nrow(NAFLDo) * 1.64
  
  # Cap extremes
  medE <- median(ResComb$error, na.rm = TRUE)
  sdE  <- sd(ResComb$error,     na.rm = TRUE)
  ResComb$error <- ifelse(ResComb$error > (medE + sdE), medE * 1.25, ResComb$error)
  
  # Bounds + log transform (guard negative/zero to avoid -Inf)
  ResComb$errord1a <- pmax(ResComb$result_pure - ResComb$error, .Machine$double.eps)
  ResComb$errord2a <- pmax(ResComb$result_pure + ResComb$error, .Machine$double.eps)
  ResComb$errord1  <- log(ResComb$errord1a)
  ResComb$errord2  <- log(ResComb$errord2a)
  ResComb$result   <- log(ResComb$result_pure)
  ResComb$Control  <- df$result
  ResComb$Case     <- df$result.1
  # P-values & flags
  ResComb$pval0 <- ResComb$pval
  ResComb$pval1 <- ResComb$pval
  ResComb$Significance0 <- ifelse(ResComb$pval0 < 0.1, 'Yes', 'No')
  ResComb$Color0        <- ifelse(ResComb$pval0 < 0.1, 'blue', 'grey')
  ResComb$Significance1 <- ifelse(ResComb$pval1 < 0.1, 'Yes', 'No')
  ResComb$Color1        <- ifelse(ResComb$pval1 < 0.1, 'blue', 'grey')
  
  ## ---- 7) Map Group via groups2 (match by Abbrev / old / Name) ----
  key_tbl <- unique(do.call(rbind, list(
    data.frame(key = as.character(groups2$Abbreviation),     Group = groups2$Group, stringsAsFactors = FALSE),
    data.frame(key = as.character(groups2$Abbreviation_old), Group = groups2$Group, stringsAsFactors = FALSE),
    data.frame(key = as.character(groups2$Name),             Group = groups2$Group, stringsAsFactors = FALSE)
  )))
  key_tbl <- key_tbl[!is.na(key_tbl$key) & nzchar(key_tbl$key), ]
  key_tbl$key_norm <- norm_key(key_tbl$key)
  ResComb$key_norm <- norm_key(ResComb$name)
  
  map_df <- merge(ResComb[, c("name", "key_norm")],
                  unique(key_tbl[, c("key_norm", "Group")]),
                  by = "key_norm", all.x = TRUE)
  ResComb$Group <- map_df$Group[match(ResComb$key_norm, map_df$key_norm)]
  
  # Warn on unmapped
  unmapped <- sort(unique(ResComb$name[is.na(ResComb$Group)]))
  if (length(unmapped)) {
    warning("Some analytes could not be mapped to a group in groups2: ",
            paste(unmapped, collapse = ", "))
  }
  
  # Facet order: as in groups2
  group_levels <- unique(groups2$Group)
  ResComb$Group <- factor(ResComb$Group, levels = group_levels)
  
  ## ---- 8) Order y-axis levels (names) ----
  if (is.null(ordera)) {
    g2_abbr  <- groups2$Abbreviation
    ordera_n <- norm_key(g2_abbr)
    present  <- g2_abbr[ordera_n %in% unique(ResComb$key_norm)]
    ordera   <- present
    if (identical(Group, "All") && isTRUE(first)) ordera <- rev(ordera)
  } else {
    if (identical(Group, "All") && isTRUE(first)) ordera <- rev(ordera)
  }
  
  # Convert ordera (abbreviations) to displayed levels present in ResComb$name
  ordera_norm     <- norm_key(ordera)
  display_levels  <- ResComb$name[match(ordera_norm, ResComb$key_norm)]
  display_levels  <- display_levels[!is.na(display_levels)]
  if (length(display_levels) == 0) display_levels <- ResComb$name
  ResComb$name <- factor(ResComb$name, levels = unique(display_levels))
  
  # Numeric group index for strip fills
  ResComb$Group2 <- as.numeric(as.factor(ResComb$Group))
  
  ## ---- 9) xlim ----
  if (is.null(xlim) || length(xlim) != 2 || any(!is.finite(xlim))) {
    xlim <- range(c(ResComb$errord1, ResComb$errord2), na.rm = TRUE)
  }
  
  ## ---- 10) Build forest plot ----
  plote2 <- forestplot(
    df       = ResComb,
    estimate = result,
    se       = 0,
    pvalue   = pval1,
    psignif  = 0.1,
    xlim     = xlim,
    xlab     = 'Logged Ratio between Raw Concentrations of Case and Control with 90% CI',
    ylab     = 'Steroid Groups',
    title    = '',
    colour   = Significance1
  ) +
    # ggforce::facet_col(facets = ~Group, scales = "free_y",  space = "free",strip.position = 'left') + #space = "free",
   # ggforce::facet_col(facets = ~Group, scales = "free_y", strip.position = 'left')+
  # facet_grid(
  #   Group ~ .,
  #   scales = "free_y",
  #   space = "free_y")+ #strip.position = 'left'

    geom_errorbarh(aes(xmin = errord1, xmax = errord2, height = .0, colour = Significance1))
  
  # Point color palette
  hp <- if (sum(ResComb$Significance1 == 'Yes', na.rm = TRUE) == length(levels(ResComb$name))) {
    c('blue', 'blue')
  } else {
    c('#999999', 'blue')
  }
  
  # Try stripes transparency (guard in case layer indexing differs)
  try({ plote2$layers[[1]]$aes_params$odd <- "#00000000" }, silent = TRUE)
  
  PlotVar  <- plote2 + theme(axis.text.y = element_blank()) + theme_classic2()
  PlotVar2 <- PlotVar +
    geom_point(aes(colour = factor(Significance1)), colour = ResComb$Color1) +
    scale_color_manual(values = hp) +
    theme(legend.position = "none") +
    theme(strip.text.y = element_text(size = -Inf))
  
  # Customize facet strip colors
  # g      <- ggplot_gtable(ggplot_build(PlotVar2))
  # stripr <- which(grepl('strip-l', g$layout$name))
  # fills  <- c("red", "green", "blue", "yellow", "brown")
  # if (length(stripr) > 0) {
  #   fills <- rep(fills, length.out = length(stripr))
  #   for (i in seq_along(stripr)) {
  #     j <- tryCatch(which(grepl('rect', g$grobs[[stripr[i]]]$grobs[[1]]$childrenOrder)),
  #                   error = function(e) integer(0))
  #     if (length(j) > 0) {
  #       g$grobs[[stripr[i]]]$grobs[[1]]$children[[j]]$gp$fill <- fills[i]
  #     }
  #   }
  # }
  
  jpeg(paste0(name, "divi.jpg"), width = 7500, height = 11000, quality = 100, pointsize = 16, res = 1000)
  print(PlotVar2)
  dev.off()
  
  ## ---- 11) Save outputs ----
  setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
  
  # jpeg(paste0(name, "divi.jpg"), width = 7500, height = 11000, quality = 100, pointsize = 16, res = 1000)
  # print(grid::grid.draw(g))
  # dev.off()
  
  # daiR::image_to_pdf(paste0(name, "divi.jpg"), pdf_name = paste0(paste0(name, "divi.jpg"), '.pdf'))
  # my_image <- magick::image_read(paste0(name, "divi.jpg"))
  # my_svg   <- magick::image_convert(my_image, format = "svg")
  # magick::image_write(my_svg, paste0(name, "divi.svg"))
  # 
  # library(ggplot2)

# PDF (true vector)
ggsave(
  filename = paste0(name, "divi.pdf"),
  plot     = PlotVar2,
  device   = cairo_pdf,   # ensures embedded fonts + vector
  width    = 7.5,
  height   = 11,
  units    = "in"
)

# SVG (true vector)
ggsave(
  filename = paste0(name, "divi.svg"),
  plot     = PlotVar2,
  device   = "svg",
  width    = 7.5,
  height   = 11,
  units    = "in"
)

  
  return(ordera)
}



# This is with first(!!). Use it. 
OutcomeVariables='Steatosis.Grade.0.To.3';Out='Steatosis'; oute='Steatosis';first=TRUE; e='P4';ordera=c();
Group='All';name1=paste("Forest plot of",Group, "Steroid Ratios in",Out);
hel=CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name1,ordera,oute,first,e,xlim, groups2)
# #Afterwards:
first=FALSE;
Group='Female';name2=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name2,ordera,oute,first,e,xlim, groups2)
Group='Male'; name3=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name3,ordera,oute,first,e,xlim, groups2)
# 
OutcomeVariables='Fibrosis.Stage.0.to.4'; Out='Fibrosis';oute='Fibrosis';
Group='All'; name4=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name4,ordera,oute,first,e,xlim, groups2)
Group='Female';name5=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name5,ordera,oute,first,e,xlim, groups2)
Group='Male'; name6=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name6,ordera=hel,oute,first,e,xlim, groups2)
# 
OutcomeVariables='Necroinflammation'; Out='Necroinflammation';oute='Necroinflammation';
Group='All'; name7=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name7,ordera=hel,oute,first,e,xlim, groups2) #not the very first though...
Group='Female';name8=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name8,ordera=hel,oute,first,e,xlim, groups2)
Group='Male'; name9=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name9,ordera=hel,oute,first,e,xlim, groups2)
# 
OutcomeVariables='HOMA.IR';Out='HOMA-IR';oute='HOMAIR';
Group='All';name10=paste("Forest plot of",Group, "Steroid Ratios in",Out);
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name10,ordera=hel,oute,first,e,xlim, groups2) #not the very first though...
Group='Female';name11=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name11,ordera=hel,oute,first,e,xlim, groups2)
Group='Male'; name12=paste("Forest plot of",Group, "Steroid Ratios in",Out); 
CalculateErrors(NonAlcoholicFattyLiverDisease,OutcomeVariables,Group,name12,ordera=hel,oute,first,e,xlim, groups2)

##Alternative calculation and plotting style, you need to have the linear model results saved as excel: #'lms_tikka19324_v2.xlsx'


library(readxl)
library(ggplot2)
library(ggh4x)   # per-facet y-limits, themed strips, facet_grid2(..., space='free_y')
library(grid)
library(magick)

setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")


build_fixed_orders <- function(groups2, global_y_order = NULL) {
  stopifnot(all(c("Group","Abbreviation_old") %in% colnames(groups2)))
  
  fixed_group_levels <- unique(as.character(groups2$Group))  # facet row order
  
  if (is.null(global_y_order)) {
    global_y_order <- unique(as.character(groups2$Abbreviation_old))
  } else {
    global_y_order <- as.character(global_y_order)
  }
  
  group_to_abbr <- lapply(fixed_group_levels, function(gl) {
    grp_set <- as.character(groups2$Abbreviation_old[groups2$Group == gl])
    global_y_order[global_y_order %in% grp_set]
  })
  names(group_to_abbr) <- fixed_group_levels
  
  list(
    group_levels   = fixed_group_levels,
    group_to_abbr  = group_to_abbr,
    global_y_order = global_y_order
  )
}




CalculateErrors <- function(
    Group,
    name,
    groups2,
    lm_path,
    lm_sheet = "Steroids' adjusted lms",
    lm_condition = NULL,
    
    use_qval = FALSE,
    alpha = 0.10,
    se_multiplier = 0.5,
    strict_gender = TRUE,
    
    fixed_group_levels = NULL,
    fixed_y_by_group   = NULL,
    global_y_order     = NULL,
    
    group_strip_fills  = NULL,
    shorten_long_group_names = TRUE,
    
    strip_hjust = 0.50,
    strip_vjust = 0.30,
    strip_margin_left  = 6,
    strip_margin_right = 4,
    
    axis_title_size = 14,
    axis_text_size  = 12,
    strip_text_size = 12,
    title_face = "plain",
    text_face  = "plain",
    strip_face = "plain",
    
    device_width_px  = 8500,
    device_height_px = NULL,
    device_res       = 800,
    
    symmetric_x      = TRUE,
    x_breaks_n       = 7,
    x_nice_step      = 0.05,
    x_minor_width    = 0.025,
    x_expand         = c(0.02, 0.02),
    x_label_accuracy = 0.01,
    x_title_top_margin = 12,
    
    xlim = NULL,
    output_dir = getwd(),
    save_outputs = TRUE,
    return_plot = FALSE,
    ...
)
{
  # ================================================================
  # Helpers
  # ================================================================
  
  
  # ✅ FIX FOR NULL HEIGHT
  if (is.null(device_height_px)) {
    device_height_px <- 8000
  }
  
  
  norm_key <- function(x) {
    x <- toupper(as.character(x))
    x <- gsub("^X(?=[0-9])","", x, perl = TRUE)
    gsub("[^A-Z0-9]", "", x)
  }
  header_norm <- function(x) {
    x <- tolower(as.character(x))
    x <- gsub("[\r\n]+", " ", x); x <- gsub('"', "", x)
    x <- gsub("\\s+",  " ", x);  x <- trimws(x)
    gsub("[^a-z0-9]","", x)
  }
  parse_num <- function(x) {
    if (is.numeric(x)) return(x)
    x <- gsub("\\s+","", as.character(x))
    x <- gsub(",", ".", x)
    suppressWarnings(as.numeric(x))
  }
  infer_condition_from_name <- function(s) {
    if (is.null(s)) return(NA_character_)
    st <- tolower(s)
    if (grepl("steatosis", st)) return("Steatosis")
    if (grepl("fibrosis", st)) return("Fibrosis")
    if (grepl("necro[- ]?inflam", st)) return("Necroinflammation")
    if (grepl("homa", st)) return("HOMAIR")
    NA_character_
  }
  normalize_cond <- function(s) gsub("[^a-z0-9]","", tolower(as.character(s)))
  
  # ================================================================
  # Safety checks
  # ================================================================
  stopifnot(is.data.frame(groups2))
  req_cols <- c("Group","Abbreviation","Abbreviation_old","Name")
  stopifnot(all(req_cols %in% colnames(groups2)))
  if (!file.exists(lm_path)) stop("LM file not found: ", lm_path)
  
  gender_token <- switch(tolower(Group),
                         "all"="both",
                         "male"="male",
                         "female"="female",
                         "both")
  
  # ================================================================
  # Read LM file
  # ================================================================
  lm_raw <- readxl::read_excel(lm_path, sheet = lm_sheet, .name_repair = "minimal")
  if (nrow(lm_raw) == 0) stop("LM sheet empty.")
  
  cn_norm <- header_norm(colnames(lm_raw))
  idx_steroid <- which(cn_norm=="steroid")[1]
  idx_gender  <- which(cn_norm=="genderincondition")[1]
  idx_cond    <- which(cn_norm=="condition")[1]
  idx_coef    <- which(cn_norm=="coef")[1]
  idx_se      <- which(cn_norm=="stderr")[1]
  idx_pval    <- which(cn_norm=="pval")[1]
  idx_qval    <- which(cn_norm=="qval")[1]
  
  LM <- data.frame(
    Steroid      = trimws(as.character(lm_raw[[idx_steroid]])),
    GenderInCond = trimws(as.character(lm_raw[[idx_gender]])),
    Condition    = trimws(as.character(lm_raw[[idx_cond]])),
    Coef         = parse_num(lm_raw[[idx_coef]]),
    Stderr       = parse_num(lm_raw[[idx_se]]),
    Pval         = if (!is.na(idx_pval)) parse_num(lm_raw[[idx_pval]]) else NA_real_,
    Qval         = if (!is.na(idx_qval)) parse_num(lm_raw[[idx_qval]]) else NA_real_,
    stringsAsFactors = FALSE
  )
  LM <- LM[is.finite(LM$Coef) & is.finite(LM$Stderr), ]
  
  # ================================================================
  # Condition & gender filtering
  # ================================================================
  if (is.null(lm_condition)) {
    lm_condition <- infer_condition_from_name(name)
    if (is.na(lm_condition)) {
      conds <- sort(unique(LM$Condition))
      if (length(conds) == 1) lm_condition <- conds[1]
      else stop("Multiple conditions; specify lm_condition.")
    }
  }
  rows_cond <- normalize_cond(LM$Condition) == normalize_cond(lm_condition)
  
  gic_token <- sub(".*_", "", tolower(LM$GenderInCond))
  rows_gender <- gic_token == gender_token
  
  LM_sel <- LM[rows_cond & rows_gender, ]
  if (nrow(LM_sel) == 0) {
    if (strict_gender) stop("No LM rows for gender=", gender_token)
    LM_sel <- LM[rows_cond, ]
  }
  
  # ================================================================
  # Build data for plotting
  # ================================================================
  p_used <- if (use_qval && any(is.finite(LM_sel$Qval))) LM_sel$Qval else LM_sel$Pval
  
  Res <- data.frame(
    name = LM_sel$Steroid,
    result = LM_sel$Coef,
    se_lm = LM_sel$Stderr,
    pval1 = p_used,
    stringsAsFactors = FALSE
  )
  
  # ================================================================
  # Mapping analyte names to groups2
  # ================================================================
  map_tbl <- do.call(rbind, list(
    data.frame(alias=groups2$Abbreviation_old, Group=groups2$Group, AbbrevOld=groups2$Abbreviation_old),
    data.frame(alias=groups2$Abbreviation,     Group=groups2$Group, AbbrevOld=groups2$Abbreviation_old),
    data.frame(alias=groups2$Name,             Group=groups2$Group, AbbrevOld=groups2$Abbreviation_old)
  ))
  map_tbl <- map_tbl[!is.na(map_tbl$alias) & nzchar(map_tbl$alias), ]
  map_tbl$key_norm <- norm_key(map_tbl$alias)
  Res$key_norm     <- norm_key(Res$name)
  
  MapDF <- merge(
    Res[,c("name","key_norm")],
    unique(map_tbl[,c("key_norm","Group","AbbrevOld")]),
    by="key_norm", all.x=TRUE
  )
  
  Res$Group     <- MapDF$Group
  Res$AbbrevOld <- MapDF$AbbrevOld
  
  # Drop unmapped
  drop_mask <- is.na(Res$Group) | is.na(Res$AbbrevOld)
  if (any(drop_mask)) {
    warning("Dropped unmapped analytes: ",
            paste(sort(unique(Res$name[drop_mask])), collapse=", "))
    Res <- Res[!drop_mask, ]
  }
  
  # ================================================================
  # Error bars
  # ================================================================
  eb <- se_multiplier * Res$se_lm
  Res$errord1 <- Res$result - eb
  Res$errord2 <- Res$result + eb
  Res$Significance1 <- ifelse(Res$pval1 < alpha, "Yes", "No")
  
  # ================================================================
  # Fixed ordering logic
  # ================================================================
  if (is.null(fixed_group_levels) || is.null(fixed_y_by_group)) {
    
    fixed_group_levels <- unique(as.character(groups2$Group))
    
    if (is.null(global_y_order)) {
      global_y_order <- unique(as.character(groups2$Abbreviation_old))
    }
    
    fixed_y_by_group <- lapply(fixed_group_levels, function(gl){
      grp_set <- groups2$Abbreviation_old[groups2$Group == gl]
      global_y_order[global_y_order %in% grp_set]
    })
    names(fixed_y_by_group) <- fixed_group_levels
  }
  
  # label shortening
  shorten_label <- function(x) {
    x <- as.character(x)
    if (shorten_long_group_names)
      x <- sub("^Glucocorticoids$", "Glucoc.", x)
    x
  }
  group_label_map <- setNames(shorten_label(fixed_group_levels), fixed_group_levels)
  
  Res$Group <- factor(Res$Group, levels=fixed_group_levels)
  
  groups_frame <- do.call(rbind, lapply(fixed_group_levels, function(gl) {
    data.frame(
      Group = factor(gl, levels=fixed_group_levels),
      GroupDisp = factor(group_label_map[[gl]], levels=group_label_map[fixed_group_levels]),
      AbbrevOld = fixed_y_by_group[[gl]],
      stringsAsFactors = FALSE
    )
  }))
  ResFull <- merge(groups_frame, Res,
                   by=c("Group","AbbrevOld"),
                   all.x=TRUE, sort=FALSE)
  
  # ================================================================
  # X limits
  # ================================================================
  nice_up <- function(z, step=0.05) step * ceiling(z / step)
  
  if (is.null(xlim)) {
    if (symmetric_x) {
      max_abs <- max(abs(ResFull$errord1), abs(ResFull$errord2), na.rm=TRUE)
      xmax <- nice_up(max_abs, step=x_nice_step)
      xlim <- c(-xmax, xmax)
    } else {
      xlim <- range(c(ResFull$errord1, ResFull$errord2), na.rm=TRUE)
    }
  }
  
  # ================================================================
  # Strip color fix — theme() instead of strip_themed
  # ================================================================
  if (is.null(group_strip_fills)) {
    base_cols <- c("#1f77b4","#d62728","#2ca02c","#9467bd","#ff7f0e")
    group_strip_fills <- setNames(base_cols[seq_along(fixed_group_levels)],
                                  fixed_group_levels)
  }
  
  # we need a strip color per facet row, in the order of GroupDisp
  strip_bg_map <- group_strip_fills[group_label_map]
  
  # ================================================================
  # Build plot
  # ================================================================
  sig_cols <- c(No="#999999", Yes="blue")
  
  p <- ggplot2::ggplot(
    ResFull,
    aes(y = AbbrevOld, x = result, colour = Significance1)
  ) +
    geom_vline(xintercept = 0, linewidth = 0.3, colour = "#cccccc") +
    
    # ✅ ggplot2 4.0-compliant horizontal error bars
    geom_errorbar(
      aes(xmin = errord1, xmax = errord2),
      width = 0,
      na.rm = TRUE
    ) +
    
    geom_point(size = 1.8, na.rm = TRUE) +
    scale_colour_manual(values = sig_cols) +
    scale_x_continuous(
      limits = xlim,
      breaks = scales::pretty_breaks(n = x_breaks_n),
      minor_breaks = scales::breaks_width(x_minor_width),
      labels = scales::label_number(accuracy = x_label_accuracy),
      expand = expansion(mult = x_expand)
    ) +
    
    ggh4x::facet_grid2(
      rows = vars(GroupDisp),
      scales = "free_y",
      space = "free_y",
      switch = "y"
    ) +
    
    ggh4x::facetted_pos_scales(
      y = lapply(fixed_group_levels, function(gl) {
        scale_y_discrete(limits = fixed_y_by_group[[gl]], drop = FALSE)
      })
    ) +
    
    xlab("Linear Model Estimates (SE)") +
    ylab("Steroids") +
    
    theme_classic() +
    theme(
      axis.title.x = element_text(size=axis_title_size, face="bold",
                                  margin=margin(t=x_title_top_margin)),
      axis.title.y = element_text(size=axis_title_size, face="bold"),
      axis.text.x  = element_text(size=axis_text_size, face=text_face),
      axis.text.y  = element_text(size=axis_text_size, face=text_face),
      legend.position = "none",
      
      # ✅ NEW strip background fix — safe alternative
      strip.background.y = element_rect(
        fill = strip_bg_map,
        colour = NA
      ),
      
      strip.text.y.left = element_text(
        angle = 90,
        hjust = strip_hjust,
        vjust = strip_vjust,
        size  = strip_text_size,
        face  = strip_face,
        colour = "white",
        margin = margin(l = strip_margin_left, r = strip_margin_right)
      )
    )
  
  # ================================================================
  # Save outputs (NO MAGICK)
  # ================================================================
  if (save_outputs) {
    
    if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)
    safe_stem <- gsub("[^A-Za-z0-9._-]+","_", name)
    
    width_in  <- device_width_px  / device_res
    height_in <- device_height_px / device_res
    
    # JPG
    ggsave(
      filename = file.path(output_dir, paste0(safe_stem, ".jpg")),
      plot = p,
      width = width_in,
      height = height_in,
      dpi = device_res,
      units = "in"
    )
    
    # PDF (vector)
    ggsave(
      filename = file.path(output_dir, paste0(safe_stem, ".pdf")),
      plot = p,
      width = width_in,
      height = height_in
    )
    
    # SVG (vector)
    ggsave(
      filename = file.path(output_dir, paste0(safe_stem, ".svg")),
      plot = p,
      width = width_in,
      height = height_in
    )
  }
  
  if (return_plot) return(list(plot = p, data = ResFull))
  invisible(NULL)
}

run_all_combinations <- function(
    groups2,
    lm_path,
    lm_sheet = "Steroids' adjusted lms",
    groups = c("All", "Female", "Male"),
    conditions = c("Steatosis", "Fibrosis", "Necroinflammation", "HOMA-IR"),
    name_prefix = "Forest plot of",
    global_y_order = NULL,
    use_qval = FALSE,
    alpha = 0.10,
    se_multiplier = 0.5,
    strict_gender = TRUE,
    group_strip_fills = NULL,
    shorten_long_group_names = TRUE,
    output_dir = getwd(),
    save_outputs = TRUE,
    return_plots = FALSE,
    verbose = TRUE,
    ...
) {
  stopifnot(is.data.frame(groups2))
  req_cols <- c("Group","Abbreviation","Abbreviation_old","Name")
  stopifnot(all(req_cols %in% colnames(groups2)))
  if (!file.exists(lm_path)) stop("LM results file not found: ", lm_path)
  
  ord <- build_fixed_orders(groups2, global_y_order = global_y_order)
  fixed_group_levels <- ord$group_levels
  fixed_y_by_group   <- ord$group_to_abbr
  global_y_order     <- ord$global_y_order
  
  logs  <- list(); plots <- list(); k <- 0
  for (g in groups) {
    for (cond in conditions) {
      k <- k + 1
      plot_name <- paste(name_prefix, g, "Steroid Coefs in", cond)
      if (isTRUE(verbose)) message(sprintf("[INFO] %s | %s", g, cond))
      out <- tryCatch({
        CalculateErrors(
          Group = g,
          name = plot_name,
          groups2 = groups2,
          lm_path = lm_path,
          lm_sheet = lm_sheet,
          lm_condition = cond,
          use_qval = use_qval,
          alpha = alpha,
          se_multiplier = se_multiplier,
          strict_gender = strict_gender,
          fixed_group_levels = fixed_group_levels,
          fixed_y_by_group   = fixed_y_by_group,
          global_y_order     = global_y_order,
          group_strip_fills  = group_strip_fills,
          shorten_long_group_names = shorten_long_group_names,
          output_dir = output_dir,
          save_outputs = save_outputs,
          return_plot = return_plots,
          ...
        )
      }, error = function(e) e)
      
      if (inherits(out, "error")) {
        msg <- conditionMessage(out)
        if (isTRUE(verbose)) message("[WARN] Skipped ", g, " | ", cond, " — ", msg)
        logs[[k]] <- data.frame(Group=g, Condition=cond, Status="error", Message=msg, stringsAsFactors=FALSE)
      } else {
        if (isTRUE(verbose) && isTRUE(save_outputs)) {
          message(sprintf("[OK] Saved to: %s", normalizePath(output_dir)))
        }
        logs[[k]] <- data.frame(Group=g, Condition=cond, Status="ok", Message="", stringsAsFactors=FALSE)
        if (isTRUE(return_plots)) plots[[paste(g, cond, sep = " | ")]] <- out$gtable
      }
    }
  }
  log_df <- do.call(rbind, logs)
  if (isTRUE(return_plots)) return(list(log = log_df, plots = plots))
  log_df
}



outdir <- file.path(getwd(), "forest_plots_fix")
dir.create(outdir, showWarnings = FALSE)

global_order <- c("11b-OHA4","11-KDHT","11-KT","11-KA4","A4","AN","DHEA","DHT","T/Epi-T",
                  "E2","E1","S","E","A","B","DOC","17a-OHP5","17a-OHP4","P5","P4")

my_strip_fills <- c(
  Androgens          = "#1f77b4",
  Estrogens          = "#d62728",
  Glucocorticoids    = "#2ca02c",
  Mineralocorticoids = "#9467bd",
  Progestogens       = "#ff7f0e"
)

# groups2red=groups2

# groups2red[groups2red[,1]=='Estrogens',1]='Estrog.'

# In case you need run just one case and gender/s:
# CalculateErrors(
#   Group = "All",
#   name  = "TEST_All_Steatosis4a",
#   groups2 = groups2,
#   lm_path = "lms_tikka19324_v2.xlsx",
#   lm_sheet = "Steroids' adjusted lms",
#   lm_condition = "Steatosis",
#   global_y_order = global_order,
#   group_strip_fills = my_strip_fills,
#   # label placement toward panel & margins
#   strip_vjust = 0.28, strip_margin_left = 8, strip_margin_right = 8,
#   # fonts
#   axis_title_size = 12, axis_text_size = 12, strip_text_size = 12,
#   # x-axis
#   symmetric_x = TRUE, x_breaks_n = 7, x_nice_step = 0.05,
#   x_minor_width = 0.025, x_expand = c(0.02,0.02), x_label_accuracy = 0.01,
#   # device
#   device_width_px = 5000, device_height_px = 8800, device_res = 800,
#   output_dir = outdir, save_outputs = TRUE, return_plot = FALSE
# )


#2) Batch
log_df <- run_all_combinations(
  groups2 = groups2,
  lm_path = "lms_tikka19324_v2.xlsx",
  lm_sheet = "Steroids' adjusted lms",
  groups = c("All","Female","Male"),
  conditions = c("Steatosis","Fibrosis","Necroinflammation","HOMA-IR"),
  name_prefix = "Forest plot of",
  global_y_order = global_order,
  group_strip_fills = my_strip_fills,
  # same visual knobs as above (they’ll pass through):
  strip_vjust = 0.28, strip_margin_left = 8, strip_margin_right = 8,
  # fonts
  axis_title_size = 14, axis_text_size = 12, strip_text_size = 12,
  # x-axis
  symmetric_x = TRUE, x_breaks_n = 7, x_nice_step = 0.05,
  x_minor_width = 0.025, x_expand = c(0.02,0.02), x_label_accuracy = 0.01,
  # device
  device_width_px = 5000, device_height_px = 8800, device_res = 800,
  output_dir = outdir, save_outputs = TRUE, return_plot = FALSE
)

log_df




# Fyi: I was able to revise some of the above codes with Copilot...
Forest plot of All Steroid Ratios in Steatosis

Forest plot of All Steroid Ratios in Steatosis

Forest plot of Female Steroid Ratios in Steatosis

Forest plot of Female Steroid Ratios in Steatosis

Forest plot of Male Steroid Ratios in Steatosis

Forest plot of Male Steroid Ratios in Steatosis

Forest plot of All Steroid Ratios in Fibrosis

Forest plot of All Steroid Ratios in Fibrosis

Forest plot of Female Steroid Ratios in Fibrosis

Forest plot of Female Steroid Ratios in Fibrosis

Forest plot of Male Steroid Ratios in Fibrosis

Forest plot of Male Steroid Ratios in Fibrosis

Making Chord Diagrams

# First the correlations for the chord diagrams (both male and female as well as total subjects):

library(extrafont)
# font_import()  # Run once to import system fonts
# loadfonts(device = "win")  # For Windows


# Copy the tvauxe to  CurrentData
CurrentData = tvauxe
# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
# knitr::opts_knit$set(root.dir = 'C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis')

# Match column names in CurrentData with the third column in groups2 and get their indices
hupo = match(colnames(CurrentData)[colnames(CurrentData) %in% groups2[, 3]], groups2[, 3])
# Replace matched column names in CurrentData with corresponding values from the second column in groups2
colnames(CurrentData)[colnames(CurrentData) %in% groups2[, 3]] = groups2[hupo, 2]
ok = colnames(CurrentData)

# Convert CurrentData to data frame
CurrentData = data.frame(CurrentData)
# Remove specific columns from CurrentData
CurrentData = CurrentData[, !colnames(CurrentData) %in% c('Total_TG', 'PFAS', "Perfluorodecyl.ethanoic.acid")]

# Separate data by gender
tvf = CurrentData[CurrentData[, 'Gender'] == min(CurrentData[, 'Gender']), 1:dim(CurrentData)[2]]
tvm = CurrentData[CurrentData[, 'Gender'] == max(CurrentData[, 'Gender']), 1:dim(CurrentData)[2]]

# Create a list of data frames for total, female, and male subjects
tvtest = list(CurrentData, tvf, tvm)

# Clean up column names in each data frame in tvtest
for (i in 1:3) {
  colnames(tvtest[[i]]) <- gsub("\\.", "-", colnames(tvtest[[i]]))
  colnames(tvtest[[i]]) <- gsub("X11", "11", colnames(tvtest[[i]]))
  colnames(tvtest[[i]]) <- gsub("X17", "17", colnames(tvtest[[i]]))
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "T-Epi-T"] = "T/Epi-T"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "Steatosis-Grade"] = "Steatosis Grade"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "Fibrosis-Stage"] = "Fibrosis Stage"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "17aOH-P4"] = "17a-OHP4"
  colnames(tvtest[[i]])[colnames(tvtest[[i]]) == "HOMA IR"] = "HOMA-IR"}

# Assign cleaned data frames back to CurrentData, tvf, and tvm
CurrentData = tvtest[[1]]
tvf = tvtest[[2]]
tvm = tvtest[[3]]

# Rename specific value in x4
x4[x4 == "X7.oxo.DCA_S"] = "X7-oxo-DCA_S"

colnames(CurrentData)[9:28]=colnames(LogCovariatesNonScaledData)[9:28]
colnames(tvf)[9:28]=colnames(LogCovariatesNonScaledData)[9:28]
colnames(tvm)[9:28]=colnames(LogCovariatesNonScaledData)[9:28]
ok = colnames(CurrentData)

# Calculate Spearman correlations for total subjects
dat = CurrentData
dat = dat %>% select(-c('PatientNumber'))  # Remove 'PatientNumber' column
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r  # Calculate correlation matrix
colnames(resulta) = ok[2:dim(CurrentData)[2]]
rownames(resulta) = ok[2:dim(CurrentData)[2]]

# Calculate Spearman correlations for female subjects
dat = tvf
dat = dat %>% select(-c('PatientNumber', 'Gender'))  # Remove 'PatientNumber' and 'Gender' columns
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r
colnames(resultaf) = ok[3:dim(CurrentData)[2]]
rownames(resultaf) = ok[3:dim(CurrentData)[2]]

# Calculate Spearman correlations for male subjects
dat = tvm
dat = dat %>% select(-c('PatientNumber', 'Gender'))  # Remove 'PatientNumber' and 'Gender' columns
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r
colnames(resultam) = ok[3:dim(CurrentData)[2]]
rownames(resultam) = ok[3:dim(CurrentData)[2]]

# Define column steroidGroups for different variables
at = colnames(resulta)[1:(length(x1) - 1)]  # Clinicals
bt = colnames(resulta)[(length(at) + 1):(length(at) + length(x2))]  # Steroids
ct = colnames(resulta)[(length(at) + length(bt) + 1):(length(at) + length(bt) + length(x3))]  # BA_l
dt = colnames(resulta)[(length(at) + length(bt) + length(ct) + 1):(length(at) + length(bt) + length(ct) + length(x4))]  # BA_s
et = colnames(resulta)[(length(at) + length(bt) + length(ct) + length(dt) + 1):(length(at) + length(bt) + length(ct) + length(dt) + length(x5))]  # PFAS
ft = colnames(resulta)[(length(at) + length(bt) + length(ct) + length(dt) + length(et) + 1):(length(at) + length(bt) + length(ct) + length(dt) + length(et) + length(x6))]  #

# Store lengths of each group
atl = length(at)
btl = length(bt)
ctl = length(ct)
dtl = length(dt)
etl = length(et)
ftl = length(ft)

# Set significance level for correlations
n_level = 0.03

# Filter correlations based on significance level for total subjects
Nrr = qpNrr(resulta, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resulta)
GraphMat = cond * RN
GraphMat = as.matrix(GraphMat)
resulta = GraphMat
colnames(resulta) = ok[2:dim(CurrentData)[2]]
rownames(resulta) = ok[2:dim(CurrentData)[2]]

# Filter correlations based on significance level for female subjects
Nrr = qpNrr(resultaf, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resultaf)
GraphMat = cond * RN
GraphMat = as.matrix(GraphMat)
resultaf = GraphMat

# Filter correlations based on significance level for male subjects
Nrr = qpNrr(resultam, verbose = FALSE)
Nrr[is.na(Nrr)] = 1
cond = data.frame(as.matrix(Nrr < n_level))
RN = data.frame(resultam)
GraphMat = cond * RN
GraphMat = as.matrix(GraphMat)
resultam = GraphMat

# Rename 'Gender' column to 'Sex(F-M+)' in correlation matrices
colnames(resulta)[colnames(resulta) == 'Gender'] <- 'Sex(F-M+)'
rownames(resulta)[rownames(resulta) == 'Gender'] <- 'Sex(F-M+)'

# Update column and row names in correlation matrices
# colnames(resulta)[2:dim(resulta)[2]] = ok[3:dim(CurrentData)[2]]
# rownames(resulta)[2:dim(resulta)[2]] = ok[3:dim(CurrentData)[2]]
# colnames(resultaf)[2:dim(resultaf)[2]] = ok[4:dim(CurrentData)[2]]
# rownames(resultaf)[2:dim(resultaf)[2]] = ok[4:dim(CurrentData)[2]]
# colnames(resultam)[2:dim(resultam)[2]] = ok[4:dim(CurrentData)[2]]
# rownames(resultam)[2:dim(resultam)[2]] = ok[4:dim(CurrentData)[2]]

# Function to create chord diagrams for different steroidGroups
CreateChordDiagrams <- function(vars, n_level, big, rem, modi, colt, gend, colors, a, b, c, d, e, f) {
  # vars=vars[[2]]
  classes <- 5
  tot <- rownames(resulta)[2:dim(resulta)[1]]
  range <- 1:(a + b + c + e + f)
  layout(matrix(1:1, 1, 1))
  title <- 'Sex'
  genders <- gend
  windowsFonts(A = windowsFont("Arial"))

  # windowsFonts(A = windowsFont("Calibri (Body)"))
  i <- 1
  GraphMat <- vars    
  
  # Set column and row names based on gender
    if (gend == 'All') {
    colnames(GraphMat) <- rownames(resulta)
    rownames(GraphMat) <- rownames(resulta) } else {
    colnames(GraphMat) <- rownames(resultaf)
    rownames(GraphMat) <- rownames(resultaf)}
  
  # Define steroidGroups for different variables
  g1 <- c(rep('Clinical', a), rep('Steroids', b), rep('Bile Acids', c), rep('Contaminants', e), rep('Lipids', f))
  
  # Remove self-correlations within each group
  GraphMat[1:a, 1:a] <- 0
  GraphMat[(a + 1):(a + b), (a + 1):(a + b)] <- 0
  GraphMat[(a + b + 1):(a + b + c), (a + b + 1):(a + b + c)] <- 0
  GraphMat[(a + b + c + 1):(a + b + c + e), (a + b + c + 1):(a + b + c + e)] <- 0
  GraphMat[(a + b + c + e + 1):(a + b + c + e + f), (a + b + c + e + 1):(a + b + c + e + f)] <- 0
  
  # Define group structure and color palette
  group <- structure(g1, names = colnames(GraphMat))
  grid.col <- structure(c(rep('#93c29f', a), rep('#a83277', b), rep('red', c), rep('grey', e), rep('black', f)),
                        names = rownames(GraphMat))
  
  # Filter and adjust correlation matrix
  GraphMat <- GraphMat[range, range]
  grid.col <- grid.col[range]
  g <- graph.adjacency(GraphMat, mode = "upper", weighted = TRUE, diag = FALSE)
  e <- get.edgelist(g)
  df <- as.data.frame(cbind(e, E(g)$weight))
  df[, 3] <- as.numeric(df[, 3])
  
  # Define color function for edges
  col_fun <- colorRamp2(c(-1, 0, 1), c("blue", 'white', "orange"), transparency = 0.25)
  # c("orange", "blue")
  
  # Remove specified elements from the data frame
  df <- df[!df$V1 %in% rem, ]
  df <- df[!df$V2 %in% rem, ]
  
  # Define legends for the plot
  
  # Define legends for the plot with larger font size
  # lgd_group <- Legend(at = gend, type = "points", legend_gp = gpar(col = colors, fontsize = 24, fontfamily = "Arial"), title_position = "topleft", title = title)
  # lgd_points <- Legend(at = unique(g1), type = "points", legend_gp = gpar(col = unique(grid.col), fontsize = 24), title_position = "topleft", title = "Class")
  
  
lgd_group  <- Legend(
  at = gend,
  type = "points",
  legend_gp = gpar(col = colors),
  title_position = "topleft",
  size = unit(6, "mm"),      # ✅ makes dots bigger
  background = NA,          # ✅ remove grey background  title_position = "topleft",
  title = title,
  title_gp = gpar(fontsize = 32, fontface = "bold", fontfamily = "Arial"),
  labels_gp = gpar(fontsize = 30, fontfamily = "Arial")
)

lgd_points <- Legend(
  at = unique(g1),
  type = "points",
  legend_gp = gpar(col = unique(grid.col)),
  size = unit(6, "mm"),      # ✅ makes dots bigger
  background = NA,          # ✅ remove grey background  title_position = "topleft",
  title = "Class",
  title_gp = gpar(fontsize = 32, fontface = "bold"),
  labels_gp = gpar(fontsize = 30,family = "Arial")
)

lgd_lines  <- Legend(
  at = c("Positive", "Negative"),
  type = "points",
  size = unit(6, "mm"),      # ✅ makes dots bigger
  background = NA,          # ✅ remove grey background  title_position = "topleft",
  legend_gp = gpar(col = c("orange", "blue")),
  title_position = "topleft",
  title = "Correlation",
  title_gp = gpar(fontsize = 32, fontface = "bold"),
  labels_gp = gpar(fontsize = 30, family = "Arial")
)

lgd_edges  <- Legend(
  at = c(-1, 0, 1),
  col_fun = col_fun,
  title_position = "topleft",
  title_gap = unit(4, "mm"),
  title = "Edges",
  title_gp = gpar(fontsize = 32, fontface = "bold"),
  labels_gp = gpar(fontsize = 30, family = "Arial")
)

lgd_list_vertical <- packLegend(lgd_group, lgd_points, lgd_lines, lgd_edges)


  
  
  # lgd_lines <- Legend(at = c("Positive", "Negative"), type = "points", legend_gp = gpar(col = c("orange", "blue"), fontsize = 24), title_position = "topleft", title = "Correlation")
  # lgd_edges <- Legend(at = c(-1, 1), col_fun = col_fun, title_position = "topleft", title = "Edges", legend_gp = gpar(fontsize = 24))
  # lgd_list_vertical <- packLegend(lgd_group, lgd_points, lgd_lines, lgd_edges)

     # jpeg(filename = paste0(gend, n_level, 'cd.jpg'), width = 17, height = 16.5, units = "in", res = 1000)

  
# ===== OPEN REAL SVG DEVICE =====
svgfile <- paste0(gend, n_level, "_cd.svg")

svglite::svglite(
  file = svgfile,
  width  = 37,   # inches (match your PNG width)
  height = 32
)



  # Set parameters for the chord diagram
  # circos.par(gap.after = 1.5, start.degree = 90)
  
  circos.par(
  gap.after = 1.5,
  start.degree = 90,
  
cell.padding = c(0, 0, 0, 0),  # ✅ remove inner padding
  track.margin = c(0.01, 0.01)  # ✅ allow thicker band
)
#   canvas.xlim = c(-1.2, 1.0),  # ⬅ leaves space on the right
#   canvas.ylim = c(-1.1, 1.1)
# )
  
#   circos.par(
#   gap.after = 1.5,
#   start.degree = 90,
#   canvas.xlim = c(-1.4, 0.8),
#   canvas.ylim = c(-1.1, 1.1)
# )
  
#   circos.par(
#   gap.after = 1.5,
#   start.degree = 90,
#   canvas.xlim = c(-1.4, 0.8),  # ⬅ aggressive left shift
#   canvas.ylim = c(-1.1, 1.1)
# )
  
  
# cell.padding = c(0, 0, 0, 0),  # ✅ remove inner padding
  # track.margin = c(0.01, 0.01)  # ✅ allow thicker band


par(lwd = 1.2)   # ✅ sector border thickness (SVG-visible)

  chordDiagram(df, annotationTrack = c("grid"), grid.col = grid.col, directional = FALSE, symmetric = TRUE, scale = FALSE,preAllocateTracks = list(track.height = 0.08),  # ✅ ADD THIS
               link.lwd = 0.3, link.border = "white", order = rownames(GraphMat),  col = col_fun, annotationTrackHeight = 0.08,    transparency = 0.25, big.gap = 10, small.gap = 1) #preAllocateTracks = list(track.height = 0.12),  # ✅ ADD THIS
  
  # Add text and axis to the plot
  circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
    xlim <- get.cell.meta.data("xlim")
    ylim <- get.cell.meta.data("ylim")
    sector.name <- get.cell.meta.data("sector.index")
    circos.text(mean(xlim), ylim[1] + .12, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5),cex = 3.0,font = 2, family = "Arial") #cex =1.57, font = 1
    circos.axis(h = "top", labels.cex = 0.000001, major.tick.length = 0.2, sector.index = sector.name, track.index = 2) #labels.cex = 0.000001, 1.2
  }, bg.border = NA)
  
  # Set font and draw legends
  # windowsFonts(A = windowsFont("Calibri (Body)"))
  windowsFonts(A = windowsFont("Arial"))
  # draw(lgd_list_vertical, x = unit(5, "mm"), y = unit(5, "mm"), just = c("right", "bottom"))
  
  draw(
  lgd_list_vertical,
  # x = unit(1, "npc") - unit(2, "mm"),
  x = unit(1, "npc") - unit(50, "mm"),   # right edge, pulled slightly inward
  y = unit(0.5, "npc"),                  # vertical center
  just = c("left", "center")
)
  
#   ComplexHeatmap::draw(
#   lgd_list_vertical,
#   annotation_legend_side = "right",
#   padding = unit(c(5, 20, 5, 5), "mm")  # ⬅ space for legend
# )
#   
  
# x = unit(1, "npc") - unit(2, "mm")
  
  # Save the plot as a JPEG file
  # dev.copy(jpeg, paste0(gend, n_level, 'cd.jpg'), width = 25, height = 22, units = "in", res = 1000,family = "Arial" )       # ✅ added)

  
  # dev.copy(png, paste0(gend, n_level, 'cd.png'),
  #        width = 36, height = 25, units = "in", res = 700)
  
#   dev.copy(
#   png,
#   paste0(gend, n_level, 'cd.png'),
#   width = 32,
#   height = 22,
#   units = "in",
#   res = 600
# )
  
    dev.off() #sometimes the male print do not work, then drive separately this, including different name
  
  # library(Cairo)
  # CairoJPEG(filename = paste0(gend, n_level, 'cd.jpg'), width = 17, height = 16.5, units = "in", res = 1000, family = "Arial")

  
  # Include the plot in the report and convert it PDF and SVG
  # knitr::include_graphics(paste0(gend, n_level, 'cd.jpg'))
  # daiR::image_to_pdf(paste0(gend, n_level, 'cd.jpg'), pdf_name = paste0(paste0(gend, n_level, 'cd'), '.pdf'))
  # my_image <- image_read(paste0(gend, n_level, 'cd.jpg'))
  # my_svg <- image_convert(my_image, format = "svg")
  # image_write(my_svg, paste(paste0(gend, n_level, 'cde'), ".svg"))
#   knitr::include_graphics(paste0(gend, n_level, 'cd.png'))
# 
# daiR::image_to_pdf(
#   paste0(gend, n_level, 'cd.png'),
#   pdf_name = paste0(gend, n_level, 'cd.pdf')
# )
# 
# my_image <- image_read(paste0(gend, n_level, 'cd.png'))
# my_svg <- image_convert(my_image, format = "svg")
# image_write(my_svg, paste0(gend, n_level, 'cde.svg'))
#   
#     
    
}

# For all 
n_level = n_level
circos.clear()

#All Variables
vars = list(resulta)
big = 'Yes'
title = 'All Variables'
rem = x4
modi = 5
colt = 'black'
a = length(x1) -1 #check this '-1' together with length of resulta and column names
b = length(x2)
c = length(x3)
d = length(x4)
e = length(x5)
f = length(x6)
gend = c('All')
colors = c('blue')
CreateChordDiagrams(vars[[1]], n_level, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)
## png 
##   2
# Genderwise:
# gend = 'Female'
# # circos.clear()
# vars = list(resultaf, resultam)
# big = 'No'
# title = 'Genders Separated_v2'
# rem = x4
# modi = 4
# colt = 'black'
# a = length(x1) - 2
# b = length(x2)
# c = length(x3)
# d = length(x4)
# e = length(x5)
# f = length(x6)
# colors = c('white')
# CreateChordDiagrams(vars[[1]], n_level, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)
# 
# gend = 'Male'
# # circos.clear()
# vars = list(resultaf, resultam)
# big = 'No'
# title = 'Genders Separated_v2'
# rem = x4
# modi = 4
# colt = 'black'
# a = length(x1) - 2
# b = length(x2)
# c = length(x3)
# d = length(x4)
# e = length(x5)
# f = length(x6)
# colors = c('black')
# circos.clear()
# CreateChordDiagrams(vars[[2]], n_level, big, rem, modi, colt, gend, colors, a, b, c, d, e, f)


#Copiloting is not working very well here, so I just let it comment some of the lines ...

Making Variance Explained Plots

# Some info regarding of making the data for the variance explained plot:
# This is it! https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r

# This function does as its name is indicating:
PlotVarianceExplained <- function(AllData, Group) {
  # Initialize error flag
  an.error.occured <- FALSE
  tv_all2 <- AllData
  
  # Check if 'Gender' column exists
  tryCatch({
    tv_all2[, 'Gender']
  }, error = function(e) {
    an.error.occured <<- TRUE
  })
  
  # Determine condition based on Group and presence of 'Gender' column
  cond <- if (an.error.occured) {
    if (Group == 'female') {
      tv_all2[, 'SEX.1F.2M'] == min(tv_all2[, 'SEX.1F.2M'])
    } else if (Group == 'male') {
      tv_all2[, 'SEX.1F.2M'] == max(tv_all2[, 'SEX.1F.2M'])
    } else {
      rep(TRUE, nrow(tv_all2))
    }
  } else {
    if (Group == 'female') {
      tv_all2[, 'Gender'] == min(tv_all2[, 'Gender'])
    } else if (Group == 'male') {
      tv_all2[, 'Gender'] == max(tv_all2[, 'Gender'])
    } else {
      rep(TRUE, nrow(tv_all2))
    }
  }
  
  # Filter data based on condition
  tv_red <- tv_all2[cond, ]
  RelevantColumns <- tv_red
  colnames(RelevantColumns)[1:8] <- colnames(tv_red)[1:8]
  
  # Transpose the data for SingleCellExperiment
  tv2 <- t(RelevantColumns[, 9:ncol(tv_red)])
  
  # Create SingleCellExperiment object
  # sce <- SingleCellExperiment(tv2)
  sce <- SingleCellExperiment(assays = list(logcounts = tv2))
  # logcounts(sce) <- tv2
  # sce@colData <- DataFrame(RelevantColumns[, 2:8])
  colData(sce) <- S4Vectors::DataFrame(RelevantColumns[, 2:8])
  colnames(colData(sce)) <- colnames(AllData)[2:8]
  colnames(colData(sce))[1] <- 'The Gender'

  # Calculate variance explained
  # vars <- getVarianceExplained(sce, variables = colnames(colData(sce))[1:7])
  vars <- getVarianceExplained(
  sce,
  variables = colnames(colData(sce))[1:7])
  colVars(vars)

  # Set font and color palette
  # windowsFonts(A = windowsFont("Calibri (Body)"))
  windowsFonts(A = windowsFont("Times New Roman"))
  mypalette <- scales::hue_pal()(ncol(colData(sce)))
  names(mypalette) <- colnames(AllData)[2:8]
  
  # Adjust vars if Group is not 'All'
  if (Group != 'All') {
    vars <- vars[, 2:7]
  }
  
  # Plot explanatory variables
  p <- plotExplanatoryVariables(vars) +
    # theme(text = element_text(size = 25, family = "Calibri")) +
    theme(text = element_text(size = 25, family = "Times New Roman")) +
    theme(axis.text = element_text(size = 20, family = "Calibri"))
  
  # Clean up plot data
  # p[[1]] <- p[[1]][!is.na(p[[1]][, 1]), ]
  # p[[1]][, 1] <- as.vector(unlist(p[[1]][, 1]))
  # p[[1]] <- p[[1]][order(p[[1]][, 1]), ]
  
  p$data <- p$data[!is.na(p$data[,1]), ]
  p$data[,1] <- as.vector(p$data[,1])
  p$data <- p$data[order(p$data[,1]), ]

  # Save plot as PNG
  path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
  sips <- paste0(Group, 'vex', ".png")
  pngfile <- fs::path(path, sips)
  agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300, scaling = 2)
  plot(p)
  invisible(dev.off())
  
  # Include plot in report
  knitr::include_graphics(pngfile)
  
  # Convert image to PDF and SVG
  # daiR::image_to_pdf(sips, pdf_name = paste0(sips, '.pdf'))
  # my_image <- image_read(sips)
  # my_svg <- image_convert(my_image, format = "svg")
  # image_write(my_svg, paste(sips, ".svg"))
  
  library(ggplot2)
library(svglite)

path <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
base <- paste0(Group, "vex")

# ----- PDF (true vector) -----
ggsave(
  filename = file.path(path, paste0(base, ".pdf")),
  plot     = p,
  device   = cairo_pdf,
  width    = 60,
  height   = 36,
  units    = "cm"
)

# ----- SVG (true vector) -----
ggsave(
  filename = file.path(path, paste0(base, ".svg")),
  plot     = p,
  device   = svglite,
  width    = 60,
  height   = 36,
  units    = "cm"
)
  


  
  
  
  p
  }

# Example usage:
PlotVarianceExplained(AllData, Group = 'All') #ok :)

PlotVarianceExplained(AllData, Group = 'female')

PlotVarianceExplained(AllData, Group = 'male')

#Copilot helped a bit with the commenting here.
#Check

Making Heatmap with Effect Sizes

# Function to calculate Cohen's d effect sizes, which is based on https://www.statology.org/cohens-d-in-r/
CalculateCohensD <- function(NonAlcoholicFattyLiverDisease, CombinedData, Group, OutcomeVariables) {
  
  # Filter data based on gender
  if (Group == 'Male') {
    NAFLDo <- NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 'Gender'] == max(NonAlcoholicFattyLiverDisease[, 'Gender']), ]
    tva <- CombinedData[CombinedData[, 'SEX.1F.2M'] == max(CombinedData[, 'SEX.1F.2M']), ]
  } else if (Group == 'Female') {
    NAFLDo <- NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 'Gender'] == min(NonAlcoholicFattyLiverDisease[, 'Gender']), ]
    tva <- CombinedData[CombinedData[, 'SEX.1F.2M'] == min(CombinedData[, 'SEX.1F.2M']), ]
  } else {
    NAFLDo <- NonAlcoholicFattyLiverDisease
    tva <- CombinedData}
  
  # Check if the OutcomeVariables column exists
  if (!OutcomeVariables %in% colnames(NAFLDo)) {
    stop("The specified OutcomeVariables column does not exist in the data frame.")}
  
  # Filter data based on outcome
  if (OutcomeVariables != 'HOMA-IR') {
    SG0 <- NAFLDo[NAFLDo[, OutcomeVariables] == min(NAFLDo[, OutcomeVariables]), ]
    SG1 <- NAFLDo[NAFLDo[, OutcomeVariables] > min(NAFLDo[, OutcomeVariables]), ]
  } else {
    SG0 <- NAFLDo[tva[, 'HOMA-IR'] <= 1.5, ]
    SG1 <- NAFLDo[tva[, 'HOMA-IR'] > 1.5, ]}
  
  # Initialize vector to store Cohen's d values
  cd <- numeric(20)
  
  # Calculate Cohen's d for each variable
  for (i in 1:20) {
    group1 <- SG0[, i + 8]
    group2 <- SG1[, i + 8]
    
    data <- data.frame(
      value = c(group1, group2),
      group = factor(rep(c("group1", "group2"), c(length(group1), length(group2)))))
    
    result <- cohen.d(value ~ group, data = data)
    cd[i] <- result$cohen.d[2]}
  
  return(cd)}

# Initialize an empty vector
d <- c()

# Define the dataset
NonAlcoholicFattyLiverDisease <- AllData

# Function to calculate Cohen's d for different steroidGroups and outcomes
calculate_cohd <- function(outcome) {
  Group <- 'All'
  a <- CalculateCohensD(NonAlcoholicFattyLiverDisease, CombinedData, Group, outcome)
  Group <- 'Female'
  b <- CalculateCohensD(NonAlcoholicFattyLiverDisease, CombinedData, Group, outcome)
  Group <- 'Male'
  c <- CalculateCohensD(NonAlcoholicFattyLiverDisease, CombinedData, Group, outcome)
  cbind(a, b, c)}

# Calculate Cohen's d for different outcomes and combine results
outcomes <- c('Steatosis Grade', 'Fibrosis Stage', 'Necroinflammation', 'HOMA-IR')
for (outcome in outcomes) {
  d <- cbind(d, calculate_cohd(outcome))}

# Set row names
rownames(d) <- colnames(AllData[, 9:28])

# Create column names with steroidGroups and outcomes
colnames(d) <- unlist(lapply(outcomes, function(outcome) {
  paste(c('All', 'Female', 'Male'), outcome, sep = "_")}))

# Save the results as CSV file
write.csv(d, paste0('cohens_da_', thedate, '.csv'))

# Convert the results as data frame
n <- d
x <- data.frame(n)
row.names(x) <- colnames(AllData[, 9:28])

# Adjust group names
steroidGroupse <- steroidGroups
# steroidGroupse[steroidGroupse[,'Abbreviation'] == "17aOH-P4",2] <- "17a-OHP4"
op <- steroidGroupse[order(steroidGroupse$Group), 'Abbreviation']
op <- op[op %in% row.names(x)]
x <- x[op, ]

# Function to create breaks for the heatmap
brks_heatmap <- function(mat, color_palette) {
  rng <- range(mat, na.rm = TRUE)
  lpal <- length(color_palette)
  c(seq(rng[1], 0, length.out = ceiling(lpal / 2) + 1),
    seq(rng[2] / dim(mat)[1], rng[2], length.out = floor(lpal / 2)))}

# Define color palette
color_palette <- colorRampPalette(c('blue', 'white', 'orange'), alpha = TRUE)(150)
c("orange", "blue")

# Save heatmap as JPEG
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
jpeg(paste0("cohensd_e2", date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)

# Set viewport for the heatmap
setHook("grid.newpage", function() pushViewport(viewport(x = 1, y = 1, width = 0.9, height = 0.9, name = "vp", just = c("right", "top"))), action = "prepend")

# Generate heatmap
pheatmap(x, cluster_cols = FALSE, cluster_rows = FALSE, breaks = brks_heatmap(x, color_palette), color = color_palette, column_names_side = "bottom", angle_col = 90)

# Reset viewport hook
setHook("grid.newpage", NULL, "replace")

# Add text annotations
grid.text("Steatosis, Fibrosis, Necroinflammation, HOMA-IR", y = -0.07, x = 0.4, gp = gpar(fontsize = 16))
grid.text("Steroids (Androgens, Estrogens, Gluc., Mineraloc., Progestogens)", 
          x = -0.07, rot = 90, gp = gpar(fontsize = 16))

# Save the plot again for quality reasons
dev.copy(jpeg, paste0("cohensd_e2", date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)
dev.off()

# # Include the image in the document
# knitr::include_graphics(paste0("cohensd_e2", date, ".jpg"));dev.off()
# 
# # Convert image to PDF
# daiR::image_to_pdf(paste0("cohensd_e2", date, ".jpg"), pdf_name = paste0("cohensd_e2", date, ".pdf"));#dev.off()
# 
# # Convert image to SVG
# my_image <- image_read(paste0("cohensd_e2", date, ".jpg"))
# my_svg <- image_convert(my_image, format = "svg")
# image_write(my_svg, paste0("cohensd_e2", date, ".svg"))

setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")

pdf(
  file   = paste0("cohensd_e2", date, ".pdf"),
  width  = 9,
  height = 12,
  useDingbats = FALSE
)

# viewport for layout
pushViewport(
  viewport(
    x = 1, y = 1,
    width = 0.9, height = 0.9,
    just = c("right", "top")
  )
)

pheatmap(
  x,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  breaks = brks_heatmap(x, color_palette),
  color  = color_palette,
  column_names_side = "bottom",
  angle_col = 90
)

# annotations
grid.text(
  "Steatosis, Fibrosis, Necroinflammation, HOMA-IR",
  y = -0.07, x = 0.4,
  gp = gpar(fontsize = 16)
)

grid.text(
  "Steroids (Androgens, Estrogens, Gluc., Mineraloc., Progestogens)",
  x = -0.07, rot = 90,
  gp = gpar(fontsize = 16)
)

dev.off()

library(svglite)

svglite(
  file   = paste0("cohensd_e2", date, ".svg"),
  width  = 9,
  height = 12
)

pushViewport(
  viewport(
    x = 1, y = 1,
    width = 0.9, height = 0.9,
    just = c("right", "top")
  )
)

pheatmap(
  x,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  breaks = brks_heatmap(x, color_palette),
  color  = color_palette,
  column_names_side = "bottom",
  angle_col = 90
)

grid.text(
  "Steatosis, Fibrosis, Necroinflammation, HOMA-IR",
  y = -0.07, x = 0.4,
  gp = gpar(fontsize = 16)
)

grid.text(
  "Steroids (Androgens, Estrogens, Gluc., Mineraloc., Progestogens)",
  x = -0.07, rot = 90,
  gp = gpar(fontsize = 16)
)

dev.off()

# Fyi: Revising with Copilot the above codes works partially...

Making Heatmaps with Correlations

# These codes show different ways to calculate and plot correlation coefficients with heatmaps

# Function to calculate correlation matrices
calculate_correlations <- function(data) {
  result <- rcorr(as.matrix(data), type = 'spearman')$r
  p.mat <- rcorr(as.matrix(data), type = 'spearman')$P
  p.mat[is.na(p.mat)] <- 1
  p.mat.adjusted <- matrix(p.adjust(p.mat, method = "BH"), nrow = dim(p.mat)[1], ncol = dim(p.mat)[2])
  rownames(p.mat.adjusted) <- rownames(p.mat)
  colnames(p.mat.adjusted) <- colnames(p.mat)
  result[result == 1] <- 0
  return(list(result = result, p.mat.adjusted = p.mat.adjusted))}

# Function to generate plots
generate_plots <- function(result, filename_prefix, n_level, order, range, corre, method,width = width, height = width) {
    # Print plot to Rmd screen
  corrplot(result, type = "lower", order = order, method = method, 
           tl.col = "black", tl.srt = 90, diag = FALSE, col = rev(COL2('RdBu')), is.corr = FALSE)
  jpeg(paste0(filename_prefix, n_level, order, range, corre, method, ".jpg"), width = 8000, 
       height = 8000, quality = 100, pointsize = 23, res = 300)
  corrplot(result, type = "lower", order = order, method = method, width = width, height = width, 
           tl.col = "black", tl.srt = 90, diag = FALSE, col = rev(COL2('RdBu')), is.corr = FALSE)
  dev.off()
  eoh <- paste0(filename_prefix, n_level, order, range, corre, method, ".jpg")
  daiR::image_to_pdf(eoh, pdf_name = paste0(eoh, '.pdf'))
  my_image <- image_read(eoh)
  my_svg <- image_convert(my_image, format = "svg")
  image_write(my_svg, paste0(eoh, ".svg"))}

# Prepare data
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
CurrentData <- CurrentData[, c(1:3, 4:(dim(CurrentData)[2]))]
CurrentData <- CurrentData[, !colnames(CurrentData) %in% c('Total_TG', 'PFAS', 'Perfluorodecyl.ethanoic.acid')]
CurrentData <- CurrentData[, !colnames(CurrentData) %in% x4]
colnames(CurrentData)[colnames(CurrentData) == "17aOH-P4"] <- "17a-OHP4"
tvf <- CurrentData[CurrentData[, 'Gender'] == min(CurrentData[, 'Gender']), 1:dim(CurrentData)[2]]
tvm <- CurrentData[CurrentData[, 'Gender'] == max(CurrentData[, 'Gender']), 1:dim(CurrentData)[2]]
n_level=1

# Calculate correlations
dat_list <- list(CurrentData, tvf, tvm)
correlation_results <- lapply(dat_list, function(dat) {
  dat <- dat[, !colnames(dat) %in% c('Gender', 'PatientNumber')]
  calculate_correlations(dat)})

# Extract specific results
resulta <- correlation_results[[1]]$result
p.mat.a <- correlation_results[[1]]$p.mat.adjusted
resultaf <- correlation_results[[2]]$result
p.mat.f <- correlation_results[[2]]$p.mat.adjusted
resultam <- correlation_results[[3]]$result
p.mat.m <- correlation_results[[3]]$p.mat.adjusted

# The correlations are ok with the above matrices. 
# http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram
# In addition, squares are good for individual associations, because the order is the same
# More info regarding the function:
# https://jokergoo.github.io/circlize_book/book/legends.html
# https://cran.r-project.org/web/packages/ggplotify/vignettes/ggplotify.html
# https://bioinfo4all.wordpress.com/2021/03/13/tutorial-7-how-to-do-chord-diagram-using-r/
# https://jokergoo.github.io/circlize_book/book/advanced-usage-of-chorddiagram.html
# https://jokergoo.github.io/circlize_book/book/a-complex-example-of-chord-diagram.html

# Generate plots
order="original"; range='orig';corre='re_renormae'; method='color';width = 8000; height = 8000 
filenames <- c("Correlations with Full Plot of All_vok_nes", 
               "Correlations with Full Plot of Female_voek", 
               "Correlations with Full Plot of Male_voeka")

for (i in 1:length(correlation_results)) {
  generate_plots(correlation_results[[i]]$result, filenames[i], n_level, order, range, corre, method,width = width, height = width)}

# Additional processing
x1 <- colnames(correlation_results[[1]]$result)[c(1:6)]
x1 <- c(x1[3:6], x1[1], x1[2])
x5 <- x5[!x5 == 'Perfluorodecyl.ethanoic.acid']
colnames(correlation_results[[1]]$result)[colnames(correlation_results[[1]]$result) == "17aOH-P4"] <- "17a-OHP4"
colnames(correlation_results[[1]]$p.mat.adjusted)[colnames(correlation_results[[1]]$p.mat.adjusted) == "17aOH-P4"] <- "17a-OHP4"
# x2[x2 == "17aOH-P4"] <- "17a-OHP4"
# x2 <- x2[order(match(x2, steroidGroups[, 2]))]
x5 <- x5[!x5 == 'Perfluorodecyl.ethanoic.acid']

# Extract specific results (bile acids and lipids vs. PFAS)
resulta1 <- correlation_results[[1]]$result[c(x3, x6), x5]
p.mat.a1 <- correlation_results[[1]]$p.mat.adjusted[c(x3, x6), x5]
resulta2 <- correlation_results[[2]]$result[c(x3, x6), x5]
p.mat.f1 <- correlation_results[[2]]$p.mat.adjusted[c(x3, x6), x5]
resulta3 <- correlation_results[[3]]$result[c(x3, x6), x5]
p.mat.m1 <- correlation_results[[3]]$p.mat.adjusted[c(x3, x6), x5]

# Transpose results
resulta1 <- t(resulta1)
resulta2 <- t(resulta2)
resulta3 <- t(resulta3)
p.mat.a1 <- t(p.mat.a1)
p.mat.f1 <- t(p.mat.f1)
p.mat.m1 <- t(p.mat.m1)

# Apply threshold
apply_threshold <- function(res) {
  res[res > 0.4] <- 0.4
  res[res < -0.4] <- -0.4
  return(res)}

resulta1 <- apply_threshold(resulta1)
resulta2 <- apply_threshold(resulta2)
resulta3 <- apply_threshold(resulta3)

# Generate plots
# #https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
# #https://statisticsglobe.com/change-font-size-corrplot-r
# #order can be: alphabet, hclust, original #https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
order="original"; range='orig';corre='no_rendorm'; type='full'; 
method='color';ga='All';gf='Female';gm='Male' #color square
hip1='transpose';width = 2400;height=6000;pch.cex=1.2;ho='PFAS vs. BAs and lipids'
col = colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100)
# c("orange", "blue")
cl.offset=1.0;cl.length=5;cl.cex = 1.3;
pch.cex=0.75;
pch=15;cl.pos = 'r';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
filenames <- c("Correlations with Full Plot of All_vok_nes", "Correlations with Full Plot of Female_voek", "Correlations with Full Plot of Male_voeka")

for (i in 1:length(correlation_results)) {
  generate_plots(correlation_results[[i]]$result, filenames[i], n_level, order, range, corre, method,width = width, height = width)}

# I need one set of data made manually:
CurrentData=CovariatesScaledData#tv_half_log22
colnames(CurrentData)=colnames(tvauxe)
CurrentData=CurrentData[,!colnames(CurrentData) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
CurrentData=CurrentData[,!colnames(CurrentData) %in% x4]
colnames(CurrentData)[colnames(CurrentData)=="17aOH-P4"]="17a-OHP4"
tvm=CurrentData[CurrentData[,'Gender']==max(CurrentData[,'Gender']),1:dim(CurrentData)[2]]
tvf=CurrentData[CurrentData[,'Gender']==min(CurrentData[,'Gender']),1:dim(CurrentData)[2]]
dat = CurrentData;
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r
p.mat.a=rcorr(as.matrix(dat), type = c('spearman'))$P;
p.mat.a[is.na(p.mat.a)]=1;
p.mat.aa=matrix(p.adjust(p.mat.a,method="BH"),nrow=dim(p.mat.a)[1],ncol=dim(p.mat.a)[2]);
rownames(p.mat.aa)=rownames(p.mat.a);colnames(p.mat.aa)=colnames(p.mat.a)
dat=tvf; #
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #SEX.1F.2M
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson# intersect(colnames(resultaf), rownames(resultaf)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.f=rcorr(as.matrix(dat), type = c('spearman'))$P
p.mat.f[is.na(p.mat.f)]=1;
p.mat.ff=matrix(p.adjust(p.mat.f,method="BH"),nrow=dim(p.mat.f)[1],ncol=dim(p.mat.f)[2]);
rownames(p.mat.ff)=rownames(p.mat.f);colnames(p.mat.ff)=colnames(p.mat.f)
dat=tvm;  #
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] #dat= dat %>% select(-c('Gender')) #this is quite nice way to delete columns, please remember...
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson # intersect(colnames(resultam), rownames(resultam)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
p.mat.m=rcorr(as.matrix(dat), type = c('spearman'))$P
p.mat.m[is.na(p.mat.m)]=1;
p.mat.mm=matrix(p.adjust(p.mat.m,method="BH"),nrow=dim(p.mat.m)[1],ncol=dim(p.mat.m)[2]);
rownames(p.mat.mm)=rownames(p.mat.m);colnames(p.mat.mm)=colnames(p.mat.m)
resulta[resulta==1]=0
resultam[resultam==1]=0
resultaf[resultaf==1]=0;#min(resultaf);max(resultaf);

setwd("C:/Users/patati/Desktop/Turku/R")
groups=read.csv("groups_17823.csv", header = TRUE, sep=";")
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")

#The ok ones:
x1=colnames(resulta)[c(1:6)]
x1=c(x1[3:6],x1[1],x1[2])#x1[2],
x5=x5[!x5=='Perfluorodecyl.ethanoic.acid']
colnames(resulta)[colnames(resulta)=="17aOH-P4"]="17a-OHP4"
colnames(p.mat.a)[colnames(p.mat.a)=="17aOH-P4"]="17a-OHP4"
x2=groups2[,'Abbreviation_old']
# x2[x2=="17aOH-P4"]="17a-OHP4"
# x2=x2[order(match(x2,groups2[,2]))] #https://stackoverflow.com/questions/1568511/how-do-i-sort-one-vector-based-on-values-of-another
x5=x5[!x5=='Perfluorodecyl.ethanoic.acid']

resulta11=resulta[c(x1,x3,x6),x2];  p.mat.a11=p.mat.aa[c(x1,x3,x6),x2]
resulta22=resultaf[c(x1,x3,x6),x2]; p.mat.f22=p.mat.ff[c(x1,x3,x6),x2]
resulta33=resultam[c(x1,x3,x6),x2]; p.mat.m33=p.mat.mm[c(x1,x3,x6),x2]

resulta11[resulta11 >0.5] = 0.5
resulta11[resulta11 < -0.5] = -0.5
resulta22[resulta22 >0.5] = 0.5
resulta22[resulta22 < -0.5] = -0.5
resulta33[resulta33 >0.5] = 0.5
resulta33[resulta33 < -0.5] = -0.5

# Function to generate and save plots
# generate_and_save_plots <- function(result, p.mat, filename, width, height, col, cl.cex, pch.cex, pch, cl.pos, cl.offset, cl.length) {
#   # 
#   p=corrplot(result, type = "full", order = "original", method = "color", p.mat = p.mat, 
#            tl.col = "black", cl.cex = cl.cex, pch.cex = pch.cex, pch.col = 'black', pch = pch, sig.level = c(.001, .05, .2), 
#            cl.pos = cl.pos, insig = "label_sig", cl.offset = cl.offset, cl.length = cl.length, tl.srt = 90, diag = TRUE, 
#            col = col, is.corr = FALSE, col.lim = c(-0.5, 0.5))
#   # dev.off()
#   # jpeg(filename, width = width, height = height, quality = 100, pointsize = 14, res = 300)
#   
#   # Save the plot again for quality reasons
#   dev.copy(jpeg, paste0(filename, date, ".jpg"), width = 9, height = 12, units = "in", res = 1000)
#   dev.off()
#   daiR::image_to_pdf(filename, pdf_name = paste0(filename, '.pdf'))
#   my_image <- image_read(filename)
#   my_svg <- image_convert(my_image, format = "svg")
#   image_write(my_svg, paste0(filename, ".svg"))
# }


generate_and_save_plots <- function(
  result,
  p.mat,
  filename,
  width = 9,
  height = 12,
  col,
  cl.cex = 1,
  pch.cex = 1,
  pch = 19,
  cl.pos = "r",
  cl.offset = 0,
  cl.length = 11,
  save_png = FALSE
) {

  ## ---- PDF (TRUE VECTOR) ----
  pdf(
    file   = paste0(filename, ".pdf"),
    width  = width,
    height = height,
    useDingbats = FALSE)

  corrplot(
    result,
    type       = "full",
    order      = "original",
    method     = "color",
    p.mat      = p.mat,
    sig.level  = c(.001, .05, .2),
    insig      = "label_sig",
    tl.col     = "black",
    tl.srt     = 90,
    diag       = TRUE,
    pch        = pch,
    pch.cex    = pch.cex,
    pch.col    = "black",
    cl.cex     = cl.cex,
    cl.pos     = cl.pos,
    cl.offset  = cl.offset,
    cl.length  = cl.length,
    col        = col,
    is.corr    = FALSE,
    col.lim    = c(-0.5, 0.5)
  )

  dev.off()


  ## ---- SVG (TRUE VECTOR) ----
  svglite::svglite(
    file   = paste0(filename, ".svg"),
    width  = width,
    height = height
  )

  corrplot(
    result,
    type       = "full",
    order      = "original",
    method     = "color",
    p.mat      = p.mat,
    sig.level  = c(.001, .05, .2),
    insig      = "label_sig",
    tl.col     = "black",
    tl.srt     = 90,
    diag       = TRUE,
    pch        = pch,
    pch.cex    = pch.cex,
    pch.col    = "black",
    cl.cex     = cl.cex,
    cl.pos     = cl.pos,
    cl.offset  = cl.offset,
    cl.length  = cl.length,
    col        = col,
    is.corr    = FALSE,
    col.lim    = c(-0.5, 0.5)
  )

  dev.off()


  ## ---- OPTIONAL PNG (RASTER; REPORTS ONLY) ----
  if (isTRUE(save_png)) {
    png(
      file   = paste0(filename, ".png"),
      width  = width,
      height = height,
      units  = "in",
      res    = 300
    )

    corrplot(
      result,
      type       = "full",
      order      = "original",
      method     = "color",
      p.mat      = p.mat,
      sig.level  = c(.001, .05, .2),
      insig      = "label_sig",
      tl.col     = "black",
      tl.srt     = 90,
      diag       = TRUE,
      pch        = pch,
      pch.cex    = pch.cex,
      pch.col    = "black",
      cl.cex     = cl.cex,
      cl.pos     = cl.pos,
      cl.offset  = cl.offset,
      cl.length  = cl.length,
      col        = col,
      is.corr    = FALSE,
      col.lim    = c(-0.5, 0.5)
    )

    dev.off()
  }

  invisible(TRUE)
}


# Generate additional plots with specific results
hip1 <- 'transpose'
width = 3200;height=2000
ho <- 'steroids vs. all others except PFAS'
ps <- 28
col <- colorRampPalette(c('blue', 'white', 'orange'), alpha = TRUE)(150)
# c("orange", "blue")
hip1='transpose'; ho='steroids vs. all others except PFAS';ps=12 #pch=10;

cl.offset <- 1.5
cl.length <- 5
cl.cex <- 0.7
pch.cex <- 0.7 #0.7 1.1
pch <- 16
cl.pos <- 'r'

# Loop for imaging plots
plot_data <- list(
  list(result = resulta11, p.mat = p.mat.a11, filename = paste("Square Correlation Plot ofrra", ho, "All", hip1, "3.jpg")),
  list(result = resulta22, p.mat = p.mat.f22, filename = paste("Square Correlation Plot ofa", ho, "Female", hip1, "3.jpg")),
  list(result = resulta33, p.mat = p.mat.m33, filename = paste("Square Correlation Plot ofa", ho, "Male", hip1, "3.jpg"))
  )

for (plot_info in plot_data) {
  generate_and_save_plots(plot_info$result, plot_info$p.mat, plot_info$filename, width, height, col, cl.cex, pch.cex, pch, cl.pos, cl.offset, cl.length)}

#FYI: Copilot did help a bit for the previous one and the code looks a bit neater.

Making Heatmaps with Linear Model Estimates

# This function calculates linear model estimates for selected 'max wanted' group, and they should be ok, 
# since they/results scale similarly to the correlations.
# In overall, you may need  rather big function to calculate the estimates and plot at the same time, 
# since the spaces of exper. interest have been reduced from the max dataset size.
CalculateEstimates=function(CombinedData,Group,ok,fn,adj,sig.level,sick,sick_group,joo) { #  ok,aa,bb
  CombinedData <- CovariatesScaledData
  if (Group == 'male') {
    NAFLDo <- CombinedData[CombinedData[, 'Gender'] == max(CombinedData[, 'Gender']), ]
  } else if (Group == 'female') {
    NAFLDo <- CombinedData[CombinedData[, 'Gender'] == min(CombinedData[, 'Gender']), ]
  } else if (Group == 'All') {
    NAFLDo <- CombinedData
  }
  
  SG0 <- NAFLDo[, c(2:dim(CombinedData)[2])]
  columnNames <- colnames(SG0)
  SG0 <- data.frame(SG0)
  colnames(SG0[, 8:27]) <- gsub("-", ".", colnames(SG0[, 8:27]))
  colnames(SG0[, 8:27]) <- gsub("/", ".", colnames(SG0[, 8:27]))
  
  lin_mod_summary <- data.frame()
  
  TreatmentVariables <- colnames(AllData)[52:58]
  MediatorVariables <- colnames(AllData)[9:28]
  OutcomeVariables <- colnames(AllData)[c(29:51, 59:71)]
  Treatment2 <- TreatmentVariables  # Define Treatment2
  
  xnam_list <- list(colnames(SG0)[c(4:7)], colnames(SG0)[c(2)], colnames(SG0)[c(3)], Treatment2, c(x3, x6), c('AGE', 'BMI', colnames(SG0)[c(4:7)]), c(x3, x6))
  y_list <- list(Treatment2, Treatment2, Treatment2, colnames(SG0[, 8:27]), colnames(SG0[, 8:27]), colnames(SG0[, 8:27]), colnames(AllData)[52:58])
  
  for (k in 1:length(xnam_list)) {
    xnam <- xnam_list[[k]]
    y <- y_list[[k]]
    for (i in 1:length(xnam)) {
      for (j in 1:length(y)) {
        if (Group != 'All') {
          fmla <- as.formula(paste(paste(c(y[j], " ~ "), collapse = ""), paste(c(xnam[i], 'BMI', 'AGE'), collapse = "+")))
        } else if (Group == 'All') {
          fmla <- as.formula(paste(paste(c(y[j], " ~ "), collapse = ""), paste(c(xnam[i], 'BMI', 'AGE', 'Gender'), collapse = "+")))
        }
        poissone <- lm(fmla, data = SG0)
        pss <- summary(poissone)[[4]]
        hoesh <- c(y[j], xnam[i], Group, pss[2, 1], pss[2, 4], pss[2, 2])
        lin_mod_summary <- rbind(lin_mod_summary, hoesh)
      }
    }
  }
  
  lin_mod_summary <- as.data.frame(lin_mod_summary)
  colnames(lin_mod_summary) <- c('y', 'x', 'Gender', 'r', 'p', 'var_x')
  
  hesa=lin_mod_summary
  hoi=as.data.frame(lin_mod_summary)
  
  
  lin_mod_summaries=hoi
  colnames(lin_mod_summaries)=c('y','x','Gender','r','p','var_x')
  colnames(hoi)=c('y','x','Gender','r','p','var_x')
  
  #This in case you want to print to your local computer: ... :)
  # main_dir <- paste0(c("C://Users//patati//Desktop//Turku//R//",fn),collapse="")
  main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//"),collapse="")
  setwd(main_dir)
  
  meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
  covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
  
  if (adj=='ok') {
    # p.adjust(p=lin_mod_summaries[,5], method = 'BH', n = length(lin_mod_summaries[,5]))
    hoi[,5]=p.adjust(p=lin_mod_summaries[,5], method = 'BH', n = length(lin_mod_summaries[,5]))
    lin_mod_summaries[,5]=p.adjust(p=lin_mod_summaries[,5], method = 'BH', n = length(lin_mod_summaries[,5]))
  }
  
  if (ok=='big') {
    
    rsa=c();joi=c()
    
    # Two are still needed, i.e. ...
    # 1) BA/lipid=covar and steroids, and 2) steroid=covar

    meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
    covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
    
    c1=hoi[,2] %in% covas
    c2=hoi[,1] %in% meds
    hyy=c1 & c2
    m1=hoi[hyy,]
    colnames(m1)=c('y','x','Gender','r','p','var_x') #c('y','x','Gender','r','p','radj')
    c1=hoi[,2] %in% c(x3,x6); c2=hoi[,1] %in% meds
    hyy=c1 & c2; m2=hoi[hyy,]
    colnames(m2)=c('y','x','Gender','r','p','var_x') # hist(as.numeric(m2[,6]),breaks=50)
    joi=rbind(m1,m2)
    i=4;rs=c()
    
    for (i in 4:5) {
      rs=joi[,c(1,2,i)] # rs=data.frame(rs)
      rs=reshape(rs,idvar="x",timevar="y",direction="wide")
      rownames(rs)=rs[,1]
      rs=rs[,-1]

      library(stringr)
      colnames(rs)=str_sub(colnames(rs),3,-1)
      
      colnames(rs) <- gsub("\\.", "-", colnames(rs))
      colnames(rs) <- gsub("X11", "11", colnames(rs))
      colnames(rs) <- gsub("X17", "17", colnames(rs))
      colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
      
      rownames(rs)[rownames(rs)=="Steatosis.Grade"]="Steatosis Grade"
      rownames(rs)[rownames(rs)=="Fibrosis.Stage"]="Fibrosis Stage"
      rownames(rs)[rownames(rs)=="HOMA.IR"]="HOMA-IR"
      covas[covas=="Steatosis.Grade"]="Steatosis Grade"
      covas[covas=="Fibrosis.Stage"]="Fibrosis Stage"
      covas[covas=="HOMA.IR"]="HOMA-IR"
      
      AuxSteroidCheck=c(steroidGroups[,2]) #check that you have driven the steroid data vis file...
      AuxSteroidCheck[AuxSteroidCheck=="17aOH-P4"]="17a-OHP4"
      cme1=match(AuxSteroidCheck,colnames(rs))
      cme2=match(c(covas,x3,x6),rownames(rs))
      rs=rs[cme2,cme1]
      rsa=rbind(rsa,rs) }
    
    
    rs1a=rsa[1:dim(rs)[1],];
    rs2a=rsa[(dim(rs1a)[1]+1):(dim(rs1a)[1]+dim(rs1a)[1]),]
  
    rs1=rs1a;rs2=rs2a
    rownames(rs2)=str_sub(rownames(rs2), end = -2)
    rownames(rs1) <- gsub("\\.", " ", rownames(rs1))
    rownames(rs2) <- gsub("\\.", " ", rownames(rs2))
    rownames(rs1)[rownames(rs1)=="HOMA IR"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
    
    rownames(rs1)[rownames(rs1)=="Gender"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
    
    rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
    rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
    rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
    # rs1=rango(rs1,-0.5,0.5) #check this if needed
  
    rs1=as.matrix(rs1)
    rs2=as.matrix(rs2)
    
        rs1[rs1>0.5]=0.5;rs1[rs1 < -0.5]=-0.5;

    
    width=2500; height=4400
    order="original"; range='orig';corre='no_renorm'; type='full'; method='color';#ga='All';gf='Female';gm='Male' #color square
    cl.offset=1.0;cl.length=5;cl.cex = 1.09;pch.cex=1.27;pch=16;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b', cl.length=15
    ho=Group;hip1='BAs_lipids_as_y vs. steroids_as_x'

    # https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
    # Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
    # https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing

#I have driven these separately for html: 
    # for that you need:
    setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
    # jpeg(paste("Linear Model Estimate Plot of (big)",hip1,Group,".jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);

    hepio=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150) #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)])
    c("orange", "blue")
    
    
    # corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch, ,sig.level = c(.001, .01, .05),cl.pos = cl.pos, 
    #          insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,tl.cex=0.9, tl.srt = 90, diag = TRUE, #tl.pos='n'
    #          col = hepio ,is.corr = FALSE,col.lim=c(-0.5,0.5));   #colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(100)
    #https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html#change-color-spectra-color-legend-and-text-legend
    
    # dev.off()  #,is.corr = FALSE
    # eoh=paste("Linear Model Estimate Plot of (big)",hip1,Group,".jpg")
    # daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'black.pdf'))
    # my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
    
    svgfile <- paste0(
  "Linear Model Estimate Plot of (big) ",
  hip1, " ", Group, ".svg"
)

# svg(
#   filename = svgfile,
#   width  = width / 72,   # pixels → inches
#   height = height / 72,
#   pointsize = 16
# )


# svg(
#   filename = svgfile,
#   width  = 13,    # inches
#   height = 10,
#   pointsize = 10
# )

svglite::svglite(
  file = svgfile,
  width  = 13,
  height = 10,
  pointsize = 10
)

corrplot(
  rs1,
  type = type,
  order = order,
  method = method,
  p.mat = rs2,
  tl.col = "black",
  cl.cex = cl.cex,
  pch.cex = pch.cex,
  pch.col = "black",
  pch = pch,
  sig.level = c(.001, .01, .05),
  insig = "label_sig",
  cl.offset = cl.offset,
  cl.length = cl.length,
  tl.cex = 0.9,
  tl.srt = 90,
  diag = TRUE,
  col = hepio,
  is.corr = FALSE,
  col.lim = c(-0.5, 0.5)
)

dev.off()

message("✅ REAL SVG saved: ", svgfile)

    
    

    #https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
    #https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
    #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2

    
  } else {
    
    rsa=c();rs1=c();rs2=c()
  
    c1=hoi[,1] %in% x5
    hoi[c1,] # This gives you the PFAS (x5) ok. This gives answers directly to the first seven PFASes (and not any other PFASes)
    c2=hoi[,1] %in% names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)] #There could be a better way thant his
    hyy=c1 & c2
    hoi2=hoi[c2 | c1 ,] #Likewise...
    rownames(hoi2)=1:dim(hoi2)[1]
    hoi2=hoi2[1:182,]
    a=hoi2[1:42,1];b=hoi2[1:42,2]
    hoi2[1:42,]=cbind(b,a,hoi2[1:42,3:5])    
    hoi2=hoi2[,c(2,1,3:5)] 
    
    i=4;
    rse=c()
    for (i in 4:5) {
      
      rse=hoi2[,c(1,2,i)] 
      rse=rse[order(rse[,1]),]
      rs=reshape(rse,idvar="x",timevar="y",direction="wide")
      rownames(rs)=rs[,1]
      rs=rs[,-1]
      
      library(stringr) 
      colnames(rs)=str_sub(colnames(rs),3,-1)

      colnames(rs) <- gsub("\\.", "-", colnames(rs))
      colnames(rs) <- gsub("X11", "11", colnames(rs))
      colnames(rs) <- gsub("X17", "17", colnames(rs))
      colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
      colnames(rs)[colnames(rs)=="T-E-T"]="T/Epi-T"
      colnames(rs)[colnames(rs)=="Steatosis-Grade"]="Steatosis Grade"
      colnames(rs)[colnames(rs)=="Fibrosis-Stage"]="Fibrosis Stage"
      colnames(rs)[colnames(rs)=="17aOH-P4"]="17a-OHP4"
      AuxSteroidCheck=c(covas,steroidGroups[,2])
      AuxSteroidCheck <- gsub("\\.", " ", AuxSteroidCheck)
      AuxSteroidCheck[AuxSteroidCheck=="HOMA IR"]="HOMA-IR"
      AuxSteroidCheck[AuxSteroidCheck=="17aOH-P4"]="17a-OHP4"
      ccc=match(AuxSteroidCheck,colnames(rs))

      rs=rs[,ccc]
      rsa=rbind(rsa,rs) 
    }
    
    rs1a=rsa[1:7,];
    rs2a=rsa[8:14,]
    rs1=rs1a;rs2=rs2a

    rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
    rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
    # rs1=rango(rs1,-0.5,0.5) #check if needed

    rs1=as.matrix(rs1)
    rs2=as.matrix(rs2)
    
    rs1[rs1>0.4]=0.4;rs1[rs1 < -0.4]=-0.4;

  order="original"; range='orig';corre='no_renorm'; type='full'; method='color'; #ga='All';gf='Female';gm='Male' #color square
  cl.offset=1.0;cl.length=5;cl.cex = 1.4;pch.cex=2.0;pch=24;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b', cl.length=11
  ho=Group;hip1='Steroids_y vs. PFAS_as_x'
  width=5500; height=1800
  hepio=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)
#I have driven these separately for html: 
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
# jpeg(paste("Linear Model Estimate Plot of (small)",hip1,Group,".jpg"), width = width, height = height, quality = 100,pointsize = 16, res=300);

# corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch,
# sig.level = c(.001, .01, .05),cl.pos =  cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length, tl.cex=1.2, #tl.pos='n',
# tl.srt = 90, diag = TRUE,col = hepio,is.corr = FALSE,col.lim=c(-0.4,0.4)); #colorRampPalette(c('blue','white', 'orange'), alpha = TRUE)(100)

# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html#change-color-spectra-color-legend-and-text-legend
# dev.off();
    # eoh=paste("Linear Model Estimate Plot of (small)",hip1,Group,".jpg")
    # daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'black.pdf'))
    # my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))

svgfile <- paste0(
  "Linear Model Estimate Plot of (small) ",
  hip1, " ", Group, ".svg"
)

# svg(
#   filename = svgfile,
#   width  = width / 72,
#   height = height / 72,
#   pointsize = 16
# )

# svg(
#   filename = svgfile,
#   width  = 12,    # inches
#   height = 7,
#   pointsize = 14
# )

svglite::svglite(
  file = svgfile,
  width  = 12,
  height = 7,
  pointsize = 14
)




corrplot(
  rs1,
  type = type,
  order = order,
  method = method,
  p.mat = rs2,
  tl.col = "black",
  cl.cex = cl.cex,
  pch.cex = pch.cex,
  pch.col = "black",
  pch = pch,
  sig.level = c(.001, .01, .05),
  insig = "label_sig",
  cl.offset = cl.offset,
  cl.length = cl.length,
  tl.cex = 0.9,
  tl.srt = 90,
  diag = TRUE,
  col = hepio,
  is.corr = FALSE,
  col.lim = c(-0.5, 0.5)
)

dev.off()



  } 
  
  return(list(lin_mod_summaries))
}

# The scaling here just in case:
rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}

#To apply to all steroidGroups at one go:
RunAnalysis=function(CombinedData,adj,sig.level,sick,sick_group,joo) {
  RunAnalysis=c();huusa=c();heijaa=c('All','female','male'); ok=c('big','small')  ; jj=c()
  hyp=1;hrt=1;#oo="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/lme/"
  for (hyp in 1:2) {
    for (hrt in 1:3) {
      # CalculateEstimates=function(CombinedData,Group,ok,aa,bb,fn,adj)
      RunAnalysis=append(RunAnalysis,CalculateEstimates(CombinedData,heijaa[hrt],ok[hyp],fn,adj,sig.level,sick,sick_group,joo))}}

  return(RunAnalysis)}

# Driving the function with the parameters as follows:
adj='nook'; sig.level=c(.001,0.01, 0.05); sick='no'; joo='joo' #sickGroup..
metanorm_S_non_fdr=RunAnalysis(AllData,adj,sig.level,sick,sick_group,joo) #This prints to your current folder 'Steroid_Data_Analysis

# I was able to revise the above codes together with Copilot a bit, but perhaps not enough for a general readability.
Heatmap of LMEs (linear model estimates) from Steroids vs. BAs & Lipids & Covariates with All Subjects

Heatmap of LMEs (linear model estimates) from Steroids vs. BAs & Lipids & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. BAs & Lipids & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with All Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Female Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Male Subjects

Heatmap of LMEs from Steroids vs. PFAS & Covariates with Male Subjects

Making Scatter Plots with Models

# This looks similar as the linear model code, but is not exactly so. 
# With this you can draw a scatter plot for each individual combination of two variables within dataset
# This should be as good as it gets... 
CreateScatterPlots=function(AllData, Group, lin_mod_summaries, aa, bb, fn) {
    # main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
    # dir.create(file.path(main_dir)); setwd(file.path(main_dir))
  
  
  # Filter data based on the specified group
  if (Group == 'male') {
    NAFLDo = AllData[AllData[,'Gender'] == max(AllData[,'Gender']),]
  } else if (Group == 'female') {
    NAFLDo = AllData[AllData[,'Gender'] == min(AllData[,'Gender']),]
  } else if (Group == 'All') {
    NAFLDo = AllData
  }
  
  # Select relevant columns from the filtered data
  SG0 = NAFLDo[, c(2:dim(AllData)[2])]
  
  # Fix column names by replacing spaces and special characters
  columnNames = colnames(SG0)
  SG0 = data.frame(SG0)
  colnames(SG0[, 8:27]) <- gsub("-", ".", colnames(SG0[, 8:27]))
  colnames(SG0[, 8:27]) <- gsub("/", ".", colnames(SG0[, 8:27]))
  
  lin_mod_summary = c(); MedAnPlot=c()
  xnam = colnames(SG0)[c(4:7)]
  TreatmentVariables = colnames(AllData)[52:58]
  y = TreatmentVariables
  TreatmentN = TreatmentVariables
  
  # Loop through each combination of xnam and y to calculate correlations
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      # Extract relevant rows from lin_mod_summaries based on conditions
      lin_mod_essentials = lin_mod_summaries[lin_mod_summaries[, 1] == y[j] & lin_mod_summaries[, 2] == xnam[i] & lin_mod_summaries[, 3] == Group,]
      
      # Prepare data for plotting
      jeps = SG0
      r = as.numeric(lin_mod_essentials[4][1,])
      p.valueList = as.numeric(lin_mod_essentials[5][1,])
      rsadj = as.numeric(lin_mod_essentials[6][1,])
      colnames(jeps) = colnames(AllData)[2:dim(AllData)[2]]
      TreatmentVariables = as.character(lin_mod_essentials[2][1,])
      MediatorVariables = as.character(lin_mod_essentials[1][1,])
      
      # Clean up MediatorVariables names
      MediatorVariables <- gsub("\\.", "-", MediatorVariables)
      MediatorVariables <- gsub("X", "", MediatorVariables)
      if (MediatorVariables == "T-Epi-T") {
        MediatorVariables[MediatorVariables == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = TreatmentVariables
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      
      # Set up directory for saving plots
      # setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
      main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
      dir.create(file.path(main_dir)); setwd(file.path(main_dir))
      
      sub_dir <- TreatmentVariables
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      
      # Save plots as JPEG files
      if (MediatorVariables != "T/Epi-T") {
        # jpeg(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 6, height = 6)
      } else {
        # jpeg(paste("Correlations plotted_alle", Treatment2, 'T.Epi.T', ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted_alle", Treatment2, 'T.Epi.T', ".svg"), width = 6, height = 6)

      }
      
      # Plot correlations using ggplot2
      xcx = jeps[, TreatmentVariables]
      ycy = jeps[, MediatorVariables]
      MedAnPlot = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(TreatmentVariables) +
        ylab(MediatorVariables) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - MedAnPlot, hjust = 0, vjust = 0, label = paste0("p = ", round(p.valueList, 5))) +
        theme_classic()
      
      print(a)
      dev.off()
    }
  }
  
  
  
  # Repeat the process for different combinations of xnam and y
  xnam <- colnames(SG0)[c(2)]
  y <- TreatmentN
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      lin_mod_essentials = lin_mod_summaries[lin_mod_summaries[, 1] == y[j] & lin_mod_summaries[, 2] == xnam[i] & lin_mod_summaries[, 3] == Group,]
      jeps = SG0
      r = as.numeric(lin_mod_essentials[4][1,])
      p.valueList = as.numeric(lin_mod_essentials[5][1,])
      rsadj = as.numeric(lin_mod_essentials[6][1,])
      colnames(jeps) = colnames(AllData)[2:dim(AllData)[2]]
      TreatmentVariables = as.character(lin_mod_essentials[2][1,])
      MediatorVariables = as.character(lin_mod_essentials[1][1,])
      MediatorVariables <- gsub("\\.", "-", MediatorVariables)
      MediatorVariables <- gsub("X", "", MediatorVariables)
      if (MediatorVariables == "T-Epi-T") {
        MediatorVariables[MediatorVariables == "T-Epi-T"] = "T/Epi-T"
      }
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      # C://Users//patati//Desktop//Turku//R// C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//
      main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
      dir.create(file.path(main_dir)); setwd(file.path(main_dir))
      sub_dir <- TreatmentVariables
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (MediatorVariables != "T/Epi-T") {
        # jpeg(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 6, height = 6)
      } else {
        # jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 6, height = 6)
      }
      xcx = jeps[, TreatmentVariables]
      ycy = jeps[, MediatorVariables]
      MedAnPlot = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(TreatmentVariables) +
        ylab(MediatorVariables) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 4))) +
        annotate("text", x = min(xcx), y = max(ycy) - MedAnPlot, hjust = 0, vjust = 0, label = paste0("p = ", round(p.valueList, 5))) +
        theme_classic()
      print(a)
      dev.off()
      lin_mod_summary = rbind(lin_mod_summary, c(y[j], xnam[i], Group, r, p.valueList, rsadj))
    }
  }
  
  # Repeat the process for different combinations of xnam and y
  xnam <- colnames(SG0)[c(3)]
  y <- TreatmentN
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      lin_mod_essentials = lin_mod_summaries[lin_mod_summaries[, 1] == y[j] & lin_mod_summaries[, 2] == xnam[i] & lin_mod_summaries[, 3] == Group,]
      jeps = SG0
      r = as.numeric(lin_mod_essentials[4][1,])
      p.valueList = as.numeric(lin_mod_essentials[5][1,])
      rsadj = as.numeric(lin_mod_essentials[6][1,])
      colnames(jeps) = colnames(AllData)[2:dim(AllData)[2]]
      TreatmentVariables = as.character(lin_mod_essentials[2][1,])
      MediatorVariables = as.character(lin_mod_essentials[1][1,])
      MediatorVariables <- gsub("\\.", "-", MediatorVariables)
      MediatorVariables <- gsub("X", "", MediatorVariables)
      if (MediatorVariables == "T-Epi-T") {
        MediatorVariables[MediatorVariables == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = TreatmentVariables
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
      dir.create(file.path(main_dir)); setwd(file.path(main_dir))
      
      sub_dir <- TreatmentVariables
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (MediatorVariables != "T/Epi-T") {
        # jpeg(paste("Correlations plotted_alle", Treatment2, MediatorVariables, ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted_alle", Treatment2, MediatorVariables, ".svg"), width = 6, height = 6)
      } else {
        # jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 6, height = 6)

      }
      
      xcx = jeps[, TreatmentVariables]
      ycy = jeps[, MediatorVariables]
      MedAnPlot = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(TreatmentVariables) +
        ylab(MediatorVariables) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - MedAnPlot, hjust = 0, vjust = 0, label = paste0("p = ", round(p.valueList, 5))) +
        theme_classic()
      print(a)
      dev.off()
    }
  }
  
  # Repeat the process for different combinations of xnam and y
  xnam <- TreatmentN
  y = colnames(SG0[, 8:27])
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      lin_mod_essentials = lin_mod_summaries[lin_mod_summaries[, 1] == y[j] & lin_mod_summaries[, 2] == xnam[i] & lin_mod_summaries[, 3] == Group,]
      jeps = SG0
      r = as.numeric(lin_mod_essentials[4][1,])
      p.valueList = as.numeric(lin_mod_essentials[5][1,])
      rsadj = as.numeric(lin_mod_essentials[6][1,])
      colnames(jeps) = colnames(AllData)[2:dim(AllData)[2]]
      TreatmentVariables = as.character(lin_mod_essentials[2][1,])
      MediatorVariables = as.character(lin_mod_essentials[1][1,])
      MediatorVariables <- gsub("\\.", "-", MediatorVariables)
      MediatorVariables <- gsub("X", "", MediatorVariables)
      if (MediatorVariables == "T-Epi-T") {
        MediatorVariables[MediatorVariables == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = TreatmentVariables
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
      dir.create(file.path(main_dir)); setwd(file.path(main_dir))
      sub_dir <- TreatmentVariables
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (MediatorVariables != "T/Epi-T") {
        # jpeg(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 6, height = 6)
      } else {
        # jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 6, height = 6)

      }
      xcx = jeps[, TreatmentVariables]
      ycy = jeps[, MediatorVariables]
      MedAnPlot = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(TreatmentVariables) +
        ylab(MediatorVariables) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 4))) +
        annotate("text", x = min(xcx), y = max(ycy) - MedAnPlot, hjust = 0, vjust = 0, label = paste0("p = ", round(p.valueList, 5))) +
        theme_classic()
      print(a)
      dev.off()
    }
  }
  
  # Additional analysis for specific combinations of xnam and y
  xnam <- c(x3, x6)
  y <- c(colnames(SG0[, 8:27]))
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      lin_mod_essentials = lin_mod_summaries[lin_mod_summaries[, 1] == y[j] & lin_mod_summaries[, 2] == xnam[i] & lin_mod_summaries[, 3] == Group,]
      jeps = SG0
      r = as.numeric(lin_mod_essentials[4][1,])
      p.valueList = as.numeric(lin_mod_essentials[5][1,])
      rsadj = as.numeric(lin_mod_essentials[6][1,])
      colnames(jeps) = colnames(AllData)[2:dim(AllData)[2]]
      TreatmentVariables = as.character(lin_mod_essentials[2][1,])
      MediatorVariables = as.character(lin_mod_essentials[1][1,])
      MediatorVariables <- gsub("\\.", "-", MediatorVariables)
      MediatorVariables <- gsub("X", "", MediatorVariables)
      if (MediatorVariables == "T-Epi-T") {
        MediatorVariables[MediatorVariables == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = TreatmentVariables
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
      dir.create(file.path(main_dir)); setwd(file.path(main_dir))
      sub_dir <- TreatmentVariables
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (MediatorVariables != "T/Epi-T") {
        # jpeg(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 6, height = 6)

      } else {
        # jpeg(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 6, height = 6)
      }
      xcx = jeps[, TreatmentVariables]
      ycy = jeps[, MediatorVariables]
      MedAnPlot = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(TreatmentVariables) +
        ylab(MediatorVariables) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - MedAnPlot, hjust = 0, vjust = 0, label = paste0("p = ", round(p.valueList, 5))) +
        theme_classic()
      print(a)
      dev.off()
      lin_mod_summary = rbind(lin_mod_summary, c(y[j], xnam[i], Group, r, p.valueList, rsadj))
    }
  }
  
  # Final analysis for specific combinations of xnam and y
  xnam <- c('AGE', 'BMI', colnames(SG0)[c(4:7)])
  y <- c(colnames(SG0[, 8:27]))
  for (i in 1:length(xnam)) {
    for (j in 1:length(y)) {
      lin_mod_essentials = lin_mod_summaries[lin_mod_summaries[, 1] == y[j] & lin_mod_summaries[, 2] == xnam[i] & lin_mod_summaries[, 3] == Group,]
      jeps = SG0
      r = as.numeric(lin_mod_essentials[4][1,])
      p.valueList = as.numeric(lin_mod_essentials[5][1,])
      rsadj = as.numeric(lin_mod_essentials[6][1,])
      colnames(jeps) = colnames(AllData)[2:dim(AllData)[2]]
      TreatmentVariables = as.character(lin_mod_essentials[2][1,])
      MediatorVariables = as.character(lin_mod_essentials[1][1,])
      MediatorVariables <- gsub("\\.", "-", MediatorVariables)
      MediatorVariables <- gsub("X", "", MediatorVariables)
      if (MediatorVariables == "T-Epi-T") {
        MediatorVariables[MediatorVariables == "T-Epi-T"] = "T/Epi-T"
      }
      Treatment2 = TreatmentVariables
      colnames(jeps)[colnames(jeps) == 'HOMA-IR'] = 'HOMA.IR'
      colnames(jeps) <- gsub(" ", "\\.", colnames(jeps))
      main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn, Group, "/"), collapse = "")
      dir.create(file.path(main_dir)); setwd(file.path(main_dir))
      sub_dir <- TreatmentVariables
      if (file.exists(file.path(main_dir, sub_dir))) {
        setwd(file.path(main_dir, sub_dir))
      } else {
        dir.create(file.path(main_dir, sub_dir))
        setwd(file.path(main_dir, sub_dir))
      }
      if (MediatorVariables != "T/Epi-T") {
        # jpeg(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Group, Treatment2, MediatorVariables, ".svg"), width = 6, height = 6)

      } else {
        # jpeg(paste("Correlations plotted_", Treatment2, 'T.Epi.T', ".svg"), width = 1000, height = 1000, quality = 100, pointsize = 20, res = 300)
        svglite::svglite(paste("Correlations plotted", Treatment2, 'T.Epi.T', ".svg"), width = 6, height = 6)
      }
      xcx = jeps[, TreatmentVariables]
      ycy = jeps[, MediatorVariables]
      MedAnPlot = round(sd(ycy) / 2, 2)
      a = ggplot(jeps, aes(y = ycy, x = xcx)) +
        geom_point() +
        xlab(TreatmentVariables) +
        ylab(MediatorVariables) +
        geom_smooth(method = "lm", col = "black") +
        annotate("text", x = min(xcx), y = max(ycy), hjust = 0, vjust = 0, label = paste0("r = ", round(r, 5))) +
        annotate("text", x = min(xcx), y = max(ycy) - MedAnPlot, hjust = 0, vjust = 0, label = paste0("p = ", round(p.valueList, 5))) +
        theme_classic()
      print(a)
      dev.off()
      lin_mod_summary = rbind(lin_mod_summary, c(y[j], xnam[i], Group, r, p.valueList, rsadj))}}
  
  # Convert results to a data frame and return
  hesa = lin_mod_summary
  hoi = as.data.frame(lin_mod_summary)
  # main_dir <- paste0(c("C://Users//patati//Documents//GitHub//Steroid_Data_Analysis//", fn), collapse = "")
  # dir.create(file.path(main_dir)); setwd(file.path(main_dir))
  setwd(main_dir)
  
  return(list(lin_mod_summaries))}



RunScatterPlotAnalysis=function(AllData,lin_mod_summaries) {
  RunAnalysis=c();heijaa=c('All','female','male'); 
  fn='metabnorm//covScaled//non_fdr//'## this was excessive: fn='For' #:)...
  for (hrt in 1:3) {
    RunAnalysis=append(RunAnalysis,CreateScatterPlots(AllData,heijaa[hrt],lin_mod_summaries,aa,bb,fn))}
  return(RunAnalysis)}


# Now I'll be just showing some examples, but this works and if you drive this, you can find all the combos in the folders.
# E.g.: ".../R/metabnorm/covScaled/non_fdr/'Subject'/Hexcer/Correlations plotted All Hexcer S .jpg" 
# fyi: do the 'subject' folders (all, male, fem.) separately
aa=-0.5; bb=0.5; adj='nook'; sig.level=c(.001,0.01, 0.05)
fn='metabnorm//covScaled//non_fdr//'#
metanorm_S_non_fdr=RunAnalysis(CovariatesScaledData,adj,sig.level) # This was already done above
metanorm_S_fdr_non_tot=ldply(metanorm_S_non_fdr, data.frame) #Making the above ok for the next:
non_fdr_check=RunScatterPlotAnalysis(CovariatesScaledData,metanorm_S_fdr_non_tot) # This plots all the combinations, so it takes some time

# The above code in this segment does not get easily better with Copilot, but I let it comment it a bit.
# Moreover, it would not be better (quickly) with ChatGPT.
# It would be nice to improve this, so I'll leave it as a 'under-development-but-working' version. 
# (Not to myself: Function.R can be found in the main folder of this analysis, 
# there's more there, e.g., one mini-function is working.


#Making some 3d plots:

# Load the dataset
data <- CovariatesScaledData

# Plot a 3D scatter plot for PFOA, T/Epi-T, and PE
plot3d(
  y = data[,'PFOA'],
  x = data[,'T/Epi-T'],
  z = data[,'PE'],
  col = 'black',
  type = 's',
  radius = .2,
  ylab = "PFHxS",
  xlab = "T/Epi-T",
  zlab = "PE")

# Fit a linear model for PFHxS based on T/Epi-T and PE
my.lm <- lm(data[,'PFHxS'] ~ data[,'T/Epi-T'] + data[,'PE'])

# Extract the variables for the scatter plot
x <- data[,'T/Epi-T']
y <- data[,'PFHxS']
z <- data[,'PE']

# Create a 3D scatter plot with the fitted plane
s3d <- scatterplot3d(
  x, y, z, pch = 20, mar = c(5, 3, 4, 3),
  main = "3D Scatter Plot of Compounds and Their Residuals",
  angle = 55, scale.y = 0.5,
  xlab = "T/Epi-T",
  ylab = "PFHxS",
  zlab = "PE")

s3d$plane3d(my.lm, lty = "dotted")

# Convert coordinates for the original points and the fitted plane
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))

# Determine the color and line type for the residuals
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

# Load the dataset again
data <- CovariatesScaledData

# Plot a 3D scatter plot for PFOA, AN, and PC_O
plot3d(
  y = data[,'PFOA'],
  x = data[,'AN'],
  z = data[,'PC_O'],
  col = 'black',
  type = 's',
  radius = .2,
  ylab = "PFOA",
  xlab = "AN",
  zlab = "PC_O")

# Fit a linear model for PFOA based on AN and PC_O
my.lm <- lm(data[,'PFOA'] ~ data[,'AN'] + data[,'PC_O'])

# Extract the variables for the scatter plot
y <- data[,'PFOA']
x <- data[,'AN']
z <- data[,'PC_O']

# Create a 3D scatter plot with the fitted plane
s3d <- scatterplot3d(
  x, y, z, pch = 20, mar = c(5, 3, 4, 3),
  main = "3D Scatter Plot of Compounds and Their Residuals",
  angle = 55, scale.y = 0.5,
  xlab = "AN",
  ylab = "PFOA",
  zlab = "PC_O")

s3d$plane3d(my.lm, lty = "dotted")

# Convert coordinates for the original points and the fitted plane
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))

# Determine the color and line type for the residuals
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

# Load the dataset again
data <- CovariatesScaledData

# Plot a 3D scatter plot for PFHxA, DHEA, and PC
plot3d(
  y = data[,'PFHxA'],
  x = data[,'DHEA'],
  z = data[,'PC_O'],
  col = 'black',
  type = 's',
  radius = .2,
  ylab = "PFHxA",
  xlab = "DHEA",
  zlab = "PC")

# Fit a linear model for PFHxA based on DHEA and PC
my.lm <- lm(data[,'PFHxA'] ~ data[,'DHEA'] + data[,'PC'])

# Extract the variables for the scatter plot
y <- data[,'PFHxA']
x <- data[,'DHEA']
z <- data[,'PC']

# Create a 3D scatter plot with the fitted plane
s3d <- scatterplot3d(
  x, y, z, pch = 20, mar = c(5, 3, 4, 3),
  main = "3D Scatter Plot of Compounds and Their Residuals",
  angle = 55, scale.y = 0.5,
  xlab = "DHEA",
  ylab = "PFHxA",
  zlab = "PC")

s3d$plane3d(my.lm, lty = "dotted")

# Convert coordinates for the original points and the fitted plane
orig <- s3d$xyz.convert(x, y, z)
plane <- s3d$xyz.convert(x, y, fitted(my.lm))

# Determine the color and line type for the residuals
i.negpos <- 1 + (resid(my.lm) > 0)
segments(orig$x, orig$y, plane$x, plane$y,
         col = c("blue", "red")[i.negpos], lty = (2:1)[i.negpos])

#This is for female subjects. Checking estrogen levels at menopausal age.
CombinedData_f=CombinedData[CombinedData[,'SEX.1F.2M']==min(CombinedData[,'SEX.1F.2M']),]
# CombinedData_f=CombinedData
AllData_f=AllData[AllData[,'Gender']==min(AllData[,'Gender']),]
# AllData_f=AllData

# CombinedData_fa=cbind(CombinedData_f[,'AGE'],AllData_f[,'E2']/AllData_f[,'E1'])
# CombinedData_fa=cbind(CombinedData_f[,'AGE'],AllData_f[,'E2'])
CombinedData_fa=cbind(CombinedData_f[,'AGE'],CombinedData_f[,'E2']/CombinedData_f[,'E1'])
colnames(CombinedData_fa) = c('AGE', 'E2_per_E1')

# Remove outliers using z-score
# Assuming CombinedData_fa is your original data frame
z_scores <- data.frame(scale(CombinedData_fa))

# Rename your data frame to avoid conflict
df <- CombinedData_fa; df_clean <- df
# Remove outliers using z-score
# df_clean <- df[apply(abs(z_scores), 1, function(row) all(row < 3)), ]

# Fit linear model
model <- lm(E2_per_E1 ~ AGE, data = data.frame(df_clean))
summary_model <- summary(model)

# Extract coefficients and R-squared
slope <- coef(model)[2]
intercept <- coef(model)[1]
r_squared <- summary_model$r.squared

# Create plot
var='E2/E1'
ggplot(df_clean, aes(x = AGE, y = E2_per_E1)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  labs(title = "Scatter Plot with Linear Regression (for Female Subjects)",
    subtitle = paste0("y = ", round(slope, 2), "x + ", round(intercept, 2),
                      ", R² = ", round(r_squared, 2)),
    x = "AGE (years)",
    y = paste0(var, " (pM)")) + 
  theme_minimal()
Correlations Scatter Plotted_All_Hexcer S

Correlations Scatter Plotted_All_Hexcer S

Correlations Scatter Plotted_Female_Hexcer S

Correlations Scatter Plotted_Female_Hexcer S

Correlations Scatter Plotted_Male_Hexcer S

Correlations Scatter Plotted_Male_Hexcer S

Correlations Scatter Plotted_All_C4_L P5

Correlations Scatter Plotted_All_C4_L P5

Correlations Scatter Plotted_Female_C4_L P5

Correlations Scatter Plotted_Female_C4_L P5

Correlations Scatter Plotted_Male_C4_L P5

Correlations Scatter Plotted_Male_C4_L P5

Making Network Plot

# Following the previous work for doing the network plot (https://r-graph-gallery.com/network.html)

# Here the main data of the plot takes correlations:
# Remove specific columns from CurrentData
CurrentData=CurrentData[,!colnames(CurrentData) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]; 
# Remove columns specified in x4 from CurrentData
CurrentData=CurrentData[,!colnames(CurrentData) %in% x4] 
# Rename column '17aOH-P4' to '17a-OHP4'
colnames(CurrentData)[colnames(CurrentData)=="17aOH-P4"]="17a-OHP4" 
dat = CurrentData; 
# Remove 'Gender' and 'PatientNumber' columns from dat
dat=dat[,!colnames(dat) %in% c('Gender','PatientNumber')] 
# Calculate Spearman correlation matrix
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r 
# Set threshold level for connections between vertices (pampulat)
n_level=0.9 
# Calculate Nrr and replace NA values with 1
Nrr=qpNrr(resulta, verbose=FALSE);Nrr[is.na(Nrr)]=1;
# Create a condition matrix based on n_level threshold
cond=data.frame(as.matrix(Nrr<n_level)) 
# Elementwise matrix multiplication and update column names to match row names # https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
RN=data.frame(resulta);GraphMat=cond*RN;GraphMat=as.matrix(GraphMat);resulta=GraphMat;colnames(resulta)=rownames(resulta) 
GraphMat=resulta
# Calculate lengths of x1 to x6 and assign  variables a to f
a=length(x1)-2;b=length(x2);c=length(x3);d=length(x4);e=length(x5);f=length(x6); 

# Removing self-correlation
GraphMat[1:a,1:a]=0
GraphMat[(a+1):(a+b),(a+1):(a+b)]=0
GraphMat[(a+b+1):(a+b+c),(a+b+1):(a+b+c)]=0
GraphMat[(a+b+c+1):(a+b+c+e),(a+b+c+1):(a+b+c+e)]=0
GraphMat[(a+b+c+e+1):(a+b+c+e+f),(a+b+c+e+1):(a+b+c+e+f)]=0

# Select a subset of GraphMat based on column names
GraphMat = GraphMat[colnames(GraphMat)[7:66], colnames(GraphMat)[7:66]]

# Create a graph from adjacency matrix
g <- graph_from_adjacency_matrix(GraphMat, mode="upper", weighted=TRUE, diag=FALSE)

# Convert graph to edge list and create a data frame with weights
e <- as_edgelist(g)
df <- as.data.frame(cbind(e, E(g)$weight))

# Convert the third column to numeric
df[, 3] = as.numeric(df[, 3])
hoi=df 
hoi=hoi[!duplicated(hoi[,c(1,2)]),] # Remove duplicate rows based on the first two columns

# Load igraph library if needed
# https://r-graph-gallery.com/249-igraph-network-map-a-color.html
# Create a data frame for links with source, target, and importance
links <- data.frame(
  source=c("A","A", "A", "A", "A","J", "B", "B", "C", "C", "D","I"),
  target=c("B","B", "C", "D", "J","A","E", "F", "G", "H", "I","I"),
  importance=(sample(1:4, 12, replace=T))) 
# Set column names of hoi to match links
colnames(hoi)=colnames(links) 
links=hoi
# Get distinct sources and rename column to 'label'
sources=hoi %>% distinct(source) %>% rename(source='label')
# Get distinct targets and rename column to 'label'
destinations=hoi %>% distinct(target) %>% rename(target ='label') 
# Merge sources and destinations into nodess
nodess <- full_join(sources, destinations, by = "label") 

# The names of the variable steroidGroups, such as xc for contaminants
xc=x5[x5 %in% nodess$label]
xb=x3[x3 %in% nodess$label]
xl=x6[x6 %in% nodess$label]
x2[x2 =='17aOH-P4']='17a-OHP4' #Next time check these wrong names early on... :)
xs=x2[x2 %in% nodess$label]
# nodess$label=c(xc,xb,xl,xs) #ok, do not use this

# nodess <- nodess[nodess$label %in% c(xc, xb, xl, xs), ]
nodess <- data.frame(label = nodess, stringsAsFactors = FALSE)

# Create a data frame 'nodes' with two columns: 'name' and 'carac'
# 'name' is taken from the first column of 'nodess'
# 'carac' is a categorical variable indicating the type of each node
# nodes <- data.frame(name=nodess[,1], 
#                     carac=( c(rep("Contaminants",length(xc)),rep("Bile Acids",length(xb)),rep("Lipids",length(xl)),rep("Steroids",length(xs))))) #range on kaikki +1

nodes <- data.frame(
  name = nodess$label,
  carac = dplyr::case_when(
    nodess$label %in% xc ~ "Contaminants",
    nodess$label %in% xb ~ "Bile Acids",
    nodess$label %in% xl ~ "Lipids",
    nodess$label %in% xs ~ "Steroids",
    TRUE ~ "Unknown"
  )
)

# Convert the data frame 'links' and 'nodes' into an igraph object
network <- graph_from_data_frame(d=links, vertices=nodes, directed=F) 

# Load necessary libraries for color palettes and graphics (library(RColorBrewer), library(ragg)) if needed

# Set the font to 'Calibri (Body)'
windowsFonts(A = windowsFont("Calibri (Body)"))

# Define a palette of 4 colors
coul  <- c('#B2BEB5','Green','Red','Orange') 

# Create a vector of colors corresponding to the 'carac' variable in the network
my_color <- coul[as.numeric(as.factor(V(network)$carac))]

# Save the plot as a JPEG file with specified dimensions and quality
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
jpeg('network.jpg', width=4, height=4.7, units="in", quality=100, pointsize=7, res=1000)

# Plot the network with specified parameters
plot(network, mode = "circle", vertex.color=my_color, vertex.size = 10,
     edge.arrow.size = 0.8, vertex.label.cex = 0.35, edge.width=as.numeric(E(network)$importance)*6.00 )

# Add a legend to the plot
legend("topright", legend=levels(as.factor(V(network)$carac)),
       col = coul, bty = "n", pch=20, pt.cex = 1.3, cex = 1.3, text.col=coul,
       horiz = FALSE, inset = c(0.65, 0.8))

# Close the JPEG device
dev.off()

# Read the saved JPEG image
eoh='network.jpg'
# my_image <- image_read(eoh)
# 
# # Convert the image to SVG format and save it
# my_svg <- image_convert(my_image, format="svg")
# image_write(my_svg, paste(eoh,".svg"))
# 
# # Display the figure and convert it to PDF
# knitr::include_graphics(eoh)
# daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))

setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")

## ---------- REAL VECTOR SVG ----------
svglite::svglite(
  "network.svg",
  width  = 4,
  height = 4.7
)

plot(
  network,
  mode = "circle",
  vertex.color = my_color,
  vertex.size = 10,
  edge.arrow.size = 0.8,
  vertex.label.cex = 0.35,
  edge.width = as.numeric(E(network)$importance) * 6.00
)

legend(
  "topright",
  legend = levels(as.factor(V(network)$carac)),
  col = coul,
  bty = "n",
  pch = 20,
  pt.cex = 1.3,
  cex = 1.3,
  text.col = coul,
  horiz = FALSE,
  inset = c(0.65, 0.8)
)

dev.off()


## ---------- REAL VECTOR PDF ----------
pdf(
  "network.pdf",
  width = 4,
  height = 4.7,
  useDingbats = FALSE
)

plot(
  network,
  mode = "circle",
  vertex.color = my_color,
  vertex.size = 10,
  edge.arrow.size = 0.8,
  vertex.label.cex = 0.35,
  edge.width = as.numeric(E(network)$importance) * 6.00
)

legend(
  "topright",
  legend = levels(as.factor(V(network)$carac)),
  col = coul,
  bty = "n",
  pch = 20,
  pt.cex = 1.3,
  cex = 1.3,
  text.col = coul,
  horiz = FALSE,
  inset = c(0.65, 0.8)
)

dev.off()

# Copilot helped here with the commenting.

Making Causal Mediation Analysis

# The basic hypothesis. All are variables (y~x+m;m~x)
RunMediationAnalysis <- function(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date = format(Sys.Date(), "%Y-%m-%d"),fn) {
  
  # Ensure AllData is a data frame
  if (!is.data.frame(AllData)) {
    AllData <- as.data.frame(AllData)
    message("Converted AllData to data frame.")
  }

  # Determine the condition based on the group
  if (Group == 'female') {
    cond <- AllData[, 'Gender'] == min(AllData[, 'Gender'], na.rm = TRUE)
  } else if (Group == 'male') {
    cond <- AllData[, 'Gender'] == max(AllData[, 'Gender'], na.rm = TRUE)
  } else {
    cond <- rep(TRUE, nrow(AllData))
  }

  print(paste("Group:", Group, "- Rows after filtering:", sum(cond)))

  # Filter the dataset based on the condition and sickness status
  if (sick == 'yes') {
    tv_red <- AllData[cond & as.vector(sick_group), ]
  } else {
    tv_red <- AllData[cond, ]
  }

  print(paste("Group:", Group, "- Rows after sick filter:", nrow(tv_red)))

  # Extract variables
  X <- tv_red[, TreatmentVariables, drop = FALSE]
  M <- tv_red[, MediatorVariables, drop = FALSE]
  Y <- tv_red[, OutcomeVariables, drop = FALSE]

  # Combine and clean data
  Data <- na.omit(data.frame(cbind(X, M, Y)))

  # Get variable names
  x_names <- make.names(colnames(X))
  m_names <- make.names(colnames(M))
  y_names <- make.names(colnames(Y))

  # Define control and treatment values
  control.value <- colMins(as.matrix(Data[, x_names]))
  treat.value <- colMaxs(as.matrix(Data[, x_names]))

  # Initialize results
  res <- data.frame()
  rn <- c()

  for (y in y_names) {
    for (m in m_names) {
      for (x in x_names) {

        # Build formulas
        fmla1 <- as.formula(paste0("`", m, "` ~ `", x, "`"))
        fmla2 <- as.formula(paste0("`", y, "` ~ ", paste0("`", c(x, m), "`", collapse = " + ")))

        model_m <- lm(fmla1, data = Data)
        model_y <- lm(fmla2, data = Data)

        # Determine treatment and control values
        if (t.valueList == "no") {
          treat_val <- 1
          control_val <- 0
        } else if (t.valueList == "yes") {
          if (!is.na(Data[test, x])) {
            treat_val <- Data[test, x]
            control_val <- control.value[x]
          } else {
            print(paste("Skipping due to NA test value for:", x))
            next
          }
        } else if (t.valueList == "minmax") {
          treat_val <- treat.value[x]
          control_val <- control.value[x]
        } else {
          treat_val <- 1
          control_val <- 0
        }

        print(paste("Running mediation for:", x, m, y, "- Treat val:", treat_val, "- Control val:", control_val))

        # Run mediation
        med_out <- tryCatch({
          if (t.valueList == "no") {
            mediation::mediate(model_m, model_y, treat = x, mediator = m, sims = simss)
          } else if (!is.null(treat_val) && !is.null(control_val)) {
            mediation::mediate(model_m, model_y, treat = x, mediator = m, sims = simss,
                               control.value = control_val, treat.value = treat_val)
          } else {
            warning(paste("Skipping due to missing treatment/control values for:", x, m, y))
            return(NULL)
          }
        }, error = function(e) {
          warning(paste("Mediation failed for:", x, m, y, ":", e$message))
          return(NULL)
        })

        if (!is.null(med_out)) {
          summary_out <- summary(med_out)
          tmp <- c(summary_out$d0, summary_out$d0.p, summary_out$d0.ci[1], summary_out$d0.ci[2],
                   summary_out$z0, summary_out$z0.p, summary_out$z0.ci[1], summary_out$z0.ci[2],
                   summary_out$n1, summary_out$n1.p, summary_out$n1.ci[1], summary_out$n1.ci[2],
                   summary_out$tau.coef, summary_out$tau.p, summary_out$tau.ci[1], summary_out$tau.ci[2])
          res <- rbind(res, tmp)
          rn <- c(rn, paste(x, m, y, sep = "_"))
        } else {
          print(paste("Mediation returned NULL for:", x, m, y))
        }
      }
    }
  }

  # Finalize results
  # if (dim(res)[1]==0 & dim(res)[2]==0) {res=t(data.frame(rep(0,16)))}
  
# if (nrow(res) == 0) {
#   cat("No results found — saving empty file for group:", Group, "\n")
#   res <- data.frame(Note = paste("No mediation results for group:", Group))
# }
# Define column names
col_names <- c('ACME', 'd0.p', 'd0.ci_l', 'd0.ci_u',
               'ADE', 'z0.p', 'z0.ci_l', 'z0.ci_u',
               'Proportion Mediated', 'n1.p', 'n1.ci_l', 'n1.ci_u',
               'Total Effect', 'tau.p', 'tau.ci_l', 'tau.ci_u')

# If results exist, finalize and sort
if (nrow(res) > 0) {
  rownames(res) <- rn
  colnames(res) <- col_names
  res <- res[order(res[, 'd0.p']), ]
  rownames(res) <- gsub("X11", "11", rownames(res))
  rownames(res) <- gsub("X17", "17", rownames(res))
} else {
  # Create an empty data frame with the correct column names
  res <- data.frame(matrix(ncol = length(col_names), nrow = 0))
  colnames(res) <- col_names
  rownames(res) <- "No results"
  res[1, 1] <- paste("No mediation results for group (perhaps due similar output values or colinearity):", Group, fn)
  cat("No results found — saving message row for group:", Group, "\n")

}

# Save to Excel
file_path <- paste0("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/", name, "_", fn, "_", Group, "_", date, "_mediation_results.xlsx")
cat("Saving results to:", file_path, "\n")
write.xlsx(res, file = file_path, append = FALSE, row.names = TRUE)

  return(res)
}

RunEssentialAnalysis <- function(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date = format(Sys.Date(), "%Y-%m-%d"), fn, lkm, joo) {
  
  # Ensure AllData is a data frame
  if (!is.data.frame(AllData)) {
    AllData <- as.data.frame(AllData)
    message("Converted AllData to data frame.")
  }

  # Set the working directory
  hoi1 <- paste0("C:/Users/patati/Desktop/Turku/R/basic causal mediation with the counterfactuals/", fn)
  if (!dir.exists(hoi1)) {
    dir.create(hoi1, recursive = TRUE)
    message("Created directory: ", hoi1)
  }
  setwd(hoi1)

  # Define the name for the output files
  name <- paste(simss, 'basic_divisions')

  # Run for all
  Group <- 'All'
  message("Starting mediation analysis for group: All")
  RunAma <- tryCatch({
    RunMediationAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date,fn)
  }, error = function(e) {
    warning(paste("Error in All group:", e$message))
    data.frame()
  })
  save(RunAma, file = paste0(fn, 'all.RData'))

  # Run for females
  Group <- 'female'
  message("Starting mediation analysis for group: female")
  RunAf <- tryCatch({
    RunMediationAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date,fn)
  }, error = function(e) {
    warning(paste("Error in female group:", e$message))
    data.frame()
  })
  save(RunAf, file = paste0(fn, 'female.RData'))

  # Run for males
  Group <- 'male'
  message("Starting mediation analysis for group: male")
  RunAm <- tryCatch({
    RunMediationAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date,fn)
  }, error = function(e) {
    warning(paste("Error in male group:", e$message))
    data.frame()
  })
  save(RunAm, file = paste0(fn, 'male.RData'))

  message("Mediation analysis completed for all groups.")
}


# Set the number of simulations and other parameters
simss = 100
name = 'Jaot_OK_basic'
joo = 'ei'

# Extract specific columns from the dataset for analysis
# ccova = CombinedData[, c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR")]

# Define a group based on a condition (sum of specific columns > 4)
# sick_group = rowSums(ccova) > 4

# Define file names for different outcomes
file_names = c("Steatosis", "Fibrosis", "Necroinflammation", "HOMAIR", 'Menopause')

# Set additional parameters
lkm = 30
# joo = 'ei'
sick = 'yes'
t.valueList = 'no'
Group = 'All'
# date='houdeesh'


# Set parameters for the 'All' group analysis
joo = 'joo'
sick = 'no'

# Extract specific columns from the dataset for analysis
# ccova = CombinedData[, c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4", "Necroinflammation", "HOMA-IR")]

# Define the file name and group based on a condition (sum of specific columns > 4)
fn = 'All'
# sick_group = rowSums(ccova) > 4

# Driving the analysis takes more than 1h (with a basic laptop 2023,100sims and all three steroidGroups (all, female and male)) so drive the below only if needed:
RunEssentialAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables,AllData,Group,name,simss,t.valueList,test,sick, sick_group,fn,lkm,date,joo) #ok
## [1] "Group: All - Rows after filtering: 104"
## [1] "Group: All - Rows after sick filter: 104"
## [1] "Group: female - Rows after filtering: 69"
## [1] "Group: female - Rows after sick filter: 69"
## [1] "Group: male - Rows after filtering: 35"
## [1] "Group: male - Rows after sick filter: 35"
# setwd("C:/Users/patati/Desktop/Turku/R/basic causal mediation with the counterfactuals/") #check this if needed...

# Error in mediate(b, c, treat = x[z], mediator = m[j], sims = simss) : 
  # unused arguments (treat = x[z], mediator = m[j], sims = simss) ...

# In case, the 'case' sample/subject analysis would be needed: 
# Steatosis
OutcomeVariables="Steatosis Grade"
ccovae=CombinedData[,c("Steatosis.Grade.0.To.3")]; sick_group=ccovae>0 #toth# # hist(ccovae,breaks=100) # hist(ccova[,'HOMA-IR'],breaks=100)
fn=file_names[1];
RunEssentialAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date = format(Sys.Date(), "%Y-%m-%d"), fn, lkm, joo) #toimiinnee/toimi (25725), jos ei niin copilotaa 
## [1] "Group: All - Rows after filtering: 104"
## [1] "Group: All - Rows after sick filter: 104"
## [1] "Running mediation for: PFHpA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## Saving results to: C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/100 basic_divisions_Steatosis_All_2026-04-14_mediation_results.xlsx
## [1] "Group: female - Rows after filtering: 69"
## [1] "Group: female - Rows after sick filter: 69"
## [1] "Running mediation for: PFHpA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## Saving results to: C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/100 basic_divisions_Steatosis_female_2026-04-14_mediation_results.xlsx
## [1] "Group: male - Rows after filtering: 35"
## [1] "Group: male - Rows after sick filter: 35"
## [1] "Running mediation for: PFHpA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS S Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS B Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11b.OHA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KDHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KA4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHEA Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS T.Epi.T Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS AN Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHT Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DOC Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P5 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P4 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E2 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E1 Steatosis.Grade - Treat val: 1 - Control val: 0"
## Saving results to: C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/100 basic_divisions_Steatosis_male_2026-04-14_mediation_results.xlsx
# #Fibrosis
OutcomeVariables="Fibrosis Stage"
ccovae=CombinedData[,c("Fibrosis.Stage.0.to.4")];
sick_group=ccovae>0 #toth#
fn=file_names[2];
RunEssentialAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables, AllData, Group, name, simss, t.valueList, test, sick, sick_group, date = format(Sys.Date(), "%Y-%m-%d"), fn, lkm, joo)
## [1] "Group: All - Rows after filtering: 104"
## [1] "Group: All - Rows after sick filter: 104"
## [1] "Running mediation for: PFHpA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## Saving results to: C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/100 basic_divisions_Fibrosis_All_2026-04-14_mediation_results.xlsx
## [1] "Group: female - Rows after filtering: 69"
## [1] "Group: female - Rows after sick filter: 69"
## [1] "Running mediation for: PFHpA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## Saving results to: C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/100 basic_divisions_Fibrosis_female_2026-04-14_mediation_results.xlsx
## [1] "Group: male - Rows after filtering: 35"
## [1] "Group: male - Rows after sick filter: 35"
## [1] "Running mediation for: PFHpA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS S Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS B Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11b.OHA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KDHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X11.KA4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHEA Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS T.Epi.T Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS X17a.OHP4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS AN Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DHT Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS DOC Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P5 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS P4 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS A Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E2 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHpA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxA_Branched E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFHxS E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFNA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOA E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## [1] "Running mediation for: PFOS E1 Fibrosis.Stage - Treat val: 1 - Control val: 0"
## Saving results to: C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/100 basic_divisions_Fibrosis_male_2026-04-14_mediation_results.xlsx
# #Necroinfl.
# OutcomeVariables="Necroinflammation"
# ccovae=CombinedData[,c("Necroinflammation")]; sick_group=ccovae>0 #toth#
# fn=file_names[3];
# RunEssentialAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables,AllData,Group,name,simss,t.valueList,test,sick,sick_group,fn,lkm,date,joo,ip)
# Homa # remember to always test the function with minimun number setting or as light parameters as possible to get it through... before big runs
# OutcomeVariables="HOMA-IR"
# joo='ei';sick='yes'
# ccovae=CombinedData[,c("HOMA-IR")]; sick_group=ccovae>1.5 #toth#
# fn=file_names[4]; 
# RunEssentialAnalysis(TreatmentVariables, MediatorVariables, OutcomeVariables,AllData,Group,name,simss,t.valueList,test,sick,sick_group,fn,lkm,date,joo,ip)

# In addition, then there is also possibility for elaborations, i.e. the adjustments, of the models, 
# but I'll leave it from here for the time being... (please do ask if needed: patati 'tilde' utu dot fi) :)
# fyi: https://bookdown.org/content/b472c7b3-ede5-40f0-9677-75c3704c7e5c/more-than-one-mediator.html



#Some commenting with Copilot was done here above segment.

Making Heatmaps for Indirect and Direct Effects

# setwd("C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/") #check this if needed...
library(readxl)
setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis")
all_all=read_xlsx(path = "100basic All tikka3624 .xlsx") # #ok
# all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/hypo_basic/100 hypo_b_no_not sick All tikka221024 .xlsx") # #_2 :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all[,1]),];rownames(all_all)=all_all[,1]; all_all=all_all[,2:dim(all_all)[2]]; all_all=all_all[rev(order(all_all[,1])),]
all_all=all_all; #all_all=all_all[all_all[,1]>0,]
#https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
#https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
#https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
steroidGroups=groupValues
steroidGroups[,'Abbreviation'][steroidGroups[,'Abbreviation']=='17aOH-P4']='17a-OHP4'

#Switch = 0: PFAS vs steroids; switch=1: PFAS vs BAs and lipids, switch=2: steroids vs BAs and lipids (0-2 with both ACME and ADE (z='dir'))

CreateHeatmaps=function(hoi, rt2, switch, mn, z, corr, date, neg) {
  indir=c(); dir=c(); ip=c(); rn=c(); rn2=c()
  OutcomeVariables=colnames(CovariatesNonScaledData)[c(29:51,59:71)]; # The final dataframe is shorter or the like so there were less variables here...
  TreatmentVariables=colnames(CovariatesNonScaledData)[52:58];
  ## https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
  
  # Direct mediation analysis
  if (switch==1) {
    Mediator_ok=OutcomeVariables[OutcomeVariables %in% names(table(hoi[1:dim(hoi)[1],c(3)]))]
    for (i in 1:7) {
      for (j in 1:length(Mediator_ok)) {
        if (z=='dir') {
          ap=rt2[which(hoi[,1]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ADE']))) {
            if (median(ap[,'ADE']) < quantile(rt2[,'ADE'],0.5)) {
              apa=min(ap[,'ADE']);
              ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']
            } else {
              apa=max(ap[,'ADE']);
              ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        } else {
          ap=rt2[which(hoi[,1]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ACME']))) {
            if (median(ap[,'ACME']) < quantile(rt2[,'ACME'],0.5)) {
              apa=min(ap[,'ACME']);
              ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']
            } else {
              apa=max(ap[,'ACME']);
              ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        }
        rn=append(rn,hoi[,3][which(hoi[,1]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j])[1]]) # change this...
        rn2=append(rn2,hoi[,1][which(hoi[,1]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j])[1]])
        
        Matrix <- matrix(0, nrow = length(TreatmentVariables), ncol = length(Mediator_ok))
        myData <- data.frame(matrix = Matrix)
        colnames(myData) <- Mediator_ok; rownames(myData) <- TreatmentVariables
      }
    }
  } else if (switch==0) {
    # Indirect mediation analysis
    Mediator_ok=colnames(AllData)[9:28][colnames(AllData)[9:28] %in% names(table(hoi[1:dim(hoi)[1],c(2)]))]
    for (i in 1:7) {
      for (j in 1:length(Mediator_ok)) {
        if (z=='dir') {
          ap=rt2[which(hoi[,1]==TreatmentVariables[i] & hoi[,2]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ADE']))) {
            if (median(ap[,'ADE']) < quantile(rt2[,'ADE'],0.5)) {
              apa=min(ap[,'ADE']);
              ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']
            } else {
              apa=max(ap[,'ADE']);
              ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        } else {
          ap=rt2[which(hoi[,1]==TreatmentVariables[i] & hoi[,2]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ACME']))) {
            if (quantile(ap[,'ACME'],0.50) < quantile(rt2[,'ACME'],0.5)) {
              apa=min(ap[,'ACME']);
              ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']
            } else {
              apa=max(ap[,'ACME']);
              ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        }
        rn=append(rn,hoi[,2][which(hoi[,1]==TreatmentVariables[i] & hoi[,2]==Mediator_ok[j])[1]]) # change this...
        rn2=append(rn2,hoi[,1][which(hoi[,1]==TreatmentVariables[i] & hoi[,2]==Mediator_ok[j])[1]])
        
        Matrix <- matrix(0, nrow = length(TreatmentVariables), ncol = length(Mediator_ok))
        myData <- data.frame(matrix = Matrix)
        colnames(myData) <- Mediator_ok; rownames(myData) <- TreatmentVariables
      }
    }
  } else if (switch==2) {
    TreatmentVariables=colnames(AllData)[9:28]; # These names are a bit mixed, but the idea is ok.
    Mediator_ok=OutcomeVariables[OutcomeVariables %in% names(table(hoi[1:dim(hoi)[1],c(3)]))]
    
    for (i in 1:length(TreatmentVariables)) {
      for (j in 1:length(Mediator_ok)) {
        if (z=='dir') {
          ap=rt2[which(hoi[,2]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ADE']))) {
            if (median(ap[,'ADE']) < quantile(rt2[,'ADE'],0.5)) {
              apa=min(ap[,'ADE']);
              ape=ap[ap[,'ADE']==min(ap[,'ADE']),'z0.p']
            } else {
              apa=max(ap[,'ADE']);
              ape=ap[ap[,'ADE']==max(ap[,'ADE']),'z0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        } else {
          ap=rt2[which(hoi[,2]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j]),]; 
          if (!is.na(median(ap[,'ACME']))) {
            if (median(ap[,'ACME']) < quantile(rt2[,'ACME'],0.5)) {
              apa=min(ap[,'ACME']);
              ape=ap[ap[,'ACME']==min(ap[,'ACME']),'d0.p']
            } else {
              apa=max(ap[,'ACME']);
              ape=ap[ap[,'ACME']==max(ap[,'ACME']),'d0.p']
            }
          } else {
            apa=0; ape=1
          }
          indir=append(indir,apa) # or c(1) hoi 1 or 5 (5 is orig)
          ip=append(ip,ape)
        }
        rn=append(rn,hoi[,3][which(hoi[,2]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j])[1]]) # change this...
        rn2=append(rn2,hoi[,2][which(hoi[,2]==TreatmentVariables[i] & hoi[,3]==Mediator_ok[j])[1]])
        Matrix <- matrix(0, nrow = length(TreatmentVariables), ncol = length(Mediator_ok))
        myData <- data.frame(matrix = Matrix)
        colnames(myData) <- Mediator_ok; rownames(myData) <- TreatmentVariables
      }
    }
  } # You need three of these, yes :) ;print(ap[,'ADE']) print(rownames(ap[ap[,'ADE']==min(ap[,'ADE']),]))
  
  tot=cbind(rn2,rn,indir) # or indir or dir
  tot=tot[!is.na(tot[,1]),]
  tot=as.data.frame(tot)

  
  # Here you would need to have an evaluation for the 'else', if it is negative, it needs to be max negative, and vice versa for the
  # There does not seem to be doubles: table(tot[,1])
  
  tot[,3]=as.numeric(tot[,3])
  
  library(reshape2)
  AuxMed=dcast(tot, rn2~rn, value.var='indir')
  AuxMed[is.na(AuxMed)]=0
  rownames(AuxMed)=AuxMed[,1]
  AuxMed=AuxMed[,2:dim(AuxMed)[2]]
  AuxMed=as.data.frame(AuxMed)
  AuxMedr=matrix(as.numeric(unlist(AuxMed)),nrow=dim(AuxMed)[1],ncol=dim(AuxMed)[2])
  colnames(AuxMedr)=colnames(AuxMed); rownames(AuxMedr)=rownames(AuxMed)
  resulta1 <- AuxMedr   # ok define resulta1
 
  # Ensure all rows and columns from myData are present in AuxMedr
  if (sum(!rownames(myData) %in% rownames(AuxMedr)) > 0) {
    to_df=rownames(myData)[!rownames(myData) %in% rownames(AuxMedr)]
    AuxMedr=rbind(AuxMedr, myData[to_df,]); AuxMedr=AuxMedr[rownames(myData),]
  }
  if (sum(!colnames(myData) %in% colnames(AuxMedr)) > 0) {
    to_df=colnames(myData)[!colnames(myData) %in% colnames(AuxMedr)]
    AuxMedr=cbind(AuxMedr, myData[,to_df]); AuxMedr=AuxMedr[,colnames(myData)]
  }
  
  tot=cbind(rn2, rn, ip)
  tot=tot[!is.na(tot[,1]),]
  tot=as.data.frame(tot)
  tot[,3]=as.numeric(tot[,3])
  
  AuxMeda=dcast(tot, rn2~rn, value.var='ip')
  AuxMeda[is.na(AuxMeda)]=0
  rownames(AuxMeda)=AuxMeda[,1]
  AuxMeda=AuxMeda[,2:dim(AuxMeda)[2]]
  AuxMedra=matrix(as.numeric(unlist(AuxMeda)),nrow=dim(AuxMeda)[1],ncol=dim(AuxMeda)[2])
  colnames(AuxMedra)=colnames(AuxMeda); rownames(AuxMedra)=rownames(AuxMeda)
  p.mat.a1 <- AuxMedra     # ok 
  
  if (sum(!rownames(myData) %in% rownames(AuxMedra)) > 0) {
    to_df=rownames(myData)[!rownames(myData) %in% rownames(AuxMedra)]
    AuxMedra=rbind(AuxMedra, myData[to_df,]); AuxMedra=AuxMedra[rownames(myData),]
  }
  if (sum(!colnames(myData) %in% colnames(AuxMedra)) > 0) {
    to_df=colnames(myData)[!colnames(myData) %in% colnames(AuxMedra)]
    AuxMedra=cbind(AuxMedra, myData[,to_df]); AuxMedra=AuxMedra[,colnames(myData)]
  }
  
  if (switch==1) {
    # For direct effects
  } else if (switch==0) {
    # For indirect effects
    AuxMedra=AuxMedra[,steroidGroups[,'Abbreviation'][steroidGroups[,'Abbreviation'] %in% colnames(AuxMedra)]]
    AuxMedr=AuxMedr[,steroidGroups[,'Abbreviation'][steroidGroups[,'Abbreviation'] %in% colnames(AuxMedr)]]
  } else if (switch==2) {
    AuxMedra=AuxMedra[steroidGroups[,'Abbreviation'],]
    AuxMedr=AuxMedr[steroidGroups[,'Abbreviation'],]
  }
  
    # With the neg. you do not put these:
  if (dim(resulta1)[2]==36) {
  OutcomeVariables=colnames(CovariatesNonScaledData)[c(29:51,59:71)];
  OutcomeVariables=OutcomeVariables[c(1,23,2:22,24:length(OutcomeVariables))]
  resulta1=resulta1[,OutcomeVariables[OutcomeVariables %in% colnames(resulta1) ]];p.mat.a1=p.mat.a1[,OutcomeVariables[OutcomeVariables %in% colnames(p.mat.a1) ]] }
  for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}} 
  # resulta1 <- t(resulta1);
  # p.mat.a1 <- t(p.mat.a1)
  # ou=round(min(c(abs(max(resulta1)),abs(min(resulta1)))),2)
  # op=ou-0.01
  ImgRange=abs(round((min(resulta1)),1)); ImgRangea=abs(round((max(resulta1)),1))
  ImgRange2=abs(round((max(resulta1)-0.01),3));ImgRange3=abs(round((min(resulta1)+0.01),3));
  
  if (ImgRangea-ImgRange >=0 ) {resulta1[resulta1 >= ImgRange2] = ImgRangea; resulta1[resulta1 <= -ImgRange3] = -ImgRangea; ImgRange=ImgRangea} else 
  if (ImgRangea-ImgRange < 0 ) {resulta1[resulta1 <= -ImgRange] = -ImgRange; resulta1[resulta1 >= ImgRange2] = ImgRange}  
  
  path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path) #check this if needed...
  width  <- 12
  height <- 10
  width  <- as.numeric(width)
  height <- as.numeric(height)
  jpeg(paste("Heatmap of high",date, mn,neg,".jpg"), width = width, height = height, quality = 100,pointsize = 14, res=300);# par( ps=ps)# par(cex.lab=90) 22 18
  # col = brewer.pal(n = 9, name = "YlOrRd")
  order="original"; range='orig';corre='no_renormaa'; type='full'; method='color';ga='All';gf='Female';gm='Male' #color square
  cl.offset=20;cl.length=7;cl.cex = 1.45;pch.cex=2.45;pch=2;cl.pos = 'r'; #cl.offset=2;cl.length=5;cl.cex = 1.3;pch.cex=1.95;pch=14;
  
  col=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)
  if (neg=='no') {col=colorRampPalette(c( 'white','orange'), alpha = TRUE)(150)} else if (neg=='yes')
  {col=colorRampPalette(c('blue', 'white'), alpha = TRUE)(150)} else {col=col}
  
  # if (corr==TRUE) {if (min(as.matrix(resulta1))< -1  | max(as.matrix(resulta1))> 1) {resulta1=rango(resulta1,-1,1)}} else if (min(as.matrix(resulta1)) >= 0)  {resulta1=rango(resulta1,-1,1)} #
  # resulta1=rango(resulta1,-1,1)
  # if (min(as.matrix(resulta1)) >= 0  | max(as.matrix(resulta1)) <= 0) {resulta1=rango(resulta1,-1,1)}
  
  corrplot(as.matrix(resulta1), type = type, order = order,method=method, p.mat=as.matrix(p.mat.a1), tl.col = "black", 
           cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black',pch=pch,# Did I just need to add pch for the color to work.. '#FEE12B'
           sig.level = c(0.05),cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset,cl.length=cl.length, #.001, .05, .2
           tl.srt = 90, diag = TRUE,col=col,is.corr = corr,col.lim=c(-ImgRange,ImgRange)) 
  #only in age...0.001,col.lim=c(-ImgRange,ImgRange) #rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]) col.lim=c(-1,1) col.lim=c(-ImgRange,ImgRange)
  #non were significant in neg... but after mody yes!
  
  dev.off();
  eoh=paste("Heatmap of high",date, mn,neg,".jpg"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
  # my_image <- image_read(eoh);my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
  
  svglite::svglite(
  paste0("Heatmap of high", date, mn, neg, ".svg"),
  width  = width,
  height = height)

order="original"; range='orig'; corre='no_renormaa'
type='full'; method='color'
ga='All'; gf='Female'; gm='Male'

cl.offset = 20
cl.length = 7
cl.cex    = 1.45
pch.cex   = 2.45
pch       = 2
cl.pos    = 'r'

col = colorRampPalette(c('blue', 'white', 'orange'), alpha = TRUE)(150)
if (neg == 'no') {
  col = colorRampPalette(c('white','orange'), alpha = TRUE)(150)
} else if (neg == 'yes') {
  col = colorRampPalette(c('blue','white'), alpha = TRUE)(150)
}

corrplot(
  as.matrix(resulta1),
  type       = type,
  order      = order,
  method     = method,
  p.mat      = as.matrix(p.mat.a1),
  tl.col     = "black",
  cl.cex     = cl.cex,
  pch.cex    = pch.cex,
  pch.col    = "black",
  pch        = pch,
  sig.level  = c(0.05),
  cl.pos     = cl.pos,
  insig      = "label_sig",
  cl.offset  = cl.offset,
  cl.length  = cl.length,
  tl.srt     = 90,
  diag       = TRUE,
  col        = col,
  is.corr    = corr,
  col.lim    = c(-ImgRange, ImgRange)
)

dev.off()

  
  return(list(resulta1, p.mat.a1))
}

# Switch = 0: PFAS vs steroids; 
# Switch = 1: PFAS vs BAs and lipids, 
# Switch = 2: steroids vs BAs and lipids (0-2 with both ACME and ADE (z='dir'))
# corr=TRUE;z='idir' # uliulie2=CreateHeatmaps(hoi, switch=1,mn='indiruush',z,corr,date); dim(uliulie2[[1]]) 

# #Let's get this comparable done:
# First the 'matrisse'
corr=FALSE; z='dir'; switch=2 
u3=all_all; c1=c(); # mn='basicas' #
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
rt2=c1[complete.cases(c1), ] # 0.49 is optimal p value cutoff to get dim(mat[[2]])[2] as 36
hoi = c(); hoi=scan(text=rownames(rt2), what="") # scan(text=rownames(rt2), what="")
hoi = matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids') # ,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
mn=paste0(z,'ade_s_vs_bal_no order3b'); neg='else'
mat=CreateHeatmaps(hoi, rt2, switch, mn, z, corr, date, neg); # dim(mat[[2]])[2] # Indirect effect, kasvata... dim(mat[[2]])[2] == 36

corr=FALSE; z='dir'; switch=1; 
u3=all_all;
c1=c() #
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
c1=c1[rev(order(c1[,'ADE'])),]; #
c1=c1[c1[,'z0.p'] < 0.89, ] # 0.98/0.81/0.45/0.53/0.1... but gives 33 columns so use the other then..
# c1=c1[c1[,'d0.p'] < 0.50, ]
# let us start with the above optima... 643/or similar is dim(c1)[1] so need higher p to get all; (dim(c1)[1]-1)
# Check if this needs to be ADE instead...
c3=c1[c1[,'ADE'] < 0,] # -0.01,]#quantile(c1[,'ACME'])[2]
c1=c3;
rt2=c1[complete.cases(c1), ]
hoi=c(); hoi=scan(text=rownames(rt2), what="") # scan(text=rownames(rt2), what="")
hoi=matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids') # ,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
# mat_neg=list(c(1,2),c(3,4))
mn=paste0(z,'neg'); neg='yes'
mat_neg=CreateHeatmaps(hoi, rt2, switch, mn, z, corr, date, neg); 
# dim(mat_neg[[2]])[2]  #Increase n so that dim(mat_pos[[2]])[2] equals the length of c(x3, x6), i.e., 36.

u3=all_all; c1=c(); # mn='posae'
c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
c1=c1[rev(order(c1[,'ACME'])),]; # Check acmes and ades
c1=c1[c1[,'z0.p']<0.89, ]
c2=c1[c1[,'ADE'] >= 0,]
c1=c2; # 
# c1= c1[sample(1:nrow(c1)), ];
rt2=c1[complete.cases(c1), ] # rt2[1,]=rt2[1,]
hoi=c(); hoi=scan(text=rownames(rt2), what="") # scan(text=rownames(rt2), what="")
hoi=matrix(hoi, ncol = 3,  byrow = TRUE); colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids') # ,'Gender') ,'Desig.')
hoi[,c(2)]  <- gsub("\\.", "-",  hoi[,c(2)]  ); hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
mn=paste0(z,'posa2aa'); neg='no'
mat_pos=CreateHeatmaps(hoi, rt2, switch, mn, z, corr, date, neg='no');

the_real=c(); the_real <- matrix(0, nrow = dim(mat[[1]])[1], ncol = dim(mat[[1]])[2]); the_real <- data.frame(the_real)
colnames(the_real) <- colnames(mat[[1]]); rownames(the_real) <- rownames(mat[[1]])
the_real2=c(); the_real2 <- matrix(0, nrow = dim(mat[[1]])[1], ncol = dim(mat[[1]])[2]); the_real2 <- data.frame(the_real2)
colnames(the_real2) <- colnames(mat[[1]]); rownames(the_real2) <- rownames(mat[[1]])

m1=mat[[1]]; m2=mat_pos[[1]]; m3=mat_neg[[1]]; m4=mat[[2]]; m5=mat_pos[[2]]; m6=mat_neg[[2]]
# m3=m3[,OutcomeVariables[OutcomeVariables %in% colnames(m3) ]]; m6=m6[,OutcomeVariables[OutcomeVariables %in% colnames(m6) ]]

# or... new logic:
the_real=m1; the_real2=m4
# for (i in rownames(m2)) {for (j in colnames(m2)) {if (the_real2[i,j] > 0 ) {the_real[i,j]=m1[i,j]; the_real2[i,j]=m4[i,j]}}}
the_real[the_real >= 0 & the_real < 0.03] = m2[the_real >= 0 & the_real < 0.03]
the_real2[the_real >= 0 & the_real < 0.03] = m5[the_real >= 0 & the_real < 0.03]
the_real[the_real < 0 & the_real > -0.04] = m3[the_real < 0 & the_real > -0.04]
the_real2[the_real < 0 & the_real > -0.04] = m6[the_real < 0 & the_real > -0.04]

# resulta1_big_id=mg # ma
# p.mat.a1_big_id=mgg # maa
# resulta1_big_id=ma
# p.mat.a1_big_id=maa
resulta1_big_id=the_real
p.mat.a1_big_id=the_real2
# resulta1_big_id=m1
# p.mat.a1_big_id=m4

resulta1 <- as.matrix(resulta1_big_id); p.mat.a1 <- as.matrix(p.mat.a1_big_id)
resulta1[is.na(resulta1)]=0

# With the neg. you do not put these:
if (dim(resulta1)[2]==36) {
  OutcomeVariables=colnames(CovariatesNonScaledData)[c(29:51,59:71)];
  OutcomeVariables=OutcomeVariables[c(1,23,2:22,24:length(OutcomeVariables))]
  resulta1=resulta1[,OutcomeVariables[OutcomeVariables %in% colnames(resulta1) ]]; p.mat.a1=p.mat.a1[,OutcomeVariables[OutcomeVariables %in% colnames(p.mat.a1) ]] 
}
for (i in 1:dim(resulta1)[1]) {for (j in 1:dim(resulta1)[2]) {if (resulta1[i,j]==0) {p.mat.a1[i,j]=0.5}}}

# In addition, you would need:
ImgRange=abs(round((min(resulta1)),1))
ImgRange2=abs(round((min(resulta1)+0.01),2))
resulta1[resulta1 > ImgRange] = ImgRange
resulta1[resulta1 < -ImgRange2] = -ImgRange

# resulta1[resulta1 < 0] = 0 
# so this extends the minimum value to the (max) min limit so as to get the true blue to be seen, at least once
path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path) # check this if needed...
if (dim(resulta1)[1]==7) {width = 4000; height=1500} else if (dim(resulta1)[1]==20) {width = 4000; height=2500} else if (dim(resulta1)[1]==36) {width = 2500; height=5000}
jpeg(paste("Heatmap of highed_the_real_ade_mat",z,date,".jpg"), width = width, height = height, quality = 100, pointsize = 14, res=300);
col=colorRampPalette(c('blue', 'white','orange'), alpha = TRUE)(150)

order="original"; range='orig'; corre='no_renormaa'; type='full'; method='color'; ga='All'; gf='Female'; gm='Male' # color square
cl.offset=25; cl.length=7; cl.cex = 1.1; pch.cex=1.95; pch=3; cl.pos = 'r'; 
# cl.offset=2; cl.length=5; cl.cex = 1.3; pch.cex=1.95; pch=14;

corrplot(resulta1, type = type, order = order, method=method, p.mat=p.mat.a1, tl.col = "black", # sum(COL2('RdBu')=="#FF7417")
         cl.cex = cl.cex, pch.cex=pch.cex, pch.col='black', pch=pch, # one colore option: '#FEE12B', check also pch:s that is is ok
         sig.level = c(0.05), cl.pos = cl.pos, insig = "label_sig", cl.offset=cl.offset, cl.length=cl.length,
         tl.srt = 90, diag = TRUE, col=col, is.corr = FALSE, col.lim=c(-ImgRange, ImgRange)) # only in age...0.001, -2,2 

dev.off(); 
## png 
##   2
# eoh=paste("Heatmap of highed_the_real_ade_mat",z,date,".jpg"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh,'.pdf'))
# my_image <- image_read(eoh); my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh,".svg"))
# path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"; setwd(path)

path = "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"
setwd(path)

if (dim(resulta1)[1] == 7) {
  width = 4000; height = 1500
} else if (dim(resulta1)[1] == 20) {
  width = 4000; height = 2500
} else if (dim(resulta1)[1] == 36) {
  width = 2500; height = 5000
}

# ---- REAL SVG DEVICE ----
svglite::svglite(
  paste0("Heatmap of highed_the_real_ade_mat", z, date, ".svg"),
  width  = width / 300,   # convert px-equivalent → inches
  height = height / 300
)

col = colorRampPalette(c('blue', 'white', 'orange'), alpha = TRUE)(150)

order  = "original"
range  = 'orig'
corre  = 'no_renormaa'
type   = 'full'
method = 'color'
ga='All'; gf='Female'; gm='Male'

cl.offset = 25
cl.length = 7
cl.cex    = 1.1
pch.cex   = 1.95
pch       = 3
cl.pos    = 'r'

corrplot(
  resulta1,
  type       = type,
  order      = order,
  method     = method,
  p.mat      = p.mat.a1,
  tl.col     = "black",
  cl.cex     = cl.cex,
  pch.cex    = pch.cex,
  pch.col    = 'black',
  pch        = pch,
  sig.level  = c(0.05),
  cl.pos     = cl.pos,
  insig      = "label_sig",
  cl.offset  = cl.offset,
  cl.length  = cl.length,
  tl.srt     = 90,
  diag       = TRUE,
  col        = col,
  is.corr    = FALSE,
  col.lim    = c(-ImgRange, ImgRange)
)

dev.off()
## png 
##   2
setwd(path)
  

#Copilot commenting worked for CreateHeatmaps function and some of the drives.
knitr::include_graphics(paste0('Heatmap of highed_the_real_ade_mat dir ',date,' .jpg'))
Heatmap of 'Indirect' Effects PFAS vs. Steroids with All Subjects

Heatmap of ‘Indirect’ Effects PFAS vs. Steroids with All Subjects

Making Sankey Diagrams

# To do these diagrams, you need to have a reduced dataset as well as a function that accounts for the group (male/female)
ReduceData=function(u3, Group, name, lkm, d) {
  c1=c() # u3=all_all1
  ACMEMedian=c(); ACMEpval=c(); ACMEVar=c()
  ADEMedian=c(); ADEpval=c(); ADEVar=c()
  c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
  ACMEMedian=0 # median(c1[,'ACME'][c1[,'ACME']>0])
  # c1=c1[order(c1[,'ACME']),];  # for 'negative' acmes
  c1=c1[rev(order(c1[,'ACME'])),];  
  # c1=c1[c1[,'ACME']<ACMEMedian & ((c1[,'ADE']-c1[,'ACME']) > 0), ] # c1=c1[c1[,'d0.p']<0.1, ]
  c1=c1[c1[,'ACME'] > ACMEMedian & ((c1[,'ACME']-c1[,'ADE']) > 0), ] # c1=c1[c1[,'d0.p']<0.1, ] 
  
  c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
  
  write.xlsx(c1, file = paste(name, Group, date, '.xlsx'), append = FALSE, row.names = TRUE)
  
  return(c1)
}

CreateSankeyPlots=function(RunAma, date, sick, Group, d) {
  # RunAma = na.omit(c1)
  rt2=RunAma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)
  # https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html
  # https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
  # https://github.com/MarioniLab/miloR
  # https://www.nature.com/articles/s41467-023-40458-9/figures/4
  name=paste('Contaminants_Steroids_BAs_or_Lipids_sims', date) # rtot=rtot_2000_mrct # rtot=uh5
  
  hoi=c(); 
  if (d=='t') {
    hoi=scan(text=rt2[,1], what=" ")
  } else {
    hoi=scan(text=rownames(rt2), what=" ")
  } # rownames(rt2)# names(rt2[,1]) rownames(rt2
  # hoi=scan(text=rownames(rt2), what=" ")# rownames(rt2)# names(rt2[,1]) rownames(rt2
  hoi=as.data.frame(matrix(hoi, ncol = 3,  byrow = TRUE), stringsAsFactors = FALSE) # Check this number (ncol) 3/4
  # hoi=cbind(hoi[,1:3],rt2[,2])
  hoi=hoi[,1:3]
  # colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Weight')# ,'Gender') ##
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')
  # ,'Gender') ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
  # https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
  # https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
  
  hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
  hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
  hoi[,'Steroids' ]  <- gsub("\\.", "-",  hoi[,'Steroids' ] ) #:)
  hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
  # df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids','Weight') # see the sankey test file 17.10.24 for this...
  
  df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids') 
  
  # In case you need to print to computer: meda='Sankey plot of ', pdf(paste(meda, name, sick, Group, ".pdf"), width = 20, height = 20,  pointsize = 18);
  # print(ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node, fill = factor(node), label = node)) +
  # geom_sankey(flow.alpha = 0.5, node.color = 1) + geom_sankey_label(size = 5.5, color = 1, fill = "white") +
  # scale_fill_viridis_d() + theme_sankey(base_size = 30) + theme(legend.position = "none") + theme(axis.title.x = element_blank())); dev.off()
  
  windowsFonts(A = windowsFont("Calibri (Body)")) 

  p=ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node, fill = factor(node), label = node)) +
            geom_sankey(flow.alpha = 1, node.color = 1) +
            # geom_sankey_label(size = 4.0, color = 1, fill = "white") + #
            geom_sankey_label(size = 8.0, color = 1, fill = "white") +
            scale_fill_viridis_d(option = "H", alpha = 0.75) +
            # scale_fill_viridis_c(option = "turbo") +
            # theme_sankey(base_size = 23) + #
            theme_sankey(base_size = 28) + theme(legend.position = "none") +
            # scale_fill_grey(start = 0.5, end = 0.5) +
            theme(axis.text.x = element_text(hjust = 0.5, vjust=7, colour = 'black') ) + # https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
            theme(axis.title.x = element_blank())

  # Nicer colors than grey:
  # p=ggplot(df2, aes(x = x,  next_x = next_x, node = node,  next_node = next_node, fill = factor(node), label = node)) +
  # geom_sankey(flow.alpha = 0.5, node.color = 1) + geom_sankey_label(size = 3.5, color = 1, fill = "white") +
  # scale_fill_viridis_d() + theme_sankey(base_size = 16) + theme(legend.position = "none") + theme(axis.title.x = element_blank());

  library(ragg)
  # Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
  # https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing

  path="C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/" # oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
  pngfile <- fs::path(path, paste0(Group, 'e', d, ".png")) # fs::path(knitr::fig_path(),  "theming2.png")
  # agg_png(pngfile, width = 30, height = 40, units = "cm", res = 300, scaling = 2) #
  agg_png(pngfile, width = 60, height = 80, units = "cm", res = 600, scaling = 2)
  plot(p)
  invisible(dev.off())
  knitr::include_graphics(pngfile)
  eoh=paste0(Group, 'e', d, ".png"); daiR::image_to_pdf(eoh, pdf_name=paste0(eoh, '.pdf'))
  my_image <- image_read(eoh); my_svg <- image_convert(my_image, format="svg"); image_write(my_svg, paste(eoh, ".svg"))
}

library(readxl)
# setwd("C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/") #check this if needed...
all_all=read_xlsx(path = "100basic All tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_all=read_xlsx(path = "C:/Users/patati/Desktop/Turku/R/hypo_basic/100 hypo_b_no_not sick All tikka221024 .xlsx") # #_2 :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
all_Female=read_xlsx(path = "100basic Female tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
all_Male=read_xlsx(path = "100basic Male tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);
 
sick='all samples';d='t'
lkm=30;Group='All'; name='just alala';date=paste0(date,'_allds')#dim(all_all)[1];
alma=ReduceData(all_all1,Group,name,lkm);
alma = na.omit(alma)
CreateSankeyPlots(alma,date,sick,Group,d)

date=paste0(date,'_femalads');name='just alala';Group='female';
almaf=ReduceData(all_Female,Group,name,lkm); #all_Female
almaf = na.omit(almaf)
CreateSankeyPlots(almaf,date,sick,Group,d)

date=paste0(date,'_malads');name='just alaal';Group='male';
almam=ReduceData(all_Male,Group,name,lkm);
almam = na.omit(almam) #https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
CreateSankeyPlots(almam,date,sick,Group,d)

# Alternative way to plot Sankeys with the ACMEs:
# Create data which can be used for Sankey
set.seed(111) # Set seed for reproducibility
theme_set(theme_light()) # Set the theme for the plots

# Trying to imitate the approach below:
u3=all_all1; d='t' # Assign dataset and set 'd' to 't'
c1=c() # Initialize c1

# Initialize ACME variables
ACMEMedian=c(); ACMEpval=c(); ACMEVar=c()

# Initialize ADE variables
ADEMedian=c(); ADEpval=c(); ADEVar=c()

c1= u3 #[u3[,'ADE'] < ADEMedian  & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
# ACMEMedian=0#median(c1[,'ACME'][c1[,'ACME']>0])
c1=c1[rev(order(c1[,'ACME'])),];  # Order c1 by ACME in descending order
c1=c1[((c1[,'ACME']-c1[,'ADE']) > 0), ] # Filter rows where ACME > ADE
c1=c1[c1[,'d0.p']<0.05, ] # Filter rows where p-value < 0.05
# c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
RunAma = na.omit(c1) # Remove rows with NA values
rt2=RunAma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html
# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
# https://github.com/MarioniLab/miloR
# https://www.nature.com/articles/s41467-023-40458-9/figures/4
name=paste('Contaminants_Steroids_BAs_or_Lipids_sims',date) # Create a name for the plot

hoi=c(); 
if (d=='t') {
  hoi=scan(text=rt2[,1] , what=" ") # Scan the first column of rt2
} else {
  hoi=scan(text=rownames(rt2) , what=" ") # Scan the row names of rt2
} # rownames(rt2)# names(rt2[,1]) rownames(rt2
# hoi=scan(text=rownames(rt2) , what=" ")# rownames(rt2)# names(rt2[,1]) rownames(rt2
hoi=as.data.frame(matrix(hoi, ncol = 3,  byrow = TRUE), stringsAsFactors = FALSE) # Convert hoi to a data frame with 3 columns
hoi=cbind(hoi[,1:3],rt2[,2]) # Combine hoi with the second column of rt2
colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Weight') # Set column names
# ,'Gender') ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot

# Replace specific values in the 'Steroids' column
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
hoi[,'Steroids' ]  <- gsub("\\.", "-",  hoi[,'Steroids' ] ) #:)
hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
  
# Create a long format data frame for Sankey plot
df22 <-
  hoi |>
  subset(Weight > -1.05) |>
  pivot_stages_longer(c("Contaminants", "Steroids", "Bile Acids or Lipids"), "Weight", "Bile Acids or Lipids")

# Set the position for the Sankey plot
pos <- position_sankey(v_space = "auto", order = "ascending", align = "justify")

# Create the Sankey plot
p <-
  ggplot(data = df22, mapping = aes(x = stage, y = Weight, group = node,
                  edge_id = edge_id, connector = connector)) + 
  theme_sankey(base_size = 28) + # Set the theme for the Sankey plot
  # theme(legend.position = "none") +
  # scale_fill_grey(start = 0.5, end = 0.5) +
  theme(axis.text.x = element_text(hjust = 0.5, vjust=7, colour = 'black') ) + # Customize axis text
  theme(axis.title.x = element_blank()) # Remove x-axis title

# Update the position for the Sankey plot
pos <- position_sankey(v_space = "auto", order = "descending")

# Add layers to the Sankey plot
p + geom_sankeyedge(aes(fill = Weight), position = pos) +
  geom_sankeynode(position = pos, fill = "#dfe0e6") +
  geom_text(aes(label = node), stat = "sankeynode", position = pos, cex = 2) +
  scale_fill_viridis_c(option = "turbo") # Set the color scale

# In case you need something else from somewhere else...
# setwd("C:/Users/patati/Desktop/Turku/R/hypo4/HOMAIR/perus_ok")
# all_all=read_xlsx(path = "100 hypo4_yes_sick All tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
# all_Female=read_xlsx(path = "100 hypo4_yes_sick Female tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
# all_Male=read_xlsx(path = "100 hypo4_yes_sick Male tikka76524 hyp4a_oki.xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #
# all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);

# path="C:/Users/patati/Desktop/Turku/R/basic_cova/All"
# # setwd("C:/Users/patati/Desktop/Turku/R/basic_cova/All")
# files=list.files(path=path, pattern=".RData", all.files=TRUE, full.names=TRUE) #https://www.geeksforgeeks.org/read-all-files-in-directory-using-r/
# list_of_files <- list() 
# for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
# names(list_of_files) <- str_remove(list.files(path=path, pattern=".RData", all.files=TRUE, full.names=FALSE),'.RData')  #https://stringr.tidyverse.org/reference/str_remove.html
# 
# # names(list_of_files)[1:3]
# setwd("C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/")
# all_all1=list_of_files[[1]] #all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
# all_Female=list_of_files[[2]] #all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
# all_Male=list_of_files[[3]] #all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);
# 
# d='nt';lkm=30;Group='All'; name='just all'
# alma=ReduceData(all_all1,Group,name,lkm);alma = na.omit(alma)
# CreateSankeyPlots(alma,date,sick,Group,d)
# 
# name='just all';Group='female';
# almaf=ReduceData(all_Female,Group,name,lkm); almaf = na.omit(almaf)
# CreateSankeyPlots(almaf,date,sick,Group,d)
# 
# name='just all';Group='male';
# almam=ReduceData(all_Male,Group,name,lkm); almam = na.omit(almam) 
# https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
# CreateSankeyPlots(almam,date,sick,Group,d)


##Alternetive plotting style
# ===== PACKAGES =====
suppressPackageStartupMessages({
  library(ggplot2)
  library(ggalluvial)
  library(readxl)
  library(dplyr)
  library(stringr)
  library(fs)
  library(ragg)
  # colorspace is used via after_scale() — part of ggplot2 v3.4+, colorspace enhances palettes
  # If you don't have colorspace installed, comment out the after_scale() line or install.packages("colorspace")
})

# ===== PARAMETERS =====
# Input Excel path
excel_path <- "C:/Users/patati/Desktop/Turku/R/tests6/tests_basic/100basic All tikka3623 .xlsx"

# Output directory (make sure it exists)
out_dir <- "C:/Users/patati/Documents/GitHub/Steroid_Data_Analysis/"

# How many top/bottom links to keep for negative/positive ACME
lkm <- 10

# Plot file name stem
file_stem <- "sankey_med_res_all_nonadj_g40l0.1"

# Font family (Windows: "Arial" usually works; otherwise pick an installed font)
base_family <- "Arial"

# ===== 1) LOAD =====
# all_all <- readxl::read_xlsx(path = excel_path)

# ===== 2) FILTER BY d0.p AND SELECT EXTREMES BY ACME =====
# Ensure the needed columns exist
required_cols <- c("ACME", "d0.p")
missing_cols <- setdiff(required_cols, names(all_all))
if (length(missing_cols)) {
  stop("Missing required columns in the Excel file: ", paste(missing_cols, collapse = ", "))
}

c1 <- as.data.frame(all_all)

# Keep rows with d0.p < 0.1
c1 <- c1 %>% filter(.data[["d0.p"]] < 0.1)

# Defensive: drop rows with missing ACME
c1 <- c1 %>% filter(!is.na(.data[["ACME"]]))

# Split negative/positive and take extremes by ACME
# (Use explicit dplyr:: namespace to avoid desc() masking issues)
neg <- c1 %>%
  dplyr::filter(.data[["ACME"]] < 0) %>%
  dplyr::arrange(.data[["ACME"]]) %>%
  dplyr::slice_head(n = lkm)

pos <- c1 %>%
  dplyr::filter(.data[["ACME"]] > 0) %>%
  dplyr::arrange(dplyr::desc(.data[["ACME"]])) %>%
  dplyr::slice_head(n = lkm)

c1_ext <- dplyr::bind_rows(neg, pos)

# Color by ACME sign
c1_ext <- c1_ext %>%
  mutate(color = ifelse(.data[["ACME"]] < 0, "blue", "orange"))

# ===== 3) PARSE THE FIRST COLUMN INTO THREE PARTS =====
# Assumption: the FIRST column of the sheet contains the composite name.
# If your composite label is in a different column, change [[1]] to the name, e.g. [["name_col"]].
name_col <- names(c1_ext)[1]
names_vec <- as.character(c1_ext[[name_col]])

# Light cleanup pass before splitting
names_vec <- str_trim(names_vec)
names_vec <- gsub("PFHxA_Branched", "PFHxA-B.", names_vec)

# Try underscore-first parsing; if a row doesn't have >= 2 underscores, fall back to space split.
split_underscore <- strsplit(names_vec, "_", fixed = TRUE)

parsed_mat <- vapply(split_underscore, function(x) {
  len <- length(x)
  if (len >= 3) {
    # Contaminant = first token, Steroid = second, Outcome = rest
    c(Contaminants = x[1],
      Steroids     = x[2],
      `Bile Acids or Lipids` = paste(x[3:len], collapse = "_"))
  } else {
    # Fallback: split by spaces if underscores are not present
    xs <- strsplit(paste(x, collapse = "_"), "\\s+")[[1]]
    len2 <- length(xs)
    if (len2 >= 3) {
      c(Contaminants = xs[1],
        Steroids     = xs[2],
        `Bile Acids or Lipids` = paste(xs[3:len2], collapse = " "))
    } else {
      c(Contaminants = NA_character_,
        Steroids     = NA_character_,
        `Bile Acids or Lipids` = NA_character_)
    }
  }
}, FUN.VALUE = c(Contaminants = "", Steroids = "", `Bile Acids or Lipids` = ""))

hoi <- as.data.frame(t(parsed_mat), stringsAsFactors = FALSE)

# Optional label normalizations
hoi$Contaminants <- gsub("PFHxA_Branched", "PFHxA_B.", hoi$Contaminants)
hoi$Steroids     <- gsub("17a\\.OHP5", "17a-OHP5", hoi$Steroids)
hoi$Steroids     <- gsub("17a\\.OHP4", "17a-OHP4", hoi$Steroids)
hoi$Steroids     <- gsub("11\\.KT",    "11-KT",    hoi$Steroids)
hoi$Steroids     <- gsub("11\\.KDHT",  "11-KDHT",  hoi$Steroids)

# Attach ACME & color back; keep only rows that parsed successfully
good <- complete.cases(hoi$Contaminants, hoi$Steroids, hoi$`Bile Acids or Lipids`)
hoi <- hoi[good, , drop = FALSE]
hoi$ACME  <- c1_ext$ACME[good]
hoi$color <- ifelse(hoi$ACME < 0, "blue", "orange")

# Optional: if you want to verify parsing results
# print(head(hoi, 20))

# ===== 4) BUILD THE ALLUVIAL PLOT =====
p <- ggplot(
  hoi,
  aes(
    axis1 = Contaminants,
    axis2 = Steroids,
    axis3 = `Bile Acids or Lipids`,
    y     = abs(ACME),
    fill  = color
  )
) +
  # Flows (no `lineend` — not supported in some ggalluvial versions)
  geom_alluvium(
    aes(
      fill   = color,
      # If colorspace is installed, this creates a darker edge from fill; otherwise comment it out.
      colour = after_scale(colorspace::darken(fill, 0.15))
    ),
    width = 0.15,
    knot.pos = 0.5,
    alpha = 0.9,
    size  = 0.6,
    show.legend = c(colour = FALSE)   # keep only the fill legend
  ) +
  # Strata (blocks)
  geom_stratum(width = 0.15, fill = "white", color = "black") +
  # Stratum labels
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum)),
    size = 6.7, fontface = "bold", family = base_family
  ) +
  scale_x_discrete(
    limits = c("Contaminants", "Steroids", "Bile Acids or Lipids"),
    expand = c(.05, .05)
  ) +
  scale_fill_manual(
    name   = "ACME Direction",
    values = c("blue" = "blue", "orange" = "orange"),
    labels = c("Negative", "Positive"),
    na.translate = FALSE
  ) +
  theme_minimal(base_family = base_family) +
  theme(
    legend.position   = "right",
    legend.text       = element_text(size = 22, face = "bold", family = base_family),
    legend.title      = element_text(size = 22, face = "bold", family = base_family),
    axis.title.y      = element_blank(),
    axis.text.y       = element_blank(),
    axis.ticks.y      = element_blank(),
    axis.text.x       = element_text(size = 22, color = "black", face = "bold", family = base_family),
    panel.grid        = element_blank(),
    panel.background  = element_blank(),
    plot.background   = element_blank()
  )

# Show interactively
print(p)

# ===== 5) SAVE HIGH-RES PNG =====
if (!dir_exists(out_dir)) dir_create(out_dir)
pngfile <- fs::path(out_dir, paste0(file_stem, ".png"))

ragg::agg_png(
  filename = pngfile,
  width = 120, height = 60, units = "cm",
  res = 500, scaling = 2
)
suppressWarnings(print(p))  # silences harmless "Some strata appear at multiple axes" messages
invisible(dev.off())

message("Saved plot: ", pngfile)


# Show interactively
# print(p)

# ===== 5) SAVE VECTOR PDF =====
if (!fs::dir_exists(out_dir)) fs::dir_create(out_dir)
pdffile <- fs::path(out_dir, paste0(file_stem, ".pdf"))

cairo_pdf(
  filename = pdffile,
  width = 120/2.54,   # cm → inches
  height = 60/2.54,
  family = "sans"
)

suppressWarnings(print(p))  # silences harmless warnings
dev.off()
## png 
##   2
message("Saved plot: ", pdffile)




# ===== SAVE AS SVG (vector) =====
svgfile <- fs::path(path, paste0(Group, Out, "BoxB", ".svg"))

svglite::svglite(
  file = svgfile,
  width = 60 / 2.54,   # cm → inches
  height = 36 / 2.54
)
print(p)
dev.off()
## png 
##   2
message("✅ SVG saved successfully: ", svgfile)



# Copiloting helped with some of the comments. (But not with the streamlining.)

Other Handy Functions

# CalculateDemographics:
# This is for nonscaled (or not-logged or the like 'raw') data only!

# CalculateDemographics Function
CalculateDemographics <- function(AllData, clinicalData, Group) {
  # Determine the condition based on the group
  cond <- switch(Group,
                 'female' = AllData[,'SEX.1F.2M'] == min(AllData[,'SEX.1F.2M']),
                 'male' = AllData[,'SEX.1F.2M'] == max(AllData[,'SEX.1F.2M']),
                 'All' = rep(TRUE, nrow(AllData)))
  
  # Filter the data based on the condition
  tv_red <- AllData[cond, ]
  Clini2 <- clinicalData[rownames(tv_red), ]
  
  # Calculate demographics
  N <- nrow(tv_red)
  AGE <- median(tv_red[,'AGE'])
  AGEq1 <- quantile(tv_red[,'AGE'], 0.25)
  AGEq3 <- quantile(tv_red[,'AGE'], 0.75)
  BMI <- median(tv_red[,'BMI'])
  BMIq1 <- quantile(tv_red[,'BMI'], 0.25)
  BMIq3 <- quantile(tv_red[,'BMI'], 0.75)
  PFAS <- median(tv_red[,'PFAS'])
  PFASq1 <- quantile(tv_red[,'PFAS'], 0.25)
  PFASq3 <- quantile(tv_red[,'PFAS'], 0.75)
  HDL <- median(Clini2[,'HDL'])
  HDLq1 <- quantile(Clini2[,'HDL'], 0.25)
  HDLq3 <- quantile(Clini2[,'HDL'], 0.75)
  LDL <- median(as.numeric(Clini2[,'LDL']))
  LDLq1 <- quantile(as.numeric(Clini2[,'LDL']), 0.25)
  LDLq3 <- quantile(as.numeric(Clini2[,'LDL']), 0.75)
  SGmin <- min(tv_red[,'Steatosis.Grade.0.To.3'])
  SGmax <- max(tv_red[,'Steatosis.Grade.0.To.3'])
  FSmin <- min(tv_red[,'Fibrosis.Stage.0.to.4'])
  FSmax <- max(tv_red[,'Fibrosis.Stage.0.to.4'])
  NFmin <- min(tv_red[,'Necroinflammation'])
  NFmax <- max(tv_red[,'Necroinflammation'])
  HImin <- min(tv_red[,'HOMA-IR'])
  HImax <- max(tv_red[,'HOMA-IR'])
  HI <- median(tv_red[,'HOMA-IR'])
  HIq1 <- quantile(tv_red[,'HOMA-IR'], 0.25)
  HIq3 <- quantile(tv_red[,'HOMA-IR'], 0.75)
  
  return(c(N, AGE, AGEq1, AGEq3, BMI, BMIq1, BMIq3, PFAS, PFASq1, PFASq3, HDL, HDLq1, HDLq3, LDL, LDLq1, LDLq3,
           SGmin, SGmax, FSmin, FSmax, NFmin, NFmax, HImin, HImax, HI, HIq1, HIq3))
}

# Calculate demographics for each group
Group <- 'All'; d_all <- CalculateDemographics(CombinedData, clinicalData, Group)
Group <- 'female'; d_female <- CalculateDemographics(CombinedData, clinicalData, Group)
Group <- 'male'; d_male <- CalculateDemographics(CombinedData, clinicalData, Group)

# Combine results into a matrix
d_totaali <- t(rbind(d_all, d_female, d_male))
rownames(d_totaali) <- c('N', 'AGE', 'AGEq1', 'AGEq3', 'BMI', 'BMIq1', 'BMIq3', 'PFAS', 'PFASq1', 'PFASq3', 'HDL', 'HDLq1', 'HDLq3', 'LDL', 'LDLq1', 'LDLq3',
                         'SGmin', 'SGmax', 'FSmin', 'FSmax', 'NFmin', 'NFmax', 'HImin', 'HImax', 'HI', 'HIq1', 'HIq3')
d_totaali
##              d_all   d_female     d_male
## N      104.0000000 69.0000000 35.0000000
## AGE     49.0000000 46.0000000 52.0000000
## AGEq1   43.0000000 43.0000000 44.5000000
## AGEq3   54.2500000 53.0000000 58.0000000
## BMI     45.2960308 45.1082579 46.2603878
## BMIq1   41.0973732 41.1066653 41.3139309
## BMIq3   49.0881549 48.7197232 49.6020327
## PFAS     0.4114301  0.3542222  0.4797220
## PFASq1   0.2846164  0.2517962  0.4114301
## PFASq3   0.5679388  0.5326427  0.7002281
## HDL      1.1000000  1.1300000  1.0100000
## HDLq1    0.9575000  0.9700000  0.8700000
## HDLq3    1.2825000  1.3600000  1.1650000
## LDL      2.3000000  2.5000000  1.9000000
## LDLq1    1.6000000  1.9000000  1.3500000
## LDLq3    2.9000000  3.0000000  2.4000000
## SGmin    0.0000000  0.0000000  0.0000000
## SGmax    3.0000000  3.0000000  1.0000000
## FSmin    0.0000000  0.0000000  0.0000000
## FSmax    4.0000000  3.0000000  4.0000000
## NFmin    0.0000000  0.0000000  0.0000000
## NFmax    2.0000000  1.0000000  2.0000000
## HImin    0.2426667  0.7822222  0.2426667
## HImax   14.5244444 14.5244444 10.7555556
## HI       3.1257778  3.0417778  3.9200000
## HIq1     1.7556667  1.6333333  2.2644444
## HIq3     4.5747778  3.8666667  5.4240000
# Sample Size Functions
# Function to determine sample sizes for binary outcomes
CalculateSampleSize <- function(NonAlcoholicFattyLiverDisease, OutcomeVariables, Group) { 
  NAFLDo <- switch(Group,
                   'Male' = NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[,'SEX.1F.2M'] == 2, ],
                   'Female' = NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[,'SEX.1F.2M'] == 1, ],
                   'All' = NonAlcoholicFattyLiverDisease)
  
  n0 <- nrow(NAFLDo[NAFLDo[, OutcomeVariables] == 0, ])
  n1 <- nrow(NAFLDo[NAFLDo[, OutcomeVariables] > 0, ])
  
  return(c(n0, n1))
}

# Function to determine sample sizes for continuous outcomes
CalculateSampleSizeContinuous <- function(NonAlcoholicFattyLiverDisease, OutcomeVariables, Group, sick_groupe) { 
  sample_data <- c()
  for (i in 1:2) {
    NAFLDo <- switch(Group,
                     'Male' = NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[,'SEX.1F.2M'] == 2 & (i == 1 & !sick_groupe | i == 2 & sick_groupe), ],
                     'Female' = NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[,'SEX.1F.2M'] == 1 & (i == 1 & !sick_groupe | i == 2 & sick_groupe), ],
                     'All' = NonAlcoholicFattyLiverDisease[(i == 1 & !sick_groupe | i == 2 & sick_groupe), ])
    sample_data <- c(sample_data, nrow(NAFLDo))
  }
  return(sample_data)
}

# Prepare NonAlcoholicFattyLiverDisease data
NonAlcoholicFattyLiverDisease <- CombinedData[, 1:28]
NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 5] > 0, 5] <- 1
NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 6] > 0, 6] <- 1
NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 7] > 0, 7] <- 1
NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 8] <= 1.5, 8] <- 0
NonAlcoholicFattyLiverDisease[NonAlcoholicFattyLiverDisease[, 8] > 1.5, 8] <- 1
colnames(NonAlcoholicFattyLiverDisease) <- gsub("-", ".", colnames(NonAlcoholicFattyLiverDisease))
colnames(NonAlcoholicFattyLiverDisease) <- gsub("/", ".", colnames(NonAlcoholicFattyLiverDisease))
colnames(NonAlcoholicFattyLiverDisease) <- gsub("11", "X11", colnames(NonAlcoholicFattyLiverDisease))
colnames(NonAlcoholicFattyLiverDisease) <- gsub("17", "X17", colnames(NonAlcoholicFattyLiverDisease))
colnames(NonAlcoholicFattyLiverDisease) <- gsub("#", ".", colnames(NonAlcoholicFattyLiverDisease))
colnames(NonAlcoholicFattyLiverDisease)[colnames(NonAlcoholicFattyLiverDisease) == 'X17aOH.P4'] <- 'X17.aOHP4'

# Calculate sample sizes for each outcome and group
jappend <- c()
outcomes <- c('Steatosis.Grade.0.To.3', 'Fibrosis.Stage.0.to.4', 'Necroinflammation', 'HOMA.IR')
steroidGroups <- c('All', 'Female', 'Male')

for (OutcomeVariables in outcomes) {
  for (Group in steroidGroups) {
    jappend <- c(jappend, CalculateSampleSize(NonAlcoholicFattyLiverDisease, OutcomeVariables, Group))
  }
}

matrix(unlist(jappend), ncol = 8)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,]   83   11   62   26   88    9   18   56
## [2,]   21   25   42   19   16   28   86    5
## [3,]   58   10   43   16   60    7   13   30
# Mean and Quantile Calculation Function
CalculateMeanQuantiles <- function(Group) {
  cond <- switch(Group,
                 'Female' = AllData[,'Gender'] == min(AllData[,'Gender']),
                 'Male' = AllData[,'Gender'] == max(AllData[,'Gender']),
                 'All' = rep(TRUE, nrow(AllData)))
  
  tv_red <- CombinedData[cond, ]
  # M <- tv_red[, MediatorVariables] #Change this if needed, like
  AllData_aux=AllData
  colnames(AllData_aux) <- gsub("_L$", "", colnames(AllData_aux))
  Var=colnames(AllData_aux)[9:71]; 
  M <- tv_red[, Var] #check
  
  cM <- round(apply(M, 2, median, na.rm = TRUE), 3)
  quants <- c(0.25, 0.75)
  csd <- round(apply(M, 2, quantile, probs = quants, na.rm = TRUE), 3)
  
  tot <- rbind(cM, csd)
  return(tot)
}

totQ <- CalculateMeanQuantiles('All')
femQ <- CalculateMeanQuantiles('Female')
menQ <- CalculateMeanQuantiles('Male')
tot3 <- cbind(t(totQ), t(femQ), t(menQ))
print(tot3)
##                       cM       25%       75%        cM       25%       75%
## E              40685.322 33628.166 45755.034 40537.895 30857.292 45249.556
## 11-KT            926.089   617.465  1161.315   925.208   606.216  1163.356
## 17a-OHP5        1613.375   924.435  3289.708  1614.580   776.007  3293.109
## S                842.755   560.461  1134.338   809.879   533.419  1049.962
## B               7258.158  5046.464 12537.708  7545.362  5054.271 12526.122
## 11b-OHA4        8870.234  6699.567 11868.466  9460.814  6378.815 11405.670
## 11-KDHT          100.000   100.000   100.000   100.000   100.000   100.000
## 11-KA4          1262.398   845.364  1687.177  1261.992   838.122  1662.598
## DHEA            8294.146  5602.303 12330.232  9395.403  6072.018 13100.160
## T/Epi-T         1224.521   837.845  8088.642   912.986   717.224  1215.801
## 17a-OHP4        1121.074   631.294  1716.772   829.834   527.319  1510.033
## AN               250.000   250.000   506.554   250.000   250.000   504.840
## DHT              275.563   131.840   461.194   202.631   109.361   310.982
## A4              2226.090  1547.706  3064.007  2366.296  1578.072  3276.114
## DOC               62.447    47.405    90.469    59.787    47.996    96.342
## P5              2091.264  1288.141  4116.228  2374.983  1369.833  4172.336
## P4               124.244    78.932   262.136   159.613    91.955   585.953
## A               1105.174   685.244  1827.082  1290.651   924.760  2003.573
## E2               137.493    77.413   271.127   166.653    65.889   441.212
## E1               270.921   169.992   407.188   321.357   192.934   481.816
## CA                11.009     6.956    16.045    11.281     6.551    16.034
## CDCA              30.805    27.355    37.427    30.829    26.801    37.032
## DCA               21.409    16.977    26.085    21.603    18.252    26.003
## LCA               94.263    84.376    99.879    95.019    85.374    99.875
## UDCA               7.380     5.328    11.014     8.691     6.197    12.562
## GCA            15966.631  9474.727 23382.639 17186.637  9271.789 23101.504
## GCDCA          14801.279  8960.878 23899.024 15554.342  8549.356 24876.865
## GDCA            7437.865  3297.726 14388.534  7834.940  4313.704 15531.252
## GDHCA              2.379     2.021     2.856     2.335     1.965     3.022
## GHCA              22.480    12.237    37.627    25.172    14.238    40.098
## GHDGA              3.162     0.350     6.579     3.162     0.350     5.979
## GLCA             340.781   182.181   607.337   354.113   214.072   590.258
## GUDCA           1184.075   565.444  2333.931  1237.867   588.114  2502.512
## TbMCA             29.275    11.419    81.203    32.881    12.593    79.730
## ToMCA             17.255    13.683    24.822    17.558    13.870    25.331
## TCA             4700.406  3054.383  6914.417  4386.529  2830.524  6474.410
## TCDCA           4950.377  3173.812  7236.787  4771.524  2965.906  7203.080
## TDCA            1558.676   857.081  3995.534  1696.225   875.142  3897.210
## TDHCA             11.035     9.875    13.931    10.913     9.900    13.622
## THCA              17.952    14.775    23.550    18.033    14.814    24.309
## TLCA             126.752    67.017   244.035   134.927    69.953   249.699
## TUDCA            116.054    73.134   214.039   112.032    70.874   214.639
## C4                35.059    19.490    65.982    35.059    20.991    70.330
## PFHpA              0.053     0.035     0.079     0.045     0.027     0.068
## PFHxA              0.208     0.176     0.253     0.198     0.177     0.252
## PFHxA_Branched     2.469     1.554     5.484     2.454     1.501     4.586
## PFHxS              0.040     0.029     0.052     0.034     0.027     0.046
## PFNA               0.025     0.016     0.033     0.024     0.016     0.032
## PFOA               0.126     0.086     0.162     0.127     0.084     0.160
## PFOS               0.142     0.101     0.239     0.122     0.090     0.209
## Cer               32.623    28.245    38.408    32.973    27.997    38.314
## CE                 3.991     3.597     4.543     4.108     3.651     4.714
## DG                 7.136     5.233     9.085     7.169     5.158     9.111
## Hexcer             7.075     6.192     8.030     6.831     6.023     7.834
## LPC                6.120     5.163     7.369     5.991     5.088     7.136
## PC                82.206    72.488    88.679    82.918    71.779    88.762
## PC_O               6.103     5.327     7.117     5.837     5.067     6.313
## PE                47.749    43.739    51.178    46.222    43.145    49.655
## PI                 7.294     6.387     8.221     7.254     6.332     8.232
## SM                19.720    18.148    23.216    19.435    17.727    21.713
## TG_SFA            53.188    36.273    81.862    56.972    36.846    78.660
## MUFA              36.247    23.325    51.350    34.481    20.869    49.915
## TG_PUFA           30.739    22.908    46.229    28.105    21.311    39.782
##                       cM       25%       75%
## E              42423.998 37905.650 49608.261
## 11-KT            941.885   759.039  1087.194
## 17a-OHP5        1612.170  1189.626  3049.818
## S                892.445   575.816  1346.303
## B               6494.666  5084.823 12851.865
## 11b-OHA4        8662.543  7509.933 12527.759
## 11-KDHT          100.000   100.000   153.014
## 11-KA4          1318.019   890.176  1699.468
## DHEA            6683.324  5301.176  9524.500
## T/Epi-T        10808.468  8204.054 13367.028
## 17a-OHP4        1510.204  1076.157  1838.766
## AN               250.000   250.000   410.374
## DHT              524.121   381.304   632.030
## A4              1994.668  1538.977  2424.236
## DOC               67.218    44.330    87.425
## P5              1583.961  1120.402  2464.258
## P4                94.012    75.130   145.544
## A                721.652   443.239  1141.528
## E2               117.290   104.037   174.532
## E1               231.762   166.015   284.701
## CA                10.644     7.472    15.679
## CDCA              29.864    27.565    43.038
## DCA               19.452    16.438    26.025
## LCA               89.734    84.370   101.707
## UDCA               6.759     3.996     8.016
## GCA            14578.707 10409.347 22389.679
## GCDCA          12738.863  9312.564 21418.038
## GDCA            5828.298  1409.889 12554.955
## GDHCA              2.428     2.060     2.596
## GHCA              19.170    10.032    30.172
## GHDGA              3.353     0.118     8.434
## GLCA             310.364   157.431   656.270
## GUDCA            962.236   545.741  1773.351
## TbMCA             26.160    10.759    80.139
## ToMCA             17.025    12.073    24.164
## TCA             6000.485  3679.875  7981.012
## TCDCA           5351.020  3711.909  7026.790
## TDCA            1422.538   629.204  4360.791
## TDHCA             11.664     9.851    14.373
## THCA              17.069    14.799    20.508
## TLCA             123.998    62.657   239.906
## TUDCA            117.822    78.817   213.520
## C4                33.495    16.772    48.338
## PFHpA              0.079     0.055     0.093
## PFHxA              0.229     0.176     0.259
## PFHxA_Branched     2.769     1.666     7.058
## PFHxS              0.051     0.039     0.066
## PFNA               0.027     0.020     0.039
## PFOA               0.125     0.097     0.167
## PFOS               0.182     0.145     0.270
## Cer               31.922    28.555    38.713
## CE                 3.811     3.486     4.391
## DG                 7.103     5.699     8.900
## Hexcer             7.559     6.413     8.600
## LPC                6.667     5.328     7.768
## PC                82.087    74.528    87.354
## PC_O               6.992     6.135     7.670
## PE                49.371    47.345    53.112
## PI                 7.350     6.487     8.041
## SM                21.502    18.855    24.641
## TG_SFA            47.274    36.095    87.772
## MUFA              42.592    29.969    56.530
## TG_PUFA           33.201    24.887    50.849
# write.csv(tot3,'tables5_steroid pfas etc medians_tikka10925.csv')

#

# colnames(CombinedData)[colnames(CombinedData) %in% colnames(AllData)[9:71]]

#Above goes, but not below...

# I wanted to check the steroids with highest ACMEs between healthy (all, all) and sick (all, all).
# I needed to have the cutoffs and ways to see the overlaps. For that, I developed a function called 'FindCommonSteroids'
#Some data is needed... :)
path="C:/Users/patati/Desktop/Turku/R/hypo_basic/Tiedostot/"; setwd(path)
files <- list.files(pattern="*.RData")
ldf <- lapply(files, load)
list_of_files <- list() #create empty list
# Loop through the files
for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
names(list_of_files) <- files #https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
library(stringr)

FindCommonSteroids=function(list_of_files,Group,cond) { #cond='' vastaa kaikkia
#General categories of females or males (without 'All, 'MetabolicAssociatedLiverDisease, and 'Menopause')
u <- names(list_of_files)
a <- Group #note the writing, yes with " " in between. " female"  or " male" or " all"
ie=grep(a,u,fixed=TRUE); u2=u[ie]
del=c(grep("All",u2,fixed=TRUE),grep("MetabolicAssociatedLiverDisease",u2,fixed=TRUE),grep("Menopause",u2,fixed=TRUE))
yl=1:length(u2); lop=yl[!yl %in% del]
u=u2[lop] #general male
ie=grep(cond,u,fixed=TRUE); u=u[ie]
a <- "sick"; ie=grep(a,u,fixed=TRUE); u3=u[ie]# sick ones, male or females
u_sick=list_of_files[u3]#https://www.tutorialspoint.com/how-to-extract-strings-that-contains-a-particular-substring-in-an-r-vector
a='healthy';ie=grep(a,u,fixed=TRUE);u4=u[ie]
u_healthy=list_of_files[u4] # u_healthy=list_of_files[grep(a,u,fixed=TRUE)] 
tcross=function(u_sick) { 
  all_names = c(); i=1
  for (i in (1:length(u_sick))) { #length(u_sick) is 18
    us=u_sick[[i]]
    aux=c();
    if (dim(us)[1]>200) {aux=200} else {aux=dim(us)[1]}
    # plot(1:aux,as.numeric(us[1:aux,1]))
    values=(1:(length(as.numeric(us[,1]))-1))
    coo=c(); z=0
    for (z in values) {coo=append(coo,abs(us[z,1]-us[(z+1),1]))}  
    pss=which(coo>quantile(coo,0.95));ro=round(length(coo)/3)# pss[pss<ro]#round(length(which(coo>quantile(coo,0.95)))/2)]
    dpp=diff(pss[pss<ro]) #https://stackoverflow.com/questions/13602170/how-do-i-find-the-difference-between-two-values-without-knowing-which-is-larger
    dpp_sort=sort(dpp,decreasing = TRUE)
    if (length(dpp_sort)<5) { for_comp=length(dpp)+1} else {
      if (sum(dpp_sort[1:5]>5)==5) {cf=dpp_sort[5];cff=which(dpp>cf)[1]} else {cf=dpp_sort[2];cff=which(dpp>cf)[1]}
      cff=which(dpp>(cf-1))[1]
      for_comp=pss[cff]; }
    
    if (aux<30) {for_comp  =max(which(as.vector(us[,1]>0)))}
    if (aux>30) {if (max(pss[pss<ro])<30) (for_comp=30)} #due the small amount of good ones that can be like 4...
    
    rt2=us[1:for_comp,]; j=0
    hoi=c();for (j in 1:dim(rt2)[1]) {hoi=append(hoi,scan(text=rownames(rt2)[j], what=""))}
    hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
    hoi[,2] <- gsub("\\.", "-",  hoi[,2] ) #:)
    xz=round(quantile(table(hoi[,2]),0.25)); if (xz<2) {xz=0} else {xz=xz} #let's start with 25% and if not ok go to like 5%
    names=c();names=names(table(hoi[,2])[table(hoi[,2])>xz]) 
    # print(all_names)
    all_names=append(names,all_names)
  }
  return(sort(table(all_names),decreasing = TRUE))  
}
tc_sick=tcross(u_sick);tc_healthy=tcross(u_healthy) # table(the_cross)# hist(table(the_cross),breaks=10)
cae1=as.numeric(names(sort(table(tc_sick),decreasing = TRUE)))[1];cae2=as.numeric(names(sort(table(tc_sick),decreasing = TRUE)))[2]
if (max(tc_sick)==cae1 | max(tc_sick)==cae2)
cfn=cae2-1 #sometimes as with females no differences, i.. tc_sick;rev(as.numeric(names(table(tc_sick))))[2]-1#
if (is.na(cfn)) {steroid_sick=names(tc_sick)} else {steroid_sick=names(tc_sick[tc_sick>(cfn)])}
cae1=as.numeric(names(sort(table(tc_healthy),decreasing = TRUE)))[1];cae2=as.numeric(names(sort(table(tc_healthy),decreasing = TRUE)))[2]
if (max(tc_healthy)==cae1 | max(tc_healthy)==cae2)
cfn=cae2-1 
if (is.na(cfn)) {steroid_healthy=names(tc_healthy)} else {steroid_healthy=names(tc_healthy[tc_healthy>(cfn)])}
# https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
tbe=intersect(steroid_healthy, steroid_sick)
totaali_sh_all=steroid_sick[!steroid_sick %in% tbe] #https://www.geeksforgeeks.org/difference-between-two-vectors-in-r/ 
return(list(steroid_sick,tbe,totaali_sh_all)) } #"17a-OHP4" "DHT"      "DOC"      'P4'

Group = ' all'; cond='';all_all=FindCommonSteroids(list_of_files,Group,cond) 
#the first 'alls' ('All...') were deleted, yes, and only specific alls were used..
Group = ' female'; cond='';female_all=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' male'; cond='';male_all=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' all'; cond='Steatosis';all_steatosis=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' female'; cond='Steatosis';female_steatosis=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' male'; cond='Steatosis';male_steatosis=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' all'; cond='Fibrosis';all_Fibrosis=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' female'; cond='Fibrosis';female_Fibrosis=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' male'; cond='Fibrosis';male_Fibrosis=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' all'; cond='Necroinflammation';all_Necroinflammation=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' female'; cond='Necroinflammation';female_Necroinflammation=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' male'; cond='Necroinflammation'; male_Necroinflammation=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' all'; cond='HOMAIR'; all_HOMAIR=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' female'; cond='HOMAIR'; female_HOMAIR=FindCommonSteroids(list_of_files,Group,cond) 
Group = ' male'; cond='HOMAIR'; male_HOMAIR=FindCommonSteroids(list_of_files,Group,cond) 

pottees=c(all_all[3],female_all[3],male_all[3],
            all_steatosis[3],female_steatosis[3],male_steatosis[3],
            all_Fibrosis[3],female_Fibrosis[3],male_Fibrosis[3],
            all_Necroinflammation[3],female_Necroinflammation[3],male_Necroinflammation[3],
            all_HOMAIR[3],female_HOMAIR[3],male_HOMAIR[3])

# In case you need to print the above list. This is good for printing list of list in anyways:
# mylist <- pottees; file <- paste0("myfile_ok",date,".txt"); conn <- file(description=file, open="w")
# newlist <- lapply(seq_len(length(mylist)), function(i){
#   lapply(seq_len(length(mylist[[i]])), function(j) {
#     temp <- c(i, j, mylist[[i]][[j]])
#     writeLines(text=paste(temp, collapse=","), con=conn, sep="\r\n")  
#   }) }); close(conn)

# So this will give the most common steroids in all cases compared to all cases in all the subjects (all, female, male)
table(unlist(pottees))[rev(order(table(unlist(pottees))))]
## 
##      DOC       AN     MUFA       E1 17a-OHP4  11-KDHT   TG_SFA  TCDCA_L 
##       13       13       10        9        8        7        6        6 
##   GHCA_L      Cer       A4   TLCA_L       PI   Hexcer  GCDCA_L       DG 
##        6        6        6        5        5        5        5        5 
## 11b-OHA4       PE   THCA_L      LPC  TDHCA_L   TDCA_L  TbMCA_L       PC 
##        5        4        3        3        2        2        2        2 
##    GCA_L    DCA_L   CDCA_L    11-KT   UDCA_L  GUDCA_L   GLCA_L  GHDGA_L 
##        2        2        2        2        1        1        1        1 
##       CE     C4_L 
##        1        1
# For comparing the PFAS/steroid/BA (or lipid) in healthy and sick mediation:
# Load all the variables in the folder:
setwd("C:/Users/patati/Desktop/Turku/R/hypo4/Tiedostot/") #check this if needed...
files <- list.files(pattern="*.RData")
ldf <- lapply(files, load)
list_of_files <- list() #create empty list
# Loop through the files:
for (i in files) {list_of_files[[i]] <- get(load(paste0("", i)))}  #add files to list position
names(list_of_files) <- files #ht
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r

CompareMediation=function(list_of_files) { # I made this a function, since this kind of cross comparison could be handy also in other contexts
  health=c(); sickness=c() # Initialize empty vectors for health and sickness
  
  # Loop through the list of file names and categorize uniqueBMIValues into health and sickness
  for (i in 1:length(names(list_of_files))) {
    if (str_detect(names(list_of_files)[i],'healthy')) {
      health=append(health, list_of_files[i]) # Append to health if 'healthy' is in the name
    } else if (str_detect(names(list_of_files)[i],'sick')) {
      sickness=append(sickness, list_of_files[i]) # Append to sickness if 'sick' is in the name
    }
  }
  
  health2=c(); sickness2=c() # Initialize empty data frames for health2 and sickness2
  
  # Process health files: order by the first column in descending order and take the top 10 rows
  i=0; for (i in 1:length(c(health))) {
    health[[i]][rev(order(health[[i]][,1])),]
    health2=rbind(health2, health[[i]][1:10,])
  }
  
  # Process sickness files: order by the first column in descending order and take the top 10 rows
  i=0; for (i in 1:length(c(sickness))) {
    sickness[[i]][rev(order(sickness[[i]][,1])),]
    sickness2=rbind(sickness2, sickness[[i]][1:10,])
  }
  
  # Combine row names with the data for health2 and sickness2
  health2=cbind(rownames(health2), health2)
  sickness2=cbind(rownames(sickness2), sickness2)
  
  # Set column names for health2 and sickness2
  colnames(health2)=c('Mediation','ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u')
  colnames(sickness2)=c('Mediation','ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u')
  
  # Replace ".RData" in row names with an empty string
  rownames(health2)=str_replace_all(rep(names(health), each = 10), ".RData", "")
  rownames(sickness2)=str_replace_all(rep(names(sickness), each = 10), ".RData", "")
  # https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rep
  # https://stackoverflow.com/questions/10294284/remove-all-special-characters-from-a-string-in-r
  # https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
  
  # Process health2 data for contaminants, steroids, and bile acids or lipids
  hoi=c(); hoi=scan(text=health2[,1], what=""); hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Desig.')
  hoi_healthy=hoi[,c('Contaminants','Steroids','Bile Acids or Lipids')]
  
  # Process sickness2 data for contaminants, steroids, and bile acids or lipids
  hoi=c(); hoi=scan(text=sickness2[,1], what=""); hoi=as.data.frame(matrix(hoi, ncol = 4,  byrow = TRUE), stringsAsFactors = FALSE)
  colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids','Desig.')
  hoi_sick=hoi[,c('Contaminants','Steroids','Bile Acids or Lipids')]
  ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
  
  return(list(hoi_sick, hoi_healthy)) # Return the processed data as a list
}

cmt=CompareMediation(list_of_files);
hoi_sick=cmt[[1]];hoi_healthy=cmt[[2]]

# So the differences between contaminants (in the sick vs. healthy 'mediation') are:
table(hoi_sick[,1])[rev(order(table(hoi_sick[,1])))];table(hoi_healthy[,1])[rev(order(table(hoi_healthy[,1])))]
## 
##           PFOA          PFHpA          PFHxA          PFHxS PFHxA_Branched 
##             37             35             33             29             20 
##           PFNA           PFOS 
##             16             10
## 
##          PFHpA           PFOS          PFHxA           PFNA PFHxA_Branched 
##             81             37             27             15             12 
##          PFHxS 
##              8
# Differences Between Steroids...
# table(hoi_sick[,2])[rev(order(table(hoi_sick[,2])))];table(hoi_healthy[,2])[rev(order(table(hoi_healthy[,2])))]
# Differences Between BAs/Lipids...
# table(hoi_sick[,3])[rev(order(table(hoi_sick[,3])))];table(hoi_healthy[,3])[rev(order(table(hoi_healthy[,3])))]


# Copiloting helped both with the comments and streamlines here in the above code segment.

Disclaimer

# This is a part of on-going research work that has not been published yet and I cannot take a responsibility if something above is not working/ok in your system/research/etc. in 100% accuracy. 
# Furthermore, I am inbound to update these sites relatively often so a grain of salt is needed when reading these... :)
# Please note also that most of the plotting functions have bee commented out so that their execution would not take time when compiling as well as due quality reasons: Rmarkdown print is not as good in the screen and in final (html) document as the similar with knitr's 'include_graphics' that shows the full scale image.
# In addition, no sensitive data has been shared and you see only 'examples' here. Moreover, I am not an expert on biology (or even at computing per se) and may not know your specific biochemical and/or mechanical questions that you maybe using this for. 
# Not to mention few, I just want to say that I am just doing the analysis for understanding the data partly on its own right with the context as mentioned above and partly as a starting 'side project'.
# Thanks and sorry for possible inconveniences in advance! :)

About R Setups

# As per this date, see above or below, 
thedate 
## [1] "140426"
# all the above codes work to produce results with the data files given.
# All the packages have been copied to local drive (12.9.24), i.e. the content of '.libPaths()'
# R is 'R version 4.3.1 (2023-06-16 ucrt)' (with 'version'/'getRversion()' command)
# And RStudio is '2024.4.0.735' (with 'RStudio.Version()' command)
# In addition, the following information regarding the setups is given:
sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Finnish_Finland.utf8  LC_CTYPE=Finnish_Finland.utf8   
## [3] LC_MONETARY=Finnish_Finland.utf8 LC_NUMERIC=C                    
## [5] LC_TIME=Finnish_Finland.utf8    
## 
## time zone: Europe/Helsinki
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices datasets  utils    
## [8] methods   base     
## 
## other attached packages:
##   [1] ggalluvial_0.12.5           svglite_2.2.1              
##   [3] ggthemes_5.1.0              xlsx_0.6.5                 
##   [5] WGCNA_1.73                  fastcluster_1.3.0          
##   [7] dynamicTreeCut_1.63-1       viridis_0.6.5              
##   [9] viridisLite_0.4.2           tufte_0.13                 
##  [11] tint_0.1.5                  superb_0.95.19             
##  [13] sjPlot_2.8.17               scatterplot3d_0.3-44       
##  [15] scater_1.30.1               scuttle_1.12.0             
##  [17] SingleCellExperiment_1.24.0 SummarizedExperiment_1.34.0
##  [19] Biobase_2.64.0              GenomicRanges_1.56.1       
##  [21] GenomeInfoDb_1.40.1         IRanges_2.38.1             
##  [23] S4Vectors_0.42.1            BiocGenerics_0.50.0        
##  [25] MatrixGenerics_1.16.0       matrixStats_1.4.1          
##  [27] rstatix_0.7.2               rmdformats_1.0.4           
##  [29] rmarkdown_2.29              rgl_1.3.18                 
##  [31] reshape2_1.4.4              remotes_2.5.0              
##  [33] readxl_1.4.5                rcompanion_2.5.0           
##  [35] RColorBrewer_1.1-3          ragg_1.4.0                 
##  [37] qpgraph_2.36.0              quantreg_6.1               
##  [39] SparseM_1.84-2              psych_2.5.3                
##  [41] prettydoc_0.4.1             ppcor_1.1                  
##  [43] plotrix_3.8-4               plyr_1.8.9                 
##  [45] pathviewr_1.1.7             PerformanceAnalytics_2.0.8 
##  [47] xts_0.14.1                  pheatmap_1.0.12            
##  [49] MOFA2_1.14.0                mlma_6.3-1                 
##  [51] gplots_3.2.0                coxme_2.2-22               
##  [53] bdsmatrix_1.3-7             survival_3.7-0             
##  [55] abind_1.4-8                 mgcv_1.9-3                 
##  [57] nlme_3.1-166                meta_8.1-0                 
##  [59] metadat_1.4-0               mediation_4.5.0            
##  [61] sandwich_3.1-1              mvtnorm_1.3-1              
##  [63] MASS_7.3-61                 mdatools_0.14.2            
##  [65] Maaslin2_1.16.0             magrittr_2.0.3             
##  [67] magick_2.8.6                lsr_0.5.2                  
##  [69] lme4_1.1-35.5               lmtest_0.9-40              
##  [71] zoo_1.8-12                  lavaan_0.6-19              
##  [73] insight_1.3.0               igraph_2.1.4               
##  [75] hrbrthemes_0.8.7            Hmisc_5.1-3                
##  [77] glmnet_4.1-8                Matrix_1.7-0               
##  [79] ggtext_0.1.2                ggh4x_0.2.8                
##  [81] ggsankeyfier_0.1.8          ggsankey_0.0.99999         
##  [83] ggpubr_0.6.0                ggplotify_0.1.2            
##  [85] ggforestplot_0.1.0          ggforce_0.4.1              
##  [87] ggcorrplot_0.1.4.1          FSA_0.9.5                  
##  [89] fs_1.6.5                    extrafont_0.19             
##  [91] effsize_0.8.1               dmetar_0.1.0               
##  [93] datarium_0.1.0              daiR_1.0.1                 
##  [95] corrplot_0.94               correlation_0.8.7          
##  [97] ComplexHeatmap_2.18.0       circlize_0.4.15            
##  [99] censReg_0.5-38              maxLik_1.5-2.1             
## [101] miscTools_0.6-28            car_3.1-2                  
## [103] carData_3.0-5               brickster_0.2.5            
## [105] binilib_0.2.0               lubridate_1.9.4            
## [107] forcats_1.0.0               stringr_1.5.1              
## [109] dplyr_1.1.4                 purrr_1.2.1                
## [111] readr_2.1.5                 tidyr_1.3.1                
## [113] tibble_3.2.1                ggplot2_4.0.2              
## [115] tidyverse_2.0.0             bigsnpr_1.12.2             
## [117] bigstatsr_1.6.2             scales_1.4.0               
## 
## loaded via a namespace (and not attached):
##   [1] DBI_1.2.3                 bslib_0.9.0              
##   [3] lpSolve_5.6.23            httr_1.4.7               
##   [5] BiocParallel_1.38.0       prettyunits_1.2.0        
##   [7] yulab.utils_0.2.0         GenomicAlignments_1.40.0 
##   [9] sparseMatrixStats_1.14.0  pillar_1.10.2            
##  [11] Rgraphviz_2.46.0          R6_2.6.1                 
##  [13] boot_1.3-31               mime_0.12                
##  [15] lmom_3.2                  reticulate_1.39.0        
##  [17] uwot_0.2.3                gridtext_0.1.5           
##  [19] Rttf2pt1_1.3.12           Rhdf5lib_1.24.1          
##  [21] libcoin_1.0-10            dir.expiry_1.12.0        
##  [23] optparse_1.7.5            fpc_2.2-13               
##  [25] S7_0.2.1                  GlobalOptions_0.1.2      
##  [27] rbibutils_2.2.16          caTools_1.18.3           
##  [29] polyclip_1.10-6           beachmat_2.18.0          
##  [31] htmltools_0.5.8.1         e1071_1.7-14             
##  [33] datawizard_1.1.0          ggrepel_0.9.6            
##  [35] CompQuadForm_1.4.3        fgsea_1.28.0             
##  [37] poibin_1.5                rpart_4.1.23             
##  [39] clue_0.3-64               tidyselect_1.2.1         
##  [41] RSQLite_2.4.0             cowplot_1.1.3            
##  [43] GenomeInfoDbData_1.2.12   ScaledMatrix_1.12.0      
##  [45] gridExtra_2.3             graph_1.82.0             
##  [47] mathjaxr_1.8-0            vipor_0.4.7              
##  [49] rtracklayer_1.64.0        doRNG_1.8.6.2            
##  [51] Rtsne_0.17                DelayedMatrixStats_1.26.0
##  [53] lazyeval_0.2.2            sass_0.4.9               
##  [55] bitops_1.0-9              labeling_0.4.3           
##  [57] KEGGREST_1.44.1           promises_1.3.3           
##  [59] shape_1.4.6.1             rhdf5filters_1.14.1      
##  [61] GenomicFeatures_1.56.0    locfit_1.5-9.10          
##  [63] DelayedArray_0.30.1       arrow_17.0.0.1           
##  [65] multcomp_1.4-28           assertthat_0.2.1         
##  [67] tools_4.4.1               basilisk_1.16.0          
##  [69] shiny_1.10.0              BiocFileCache_2.12.0     
##  [71] googleCloudStorageR_0.7.0 rlang_1.1.5              
##  [73] generics_0.1.4            BiocSingular_1.18.0      
##  [75] evaluate_1.0.3            httr2_1.1.2              
##  [77] bayestestR_0.16.0         BiocIO_1.14.0            
##  [79] sjlabelled_1.2.0          basilisk.utils_1.16.0    
##  [81] expm_1.0-0                colorspace_2.1-1         
##  [83] data.table_1.16.2         withr_3.0.2              
##  [85] shinyBS_0.61.1            gargle_1.5.2             
##  [87] RCurl_1.98-1.17           restfulr_0.0.15          
##  [89] xtable_1.8-8              MatrixModels_0.5-4       
##  [91] systemfonts_1.2.3         mclust_6.1.1             
##  [93] httpuv_1.6.15             robustbase_0.99-4        
##  [95] MuMIn_1.48.11             rJava_1.0-11             
##  [97] broom_1.0.12              GO.db_3.18.0             
##  [99] rhdf5_2.46.1              netmeta_3.2-0            
## [101] googleAuthR_2.0.2         vctrs_0.6.5              
## [103] lifecycle_1.0.4           proxy_0.4-27             
## [105] codetools_0.2-20          mnormt_2.1.1             
## [107] haven_2.5.5               DESeq2_1.44.0            
## [109] progress_1.2.3            dbplyr_2.5.0             
## [111] cellranger_1.1.0          jquerylib_0.1.4          
## [113] Rcpp_1.0.13               pcaPP_2.0-5              
## [115] rstudioapi_0.17.1         stringi_1.8.4            
## [117] hms_1.1.3                 minqa_1.2.8              
## [119] cachem_1.1.0              ggeffects_2.2.1          
## [121] plm_2.6-6                 XVector_0.44.0           
## [123] jpeg_0.1-11               pbivnorm_0.6.0           
## [125] GetoptLong_1.0.5          bookdown_0.42            
## [127] HDF5Array_1.30.0          SparseArray_1.4.8        
## [129] htmlwidgets_1.6.4         kernlab_0.9-33           
## [131] Formula_1.2-5             prabclus_2.3-4           
## [133] DescTools_0.99.60         class_7.3-22             
## [135] memoise_2.0.1             crayon_1.5.3             
## [137] gridGraphics_0.5-1        rappdirs_0.3.3           
## [139] S4Arrays_1.4.1            rrapply_1.2.7            
## [141] magic_1.6-1               xml2_1.3.6               
## [143] filelock_1.0.3            preprocessCore_1.66.0    
## [145] Exact_3.3                 flexmix_2.3-20           
## [147] UCSC.utils_1.0.0          png_0.1-8                
## [149] tzdb_0.4.0                bigsparser_0.6.1         
## [151] fastmap_1.2.0             sjstats_0.19.0           
## [153] nortest_1.0-4             pkgconfig_2.0.3          
## [155] cli_3.6.4                 beeswarm_0.4.0           
## [157] gld_2.6.7                 qtl_1.70                 
## [159] ggsignif_0.6.4            nnet_7.3-19              
## [161] performance_0.14.0        textshaping_1.0.1        
## [163] sjmisc_2.8.10             bigassertr_0.1.7         
## [165] rmio_0.4.0                multcompView_0.1-10      
## [167] gdtools_0.4.2             DEoptimR_1.1-3-1         
## [169] timechange_0.3.0          foreign_0.8-87           
## [171] splines_4.4.1             askpass_1.2.1            
## [173] blob_1.2.4                impute_1.76.0            
## [175] annotate_1.82.0           XML_3.99-0.18            
## [177] collapse_2.0.16           numDeriv_2016.8-1.1      
## [179] ggbeeswarm_0.7.2          knitr_1.49               
## [181] dichromat_2.0-0.1         compiler_4.4.1           
## [183] rjson_0.2.22              biomaRt_2.60.1           
## [185] extrafontdb_1.0           bit_4.5.0                
## [187] diptest_0.77-1            metafor_4.8-0            
## [189] BiocNeighbors_1.18.0      glue_1.7.0               
## [191] glmmML_1.1.6              digest_0.6.37            
## [193] quadprog_1.5-8            irlba_2.3.5.1            
## [195] xlsxjars_0.6.1            fontLiberation_0.1.0     
## [197] foreach_1.5.2             flock_0.7                
## [199] fontBitstreamVera_0.1.1   zlibbioc_1.50.0          
## [201] tweenr_2.0.2              lattice_0.22-6           
## [203] biglm_0.9-3               openssl_2.3.2            
## [205] rsvd_1.0.5                nloptr_2.1.1             
## [207] yaml_2.3.10               later_1.3.2              
## [209] modeltools_0.2-24         backports_1.5.0          
## [211] Rsamtools_2.20.0          AnnotationDbi_1.66.0     
## [213] parallel_4.4.1            fontquiver_0.2.1         
## [215] gtable_0.3.6              xfun_0.50                
## [217] renv_1.0.11               Biostrings_2.72.1        
## [219] getopt_1.20.4             curl_5.2.2               
## [221] rootSolve_1.8.2.4         doParallel_1.0.17        
## [223] KernSmooth_2.23-26        jsonlite_1.8.9           
## [225] coin_1.4-3                base64enc_0.1-3          
## [227] vwline_0.2-4              iterators_1.0.14         
## [229] TH.data_1.1-3             bigparallelr_0.3.2       
## [231] fastmatch_1.1-4           checkmate_2.3.2          
## [233] gtools_3.9.5              htmlTable_2.4.3          
## [235] rngtools_1.5.2            gridBezier_1.1-1         
## [237] bit64_4.5.2               cluster_2.1.6            
## [239] Rdpack_2.6.4              farver_2.1.2             
## [241] ggstance_0.3.7            zip_2.3.1

References

# Here are the reference links unceremoniously in order of appearance:

# https://stackoverflow.com/questions/43527520/r-markdown-yaml-scanner-error-mapping-values
# https://appsilon.com/imputation-in-r 
# https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
# https://readxl.tidyverse.org/articles/column-names.html
# https://www.analyticsvidhya.com/blog/2021/06/hypothesis-testing-parametric-and-non-parametric-
# https://stackoverflow.com/questions/54264980/r-how-to-set-row-names-attribute-as-numeric-from-character 
# https://stackoverflow.com/questions/13676878/fastest-way-to-get-min-from-every-column-in-a-matrix
# https://www.geeksforgeeks.org/performing-logarithmic-computations-in-r-programming-log-log10-log1p-and-log2-functions/ 
# https://stackoverflow.com/questions/50476717/i-want-to-align-match-two-
# https://mdatools.com/docs/preprocessing--autoscaling.html
# https://svkucheryavski.gitbooks.io/mdatools/content/preprocessing/text.html
# https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
# https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
# https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
# https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
# https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender
# https://stackoverflow.com/questions/32539222/group-boxplot-data-while-keeping-their-individual-x-axis-labels-in-ggplot2-in-r
# https://stackoverflow.com/questions/34522732/changing-fonts-in-ggplot2
# https://stackoverflow.com/questions/37488075/align-axis-label-on-the-right-with-ggplot2
# http://www.sthda.com/english/wiki/ggplot2-rotate-a-graph-
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
# https://stackoverflow.com/questions/76758153/is-there-a-way-to-change-the-asterisks-to-match-custom-p-values 
# https://datavizpyr.com/horizontal-CreateBoxplots-with-ggplot2-in-r/
# https://stackoverflow.com/questions/72564551/a-custom-legend-unrelated-to-data-in-ggplot
# https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
# https://stackoverflow.com/questions/66429500/how-to-make-raggagg-png-device-work-with-ggsave
# https://stats.stackexchange.com/questions/237256/is-the-shapiro-wilk-test-only-applicable-to-smaller-sample-sizes  
# https://statisticsbyjim.com/hypothesis-testing/nonparametric-parametric-tests/
# https://www.statology.org/mean-standard-deviation-grouped-data/  
# https://amsi.org.au/ESA_Senior_Years/SeniorTopic4/4h/4h_2content_11.html  
# https://www.themathdoctors.org/mean-and-standard-deviation-of-grouped-data/
# https://stackoverflow.com/questions/29825537/group-categories-in-r-according-to-first-letters-of-a-string
# https://datatofish.com/create-dataframe-in-r/
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot
# http://www.sthda.com/english/wiki/unpaired-two-samples-wilcoxon-test-in-r
# https://blogs.sas.com/content/iml/2011/04/27/log-transformations-how-to-handle-negative-data-values.html
# https://stats.stackexchange.com/questions/155429/how-to-transform-negative-values-to-logarithms
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1534033/
# https://www.statisticshowto.com/probability-and-statistics/statistics-definitions/parametric-and-non-parametric-data/
# https://www.bmj.com/content/312/7038/1079.full 
# https://stats.stackexchange.com/questions/589920/how-can-i-back-transform-a-log-data-to-interpret-t-test-and-get-original-ci
# https://www.biostars.org/p/16481/
# https://whitlockschluter3e.zoology.ubc.ca/RLabs/R_tutorial_Contingency_analysis.html  
# https://sphweb.bumc.bu.edu/otlt/mph-modules/ep/ep713_randomerror/ep713_randomerror6.html
# https://www.r-bloggers.com/2015/01/easy-error-propagation-in-r/ 
# https://www.biostars.org/p/342756/ 
# https://en.wikipedia.org/wiki/Tukey%27s_range_test 
# https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
# Jukka Vaari 1993 Fysiikan Laboratoriotyöt :)
# https://www.r-bloggers.com/2020/03/how-to-standardize-group-colors-in-data-visualizations-in-r/
# https://stackoverflow.com/questions/71745719/how-to-control-stripe-transparency-using-ggforestplot-geom-stripes
# https://stackoverflow.com/questions/10547487/remove-facet-wrap-labels-completely
# https://stackoverflow.com/questions/24169675/multiple-colors-in-a-facet-strip-background-in-ggplot
# http://www.sthda.com/english/wiki/ggplot2-axis-scales-and-transformations 
# https://www.statology.org/geom_point-fill/
# https://stackoverflow.com/questions/62093084/set-geom-vline-line-types-and-sizes-with-aes-mapping-in-ggplot2
# https://en.wikipedia.org/wiki/Standard_error
# https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
# https://www.reddit.com/r/Rlanguage/comments/nq773b/reading_multiple_rdata_files_into_a_list/
# https://stackoverflow.com/questions/38643000/naming-list-elements-in-r
# https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rep
# https://stackoverflow.com/questions/10294284/remove-all-special-characters-from-a-string-in-r
# https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct
# https://jokergoo.github.io/circlize_book/book/advanced-layout.htmlcombine-circular-plots
# https://stackoverflow.com/questions/31943102/rotate-labels-in-a-chorddiagram-r-circlize
# https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r
# https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.htmlfigure-sizing
# https://bioconductor.org/packages/devel/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html
# https://bioinformatics.stackexchange.com/questions/22414/when-analysing-microarray-data-is-it-need-to-do-normalization-and-standardizati
# https://bioinformatics.stackexchange.com/questions/22426/inconsistent-microarray-expression-levels-after-normalizing-with-log2
# https://www.reddit.com/r/bioinformatics/comments/1ejs94m/log2_transformation_and_quantile_normalization/
# https://www.statology.org/cohens-d-in-r/
# https://stackoverflow.com/questions/10688137/how-to-fix-spaces-in-column-names-of-a-data-frame-remove-spaces-inject-dots
# https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use   
# https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
# https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
# https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
# https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
# https://scales.arabpsychology.com/stats/how-to-remove-the-last-character-from-a-string-in-r-2-examples/
# https://ggplot2.tidyverse.org/reference/geom_smooth.html
# https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
# https://www.geeksforgeeks.org/r-check-if-a-directory-exists-and-create-if-it-does-not/
# https://r-graph-gallery.com/network.html)
# https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
# https://r-graph-gallery.com/249-igraph-network-map-a-color.html
# https://rdrr.io/cran/mlma/man/data.org.html 
# https://cran.r-project.org/web/packages/mma/mma.pdf
# https://www.statology.org/glm-vs-lm-in-r/
# https://intro2r.com/loops.html 
# https://www.benjaminbell.co.uk/2022/12/loops-in-r-nested-loops.html 
# https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
# https://topepo.github.io/caret/parallel-processing.html 
# https://bookdown.org/content/b472c7b3-ede5-40f0-9677-75c3704c7e5c/more-than-one-mediator.html
# https://ds-pl-r-book.netlify.app/optimization-in-r.html
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot 
# https://www.rdocumentation.org/packages/corrplot/versions/0.92/topics/corrplot
# https://cran.r-project.org/web/packages/corrplot/vignettes/corrplot-intro.html
# https://statisticsglobe.com/change-font-size-corrplot-r
# https://stackoverflow.com/questions/51115495/how-to-keep-order-of-the-correlation-plot-labels-as-same-in-the-datafile
# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html 
# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model
# https://github.com/MarioniLab/miloR
# https://www.nature.com/articles/s41467-023-40458-9/figures/4
# https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct 
# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation
# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
# https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
# https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
# https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
# https://www.tutorialspoint.com/how-to-extract-strings-that-contains-a-particular-substring-in-an-r-vector
# https://stackoverflow.com/questions/13602170/how-do-i-find-the-difference-between-two-values-without-knowing-which-is-larger
# https://www.geeksforgeeks.org/difference-between-two-vectors-in-r/