-
Notifications
You must be signed in to change notification settings - Fork 11
/
scIGANs
190 lines (183 loc) · 6.39 KB
/
scIGANs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/bin/bash
## scIGANs command-line launcher: default settings.
## last update: 2020/03/28
## Every value below may be overridden by a command-line option; the input
## expression matrix itself is taken from the positional arguments.

## Release tag printed in the help banner and written to the log file.
version='scIGANs_0.1.2'

## Training / imputation hyper-parameters.
epochs=200        # number of training epochs
sim_size=200      # number of simulated datasets used for imputation
knn_k=10          # number of nearest neighbours used for imputation
batch_size=8      # samples loaded per batch
latent_dim=100    # dimension of the generator's latent space
threthold=0.001   # convergence threshold; training stops once dM < threthold

## Optional inputs and run-mode switches (empty means "not given").
label=''          # path of the cell-label file
job_name=''       # user-defined job name
impute=''         # becomes non-empty when --impute is requested
PARAMS=''         # accumulator for positional arguments
#######################################
# Print the version banner and the usage text to stdout, then terminate the
# script.
# Globals:   version (read)
# Arguments: $1 - optional exit status (default 0, the historical behaviour)
# Outputs:   usage text on stdout
# Returns:   never returns; exits the current (sub)shell
#######################################
help_message (){
  printf '%s\n' \
    "--------------" \
    "$version" \
    "--------------" \
    "" \
    "USAGE: scIGANs <in.matrix.txt> [-l in.label.txt] [options]" \
    "" \
    "in.matrix.txt A tab-delimited text file, containing the expression counts with genes in rows and cells in columns. The first row is header and first column is gene IDs or names. <required>" \
    "-h | --help show this message" \
    "-l | --label_file <string> optional give the label of cell type or subpopulation for each cell, with the same order of the columns in expression matrix." \
    "-e | --epoch <integer> optional set the number of epochs of training. Default = 200" \
    "-s | --sim_size <integer> optional set the number of generated data for imputation. Default = 200" \
    "-k | --knn_k <integer> optional set the number of nearest neighbours used for imputation. Default = 10" \
    "-d | --latent_dim <integer> optional dimension of the latent space of generator. Default = 100" \
    "-b | --batch_size <integer> optional how many samples per batch to load. More samples per batch requires more memory. Default = 8; max=number_of_cells" \
    "-t | --threthold <float> optional convergence threshold. Default = 0.001" \
    "--impute optional Set this option to skip training and directly impute using pretrained model for the same input settings." \
    "-j | --job_name <string> optional user-defined job name (a string without spaces), which will be used to name the output files." \
    "-o | --outdir <string> optional set the path where to write the imputed matrix. Default: current working directory (pwd)."
  # The option keeps its historical spelling "--threthold" because that is the
  # name the parser accepts; only the description and default were corrected.
  exit "${1:-0}"
}
# Default output directory and the directory this script lives in (used to
# locate the helper scripts under src/).
outdir=$(pwd)
Bashdir=$(dirname "${BASH_SOURCE[0]}")

# Positional (non-option) arguments, kept as an array so that paths containing
# spaces or shell metacharacters survive untouched (no eval needed).
positional=()

#######################################
# Report a command-line error, show the usage text and abort with status 1.
# Arguments: $1 - error description
# Outputs:   error line and usage text on stderr
#######################################
usage_error() {
  printf 'Error: %s\n' "$1" >&2
  # help_message terminates the shell it runs in; confine it to a subshell so
  # the non-zero exit status below is actually reached.
  ( help_message ) >&2
  exit 1
}

#######################################
# Abort unless a value follows the option currently being parsed.
# Without this check "shift 2" fails when only the option itself is left,
# nothing is shifted, and the parsing loop never terminates.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
#######################################
require_value() {
  if (( $2 < 2 )); then
    usage_error "Option $1 requires a value"
  fi
}

#######################################
# Parse the command line.
# Globals:   label, epochs, sim_size, knn_k, batch_size, latent_dim,
#            threthold, job_name, outdir, impute, positional (all written)
# Arguments: the script's command-line arguments
#######################################
parse_args() {
  while (( $# )); do
    case "$1" in
      -l|--label_file)
        require_value "$1" "$#"
        label=$2
        shift 2
        ;;
      -e|--epoch)
        require_value "$1" "$#"
        epochs=$2
        shift 2
        ;;
      -s|--sim_size)
        require_value "$1" "$#"
        sim_size=$2
        shift 2
        ;;
      -k|--knn_k)
        require_value "$1" "$#"
        knn_k=$2
        shift 2
        ;;
      -b|--batch_size)
        require_value "$1" "$#"
        batch_size=$2
        shift 2
        ;;
      -d|--latent_dim)
        require_value "$1" "$#"
        latent_dim=$2
        shift 2
        ;;
      -t|--threthold)
        require_value "$1" "$#"
        threthold=$2
        shift 2
        ;;
      -j|--job_name)
        require_value "$1" "$#"
        job_name=$2
        shift 2
        ;;
      -o|--outdir)
        require_value "$1" "$#"
        outdir=$2
        # The output directory is created as soon as the option is seen.
        mkdir -p -- "$outdir"
        shift 2
        ;;
      --impute)
        impute="impute"
        shift
        ;;
      -h|--help)
        # help_message prints the usage text and exits the script.
        help_message
        shift
        ;;
      --) # end of options: everything that follows is positional
        shift
        positional+=("$@")
        break
        ;;
      -*) # unsupported flags
        usage_error "Unsupported argument $1"
        ;;
      *) # preserve positional arguments
        positional+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"

# set positional arguments in their proper place
set -- "${positional[@]}"

# print out the help message (which exits) if no input matrix was provided
if (( ${#positional[@]} == 0 )); then
  help_message
fi

# the first positional argument is the input expression matrix
e_matrix=${positional[0]}
# ---------------------------------------------------------------------------
# Main pipeline: precheck the input (R) -> train (python, unless --impute)
# -> impute (python) -> post-process (R).
# ---------------------------------------------------------------------------
# Abort on the first failing command. pipefail makes "cmd | tee" report the
# status of cmd instead of the (always successful) status of tee.
set -e
set -o pipefail

timestamp=$(date '+%Y%m%d%H%M')
fname=$(basename -- "$e_matrix") ## get the filename without path
if [[ -z "$label" ]]; then
  lname="noLabel"
else
  lname=$(basename -- "$label")
fi
if [[ -z "$job_name" ]]; then
  job_name="${fname}_${lname}"
fi

# create a tmp folder for intermediate outputs
tmpdir="${outdir}/scIGANs_${job_name}_tmp${timestamp}"
mkdir -p -- "$tmpdir"

# Remove the tmp folder on every exit path; with "set -e" a failing step would
# otherwise terminate the script before any trailing cleanup command runs.
cleanup_tmpdir() {
  rm -rf -- "${tmpdir:?}"
}
trap cleanup_tmpdir EXIT

# the log file (a sibling of the tmp folder, so it survives the cleanup)
logfile="${tmpdir}.log"
echo ""
echo "$version"
echo "$version" >> "$logfile"
echo "" >> "$logfile"

python_path=python   # default python
Rscript_path=Rscript # default R
## when running within a conda env, use the python and R from that env
if [[ -n "${CONDA_PREFIX:-}" ]]; then
  echo "scIGANs is running in a conda environment: ${CONDA_DEFAULT_ENV:-}"
  python_path="${CONDA_PREFIX}/bin/python"
  Rscript_path="${CONDA_PREFIX}/bin/Rscript"
fi

echo ""
echo "Prechecking input files..."
echo "$(date) Prechecking input files..." >> "$logfile"
# The label path is passed only when one was given, so the R script receives
# the same number of arguments as before.
inpro_args=("$e_matrix" "$tmpdir" "$logfile")
if [[ -n "$label" ]]; then
  inpro_args+=("$label")
fi
if ! "$Rscript_path" "${Bashdir}/src/inpro.R" "${inpro_args[@]}" 2>&1 \
  | tee -a "$logfile"; then
  exit 1
fi

# The precheck writes its results to "$tmpdir/args"; the whitespace-separated
# values are read in order as: img_size, ncls, label.
args_file="${tmpdir}/args"
if [[ ! -r "$args_file" ]]; then
  echo "Error: ${args_file} was not produced by the input precheck" >&2
  exit 1
fi
par=()
while read -r -a fields || (( ${#fields[@]} )); do
  par+=("${fields[@]}")
done < "$args_file"
img_size=${par[0]}
ncls=${par[1]}
label=${par[2]}

# Arguments shared by the training and the imputation run.
common_args=(
  "--file_d=${tmpdir}/${fname}"
  "--file_c=${label}"
  "--ncls=${ncls}"
  "--img_size=${img_size}"
  "--n_epochs=${epochs}"
  "--sim_size=${sim_size}"
  "--knn_k=${knn_k}"
)
train_args=(
  "${common_args[@]}"
  "--batch_size=${batch_size}"
  "--latent_dim=${latent_dim}"
  "--threthold=${threthold}"
  --train
  --job_name "$job_name"
  --outdir "$outdir"
)
impute_args=(
  "${common_args[@]}"
  "--latent_dim=${latent_dim}"
  --impute
  --job_name "$job_name"
  --outdir "$outdir"
)

echo ""
# Training is skipped when --impute was given (a pretrained model is reused).
if [[ -z "$impute" ]]; then
  echo "Training..."
  echo "" >> "$logfile"
  echo "$(date) Training..." >> "$logfile"
  echo ""
  echo "Command for training:" >> "$logfile"
  echo " ${train_args[*]}" >> "$logfile"
  "$python_path" "${Bashdir}/src/imputeByGans.py" "${train_args[@]}"
fi

echo "Imputing..."
echo "" >> "$logfile"
echo "$(date) Imputing..." >> "$logfile"
echo ""
echo "Command for Imputing:" >> "$logfile"
echo " ${impute_args[*]}" >> "$logfile"
"$python_path" "${Bashdir}/src/imputeByGans.py" "${impute_args[@]}"

# Reaching this point means the python steps succeeded ("set -e" is active),
# so the post-processing always runs here. The tmp folder is removed by the
# EXIT trap once the script finishes.
"$Rscript_path" "${Bashdir}/src/outpro.R" "$job_name" "$tmpdir" "$outdir" "$timestamp" 2>&1 \
  | tee -a "$logfile"