### IVET datasets

IVET datasets, encompassing m5C, m1A, m6A, and unmodified samples, are accessible at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE227087. The reference transcript sequences are available at [our repository](https://github.com/yulab2021/TandemMod/tree/master/data/).

#### IVET unmodified

In [None]:
#IVET unmodified
sample=IVET_unmodified
ref=../data/IVET_reference.fa

#basecalling: reads ${sample}_fast5, writes ${sample}_guppy
guppy_basecaller \
  -i "${sample}_fast5" \
  -s "${sample}_guppy" \
  --num_callers 40 \
  --recursive \
  --fast5_out \
  --config rna_r9.4.1_70bps_hac.cfg

#multi to single: convert the guppy output into ${sample}_guppy_single
multi_to_single_fast5 \
  -i "${sample}_guppy" \
  -s "${sample}_guppy_single" \
  -t 40 \
  --recursive

#resquiggle the single-read fast5 files against the IVET reference
tombo resquiggle \
  --overwrite \
  --basecall-group Basecall_1D_001 \
  "${sample}_guppy_single" "${ref}" \
  --processes 20 \
  --fit-global-scale \
  --include-event-stdev

#merge every FASTQ in guppy's pass/ folder into a single file
cat "${sample}_guppy"/pass/*.fastq > "${sample}.fastq"

#map reads to reference
minimap2 -ax map-ont "${ref}" "${sample}.fastq" > "${sample}.sam"

#feature extraction: signal table first, then one feature table per motif.
#The unmodified sample yields both an A-centred (NNANN) and a C-centred
#(NNCNN) feature file.
python scripts/extract_signal_from_fast5.py \
  -p 40 \
  --fast5 "${sample}_guppy_single" \
  --reference "${ref}" \
  --sam "${sample}.sam" \
  --output "${sample}.signal.tsv" \
  --clip 10
for base in A C; do
  python scripts/extract_feature_from_signal.py \
    --signal_file "${sample}.signal.tsv" \
    --clip 10 \
    --output "${sample}_${base}.feature.tsv" \
    --motif "NN${base}NN"
done

#### IVET m1A

In [None]:
#IVET m1A
sample=IVET_m1A
ref=../data/IVET_reference.fa

#basecalling: reads ${sample}_fast5, writes ${sample}_guppy
guppy_basecaller \
  -i "${sample}_fast5" \
  -s "${sample}_guppy" \
  --num_callers 40 \
  --recursive \
  --fast5_out \
  --config rna_r9.4.1_70bps_hac.cfg

#multi to single: convert the guppy output into ${sample}_guppy_single
multi_to_single_fast5 \
  -i "${sample}_guppy" \
  -s "${sample}_guppy_single" \
  -t 40 \
  --recursive

#resquiggle the single-read fast5 files against the IVET reference
tombo resquiggle \
  --overwrite \
  --basecall-group Basecall_1D_001 \
  "${sample}_guppy_single" "${ref}" \
  --processes 20 \
  --fit-global-scale \
  --include-event-stdev

#merge every FASTQ in guppy's pass/ folder into a single file
cat "${sample}_guppy"/pass/*.fastq > "${sample}.fastq"

#map reads to reference
minimap2 -ax map-ont "${ref}" "${sample}.fastq" > "${sample}.sam"

#feature extraction: signal table first, then features for the NNANN motif
python scripts/extract_signal_from_fast5.py \
  -p 40 \
  --fast5 "${sample}_guppy_single" \
  --reference "${ref}" \
  --sam "${sample}.sam" \
  --output "${sample}.signal.tsv" \
  --clip 10
python scripts/extract_feature_from_signal.py \
  --signal_file "${sample}.signal.tsv" \
  --clip 10 \
  --output "${sample}.feature.tsv" \
  --motif NNANN


#### IVET m6A

In [None]:
#IVET m6A
sample=IVET_m6A
ref=../data/IVET_reference.fa

#basecalling: reads ${sample}_fast5, writes ${sample}_guppy
guppy_basecaller \
  -i "${sample}_fast5" \
  -s "${sample}_guppy" \
  --num_callers 40 \
  --recursive \
  --fast5_out \
  --config rna_r9.4.1_70bps_hac.cfg

#multi to single: convert the guppy output into ${sample}_guppy_single
multi_to_single_fast5 \
  -i "${sample}_guppy" \
  -s "${sample}_guppy_single" \
  -t 40 \
  --recursive

#resquiggle the single-read fast5 files against the IVET reference
tombo resquiggle \
  --overwrite \
  --basecall-group Basecall_1D_001 \
  "${sample}_guppy_single" "${ref}" \
  --processes 20 \
  --fit-global-scale \
  --include-event-stdev

#merge every FASTQ in guppy's pass/ folder into a single file
cat "${sample}_guppy"/pass/*.fastq > "${sample}.fastq"

#map reads to reference
minimap2 -ax map-ont "${ref}" "${sample}.fastq" > "${sample}.sam"

#feature extraction: signal table first, then features for the NNANN motif
python scripts/extract_signal_from_fast5.py \
  -p 40 \
  --fast5 "${sample}_guppy_single" \
  --reference "${ref}" \
  --sam "${sample}.sam" \
  --output "${sample}.signal.tsv" \
  --clip 10
python scripts/extract_feature_from_signal.py \
  --signal_file "${sample}.signal.tsv" \
  --clip 10 \
  --output "${sample}.feature.tsv" \
  --motif NNANN

#### IVET m5C

In [None]:
#IVET m5C
sample=IVET_m5C
ref=../data/IVET_reference.fa

#basecalling: reads ${sample}_fast5, writes ${sample}_guppy
guppy_basecaller \
  -i "${sample}_fast5" \
  -s "${sample}_guppy" \
  --num_callers 40 \
  --recursive \
  --fast5_out \
  --config rna_r9.4.1_70bps_hac.cfg

#multi to single: convert the guppy output into ${sample}_guppy_single
multi_to_single_fast5 \
  -i "${sample}_guppy" \
  -s "${sample}_guppy_single" \
  -t 40 \
  --recursive

#resquiggle the single-read fast5 files against the IVET reference
tombo resquiggle \
  --overwrite \
  --basecall-group Basecall_1D_001 \
  "${sample}_guppy_single" "${ref}" \
  --processes 20 \
  --fit-global-scale \
  --include-event-stdev

#merge every FASTQ in guppy's pass/ folder into a single file
cat "${sample}_guppy"/pass/*.fastq > "${sample}.fastq"

#map reads to reference
minimap2 -ax map-ont "${ref}" "${sample}.fastq" > "${sample}.sam"

#feature extraction: signal table first, then features for the NNCNN motif
python scripts/extract_signal_from_fast5.py \
  -p 40 \
  --fast5 "${sample}_guppy_single" \
  --reference "${ref}" \
  --sam "${sample}.sam" \
  --output "${sample}.signal.tsv" \
  --clip 10
python scripts/extract_feature_from_signal.py \
  --signal_file "${sample}.signal.tsv" \
  --clip 10 \
  --output "${sample}.feature.tsv" \
  --motif NNCNN

### Train-test split

In [None]:
#Split each feature table into train/test files with a 0.8 train ratio.
#For every prefix P the input is P.feature.tsv and the outputs are
#P.train.feature.tsv and P.test.feature.tsv.
for prefix in IVET_unmodified_A IVET_unmodified_C IVET_m1A IVET_m6A IVET_m5C; do
  python scripts/train_test_split.py \
    --input_file "${prefix}.feature.tsv" \
    --train_file "${prefix}.train.feature.tsv" \
    --test_file "${prefix}.test.feature.tsv" \
    --train_ratio 0.8
done

#### Model training

In [None]:
#Train one IVET model with TandemMod.
#  $1 = modification name, used for the model and modified-sample file names
#  $2 = base (A or C) selecting the matching unmodified feature files
#The trained model is written to model/<modification>.IVET.pkl.
train_ivet_model() {
  local mod="$1" base="$2"
  python scripts/TandemMod.py --run_mode train \
    --new_model "model/${mod}.IVET.pkl" \
    --train_data_mod "IVET_${mod}.train.feature.tsv" \
    --train_data_unmod "IVET_unmodified_${base}.train.feature.tsv" \
    --test_data_mod "IVET_${mod}.test.feature.tsv" \
    --test_data_unmod "IVET_unmodified_${base}.test.feature.tsv" \
    --epoch 100
}

#train m6A model
train_ivet_model m6A A

#train m5C model
train_ivet_model m5C C