version 1.0

# Paired-end FASTQ quality-trimming workflow: runs fastp once per sample.
workflow fastp {
    input {
        Array[String] samples  # sample IDs; expects files <id>_1.fastq / <id>_2.fastq
        File fastq_dir         # NOTE(review): used as a directory path; WDL 1.0 has no Directory type — confirm backend accepts this
    }

    # One fastp call per sample, run in parallel.
    scatter (sample_id in samples) {
        call pair_end {
            input:
                in1 = fastq_dir + '/' + sample_id + '_1.fastq',
                in2 = fastq_dir + '/' + sample_id + '_2.fastq',
                sample_id = sample_id
        }
    }

    # Outputs gathered from a scatter are arrays (one element per sample).
    # The original output block was commented out and declared scalar `File`,
    # which is a type error for scatter-gathered outputs — restored here with
    # the correct Array[File] types.
    output {
        Array[File] clean_out1 = pair_end.out1
        Array[File] clean_out2 = pair_end.out2
        Array[File] html_report = pair_end.html_report
        Array[File] json_report = pair_end.json_report
    }
}

# Runs fastp on one paired-end sample, producing cleaned FASTQs plus
# JSON/HTML QC reports.
task pair_end {
    input {
        # I/O options
        File in1                     # raw read-1 FASTQ
        File in2                     # raw read-2 FASTQ
        String sample_id             # prefix for all output file names
        Boolean? phred64 = false     # input quality scores are phred64-encoded
        Boolean? fix_mgi_id = false  # repair MGI-style read IDs
        String? adapter_sequence     # read-1 adapter (fastp auto-detects when unset)
        String? adapter_sequence_r2  # read-2 adapter
        Int? reads_to_process        # how many reads/pairs to process; fastp's default 0 means all

        # reporting options
        # NOTE(review): names concatenate with no separator (e.g. "S1fastp.json");
        # probably intended sample_id + ".fastp.json" — kept as-is because these
        # defaults are part of the task's interface.
        String json = sample_id + "fastp.json"
        String html = sample_id + "fastp.html"
        String report_title = "\'fastp report\'"

        # execution environment
        Int cpu = 2
        String memory = "4G"
        String disks = "local-disk 50 cloud_ssd"
    }

    String out1_name = sample_id + 'clean_1.fastq'
    String out2_name = sample_id + 'clean_2.fastq'

    # BUG FIX: the original command placed a `# options` shell comment inside
    # the backslash line-continuation chain. The continuation spliced the
    # comment into the middle of the command line, and since `#` starts a
    # shell comment there, every argument after it (--phred64,
    # --reads_to_process, --fix_mgi_id, --adapter_sequence*) was silently
    # discarded. The comment has been removed from the chain.
    # Optional-valued placeholders (e.g. "--reads_to_process " + reads_to_process)
    # render as empty strings when the input is unset, per WDL 1.0 semantics.
    command <<<
        /opt/conda/bin/fastp \
            --in1 ~{in1} \
            --in2 ~{in2} \
            --out1 ~{out1_name} \
            --out2 ~{out2_name} \
            --json ~{json} \
            --html ~{html} \
            --report_title ~{report_title} \
            ~{true="--phred64 " false="" phred64} \
            ~{"--reads_to_process " + reads_to_process} \
            ~{true="--fix_mgi_id " false="" fix_mgi_id} \
            ~{"--adapter_sequence " + adapter_sequence} \
            ~{"--adapter_sequence_r2 " + adapter_sequence_r2}
    >>>

    runtime {
        cpu: cpu
        memory: memory
        disks: disks
        docker: "registry-vpc.cn-shanghai.aliyuncs.com/easygene/fastp:v0.20.1_cv1"
    }

    output {
        File out1 = out1_name
        File out2 = out2_name
        File json_report = json
        File html_report = html
    }
}