version 1.0

# Paired-end FASTQ quality-trimming workflow: runs fastp once per sample.
workflow fastp {
    input {
        Array[String] samples  # sample IDs; expects files <id>_1.fastq / <id>_2.fastq
        File fastq_dir         # NOTE(review): used as a directory path; WDL 1.0 has no Directory type — confirm backend accepts this
    }

    # One fastp call per sample, run in parallel.
    scatter (sample_id in samples) {
        call pair_end {
            input:
                in1 = fastq_dir + '/' + sample_id + '_1.fastq',
                in2 = fastq_dir + '/' + sample_id + '_2.fastq',
                sample_id = sample_id
        }
    }

    # Outputs gathered from a scatter are arrays (one element per sample).
    # The original output block was commented out and declared scalar `File`,
    # which is a type error for scatter-gathered outputs — restored here with
    # the correct Array[File] types.
    output {
        Array[File] clean_out1 = pair_end.out1
        Array[File] clean_out2 = pair_end.out2
        Array[File] html_report = pair_end.html_report
        Array[File] json_report = pair_end.json_report
    }
}

# Runs fastp on one paired-end sample, producing cleaned FASTQs plus
# JSON/HTML QC reports.
task pair_end {
    input {
        # I/O options
        File in1                     # raw read-1 FASTQ
        File in2                     # raw read-2 FASTQ
        String sample_id             # prefix for all output file names
        Boolean? phred64 = false     # input quality scores are phred64-encoded
        Boolean? fix_mgi_id = false  # repair MGI-style read IDs
        String? adapter_sequence     # read-1 adapter (fastp auto-detects when unset)
        String? adapter_sequence_r2  # read-2 adapter
        Int? reads_to_process        # how many reads/pairs to process; fastp's default 0 means all

        # reporting options
        # NOTE(review): names concatenate with no separator (e.g. "S1fastp.json");
        # probably intended sample_id + ".fastp.json" — kept as-is because these
        # defaults are part of the task's interface.
        String json = sample_id + "fastp.json"
        String html = sample_id + "fastp.html"
        String report_title = "\'fastp report\'"

        # execution environment
        Int cpu = 2
        String memory = "4G"
        String disks = "local-disk 50 cloud_ssd"
    }

    String out1_name = sample_id + 'clean_1.fastq'
    String out2_name = sample_id + 'clean_2.fastq'

    # BUG FIX: the original command placed a `# options` shell comment inside
    # the backslash line-continuation chain. The continuation spliced the
    # comment into the middle of the command line, and since `#` starts a
    # shell comment there, every argument after it (--phred64,
    # --reads_to_process, --fix_mgi_id, --adapter_sequence*) was silently
    # discarded. The comment has been removed from the chain.
    # Optional-valued placeholders (e.g. "--reads_to_process " + reads_to_process)
    # render as empty strings when the input is unset, per WDL 1.0 semantics.
    command <<<
        /opt/conda/bin/fastp \
            --in1 ~{in1} \
            --in2 ~{in2} \
            --out1 ~{out1_name} \
            --out2 ~{out2_name} \
            --json ~{json} \
            --html ~{html} \
            --report_title ~{report_title} \
            ~{true="--phred64 " false="" phred64} \
            ~{"--reads_to_process " + reads_to_process} \
            ~{true="--fix_mgi_id " false="" fix_mgi_id} \
            ~{"--adapter_sequence " + adapter_sequence} \
            ~{"--adapter_sequence_r2 " + adapter_sequence_r2}
    >>>

    runtime {
        cpu: cpu
        memory: memory
        disks: disks
        docker: "registry-vpc.cn-shanghai.aliyuncs.com/easygene/fastp:v0.20.1_cv1"
    }

    output {
        File out1 = out1_name
        File out2 = out2_name
        File json_report = json
        File html_report = html
    }
}