Merge RNA-Seq expression tables and generate a MultiQC report.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

99 lines
3.2KB

  1. (ns tservice.plugins.merge-rnaseq-expression.commons
  2. (:require [clojure.data.csv :as csv]
  3. [clojure.string :as clj-str]
  4. [tservice.lib.files :refer [get-tservice-workdir get-plugin-jar-dir get-path-variable]]
  5. [tservice.lib.fs :as fs-lib]
  6. [clojure.java.io :as io]
  7. [clojure.java.shell :as shell :refer [sh]])
  8. (:import [org.apache.commons.io.input BOMInputStream]))
  (defn add-env-to-path
    "Return a PATH-style string in which the `envs/<plugin-name>/bin`
     directory under the plugin jar dir comes first, followed by a colon
     and the value returned by `get-path-variable`."
    [plugin-name]
    (let [plugin-bin   (fs-lib/join-paths (get-plugin-jar-dir) "envs" plugin-name "bin")
          current-path (get-path-variable)]
      ;; The plugin's bin directory is placed before the existing path value.
      (clj-str/join ":" [plugin-bin current-path])))
  (defn hashmap->parameters
    "Flatten a collection of option/value entries into one space-separated
     string, e.g. {\"-d\" \"true\", \"-o\" \"output\"} -> \"-d true -o output\".
     Each entry is joined with a single space, then all entries are joined
     with a single space. No quoting or escaping is applied."
    [coll]
    (->> coll
         (map (fn [entry] (clj-str/join " " entry)))
         (clj-str/join " ")))
  (defn call-command!
    "Run `cmd` followed by the flattened `parameters-coll` through `bash -c`.

     The subprocess environment is set via `with-sh-env` to PATH (from
     `add-env-to-path` for \"merge-rnaseq-expression\"), LC_ALL and LANG.

     Returns a map with :status (\"Success\" when the exit code is 0,
     otherwise \"Error\") and :msg (stdout, a newline, then stderr).

     NOTE: `cmd` and the parameters are interpolated into the shell command
     line without any quoting, so values containing spaces or shell
     metacharacters are interpreted by bash."
    [cmd parameters-coll]
    (let [env {:PATH (add-env-to-path "merge-rnaseq-expression")
               :LC_ALL "en_US.utf-8"
               :LANG "en_US.utf-8"}]
      (shell/with-sh-env env
        (let [script (format "%s %s" cmd (hashmap->parameters parameters-coll))
              {:keys [exit out err]} (sh "bash" "-c" script)]
          {:status (if (= exit 0) "Success" "Error")
           :msg (str out "\n" err)}))))
  (defn csv-data->maps
    "Turn parsed CSV rows into a lazy sequence of maps. The first row is
     treated as the header and its cells become keyword keys; every
     following row is zipped against those keys."
    [csv-data]
    (let [header-keys (map keyword (first csv-data))]
      (map (fn [row] (zipmap header-keys row))
           (rest csv-data))))
  (defn bom-reader
    "Open `filepath` as an input stream, wrap it in a `BOMInputStream`
     (to remove a leading Byte Order Mark) and return a reader over it.
     The caller is responsible for closing the returned reader."
    [filepath]
    (let [raw-stream (io/input-stream filepath)
          bom-free   (BOMInputStream. raw-stream)]
      (io/reader bom-free)))
  (defn guess-separator
    "Guess the field separator of the delimited text file at `filepath`.

     Only the first line (read through `bom-reader`) is inspected. The
     candidates are tab, comma, semicolon and space; the one occurring most
     often in that line is returned as a character.

     Ties, including a header that contains none of the candidates, are
     resolved in favour of the earlier candidate in the list above, so the
     result is deterministic. An empty file is treated as an empty header
     and yields tab instead of throwing a NullPointerException."
    [filepath]
    (with-open [reader (bom-reader filepath)]
      (let [header      (or (first (line-seq reader)) "") ; nil for an empty file
            seps        [\tab \, \; \space]
            ;; Count raw occurrences rather than split fields: splitting drops
            ;; trailing empty fields and would undercount the separator.
            occurrences (frequencies header)]
        ;; Strict `>` keeps the earlier candidate when counts are equal.
        (reduce (fn [best sep]
                  (if (> (get occurrences sep 0) (get occurrences best 0))
                    sep
                    best))
                seps))))
  (defn read-csv
    "Read the delimited text file `file` into a fully realized sequence of
     maps keyed by the header row (see `csv-data->maps`).

     The separator is detected with `guess-separator`. The file is read
     through `bom-reader`, the same reader `guess-separator` uses, so that a
     leading Byte Order Mark does not end up inside the first header key.

     Returns nil when `file` is not an existing regular file."
    [^String file]
    (when (.isFile (io/file file))
      (let [separator (guess-separator file)]
        (with-open [reader (bom-reader file)]
          ;; doall: rows must be realized before with-open closes the reader.
          (doall
           (csv-data->maps (csv/read-csv reader :separator separator)))))))
  (defn vec-remove
    "Return a new vector equal to `coll` without the element at index `pos`.
     `coll` must be a vector (it is sliced with `subvec`)."
    [pos coll]
    (let [before (subvec coll 0 pos)
          after  (subvec coll (inc pos))]
      (-> []
          (into before)
          (into after))))
  (defn write-csv!
    "Write `row-data` (a vector of maps) to `path` as tab-separated text.
     The column order and the header names come from the keys of the first
     map; each row contributes its values for those keys in that order."
    [path row-data]
    (let [ks         (-> row-data first keys)
          header-row (map name ks)
          body-rows  (mapv (fn [row] (mapv row ks)) row-data)
          table      (cons header-row body-rows)]
      (with-open [out (io/writer path)]
        (csv/write-csv out table :separator \tab))))
  (defn write-csv-by-cols!
    "Write `row-data` (a collection of maps) to `path` using the default
     separator of `csv/write-csv`. Only the given `columns` are written,
     in the given order; the header row holds the `name` of each column."
    [path row-data columns]
    (let [header-row (map name columns)
          body-rows  (mapv (fn [row] (mapv row columns)) row-data)
          table      (cons header-row body-rows)]
      (with-open [out (io/writer path)]
        (csv/write-csv out table))))
  (defn is-localpath?
    "Return `filepath` itself (truthy) when it starts with \"file://\",
     otherwise nil."
    [filepath]
    (let [local-uri-pattern #"^file:\/\/.*"]
      (re-matches local-uri-pattern filepath)))
  (defn correct-filepath
    "Normalize a `file://` path into a plain filesystem path.

     - Not a `file://` path: returned unchanged.
     - `file:///...` (absolute): the `file://` prefix is stripped.
     - `file://...` (relative): the prefix is stripped and the remainder is
       joined onto the directory returned by `get-tservice-workdir`."
    [filepath]
    (if-not (is-localpath? filepath)
      filepath
      (let [stripped (clj-str/replace filepath #"^file:\/\/" "")]
        (if (re-matches #"^file:\/\/\/.*" filepath)
          ;; Absolute path with file://
          stripped
          (fs-lib/join-paths (get-tservice-workdir) stripped)))))