It is used to investigate the dependence between multiple variables at the same time and to highlight the most correlated variables in a data table.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. (ns tservice.plugins.corrplot
  2. (:require [clojure.data.json :as json]
  3. [clojure.spec.alpha :as s]
  4. [clojure.tools.logging :as log]
  5. [spec-tools.core :as st]
  6. [tservice.util :as u]
  7. [tservice.lib.files :as ff]
  8. [tservice.lib.fs :as fs-lib]
  9. [tservice.plugins.corrplot.common :as corrplot]
  10. [tservice.api.task :refer [publish-event! make-plugin-metadata make-events-init create-task! update-process!]]))
  11. ;;; ------------------------------------------------ Event Specs ------------------------------------------------
  ;; Request parameter `datafile`: a string holding the path of the data file.
  (s/def ::datafile
    (st/spec
     {:type            :string
      :spec            string?
      :swagger/default ""
      :description     "A path for data file."
      :reason          "The datafile must be string."}))
  ;; Request parameter `corr_vars`: a collection whose elements are all strings
  ;; (validated with `s/coll-of string?`).
  (s/def ::corr_vars
    (st/spec
     {:type            :array
      :spec            (s/coll-of string?)
      :swagger/default nil
      :description     "Variables."
      :reason          "The corr_vars must be a vector."}))
  ;; Request parameter `method`: how the correlation matrix is drawn.
  ;; Only the exact strings "square" and "circle" are accepted.
  (s/def ::method
    (st/spec
     {:spec            #{"square" "circle"}
      :type            :string
      :description     "The visualization method of correlation matrix to be used. Allowed values are square (default), circle."
      :swagger/default "square"
      ;; The previous message was copy-pasted from ::corr_vars and described
      ;; the wrong parameter.
      :reason          "The method must be one of square, circle."}))
  ;; Request parameter `corr_type`: which part of the matrix is displayed.
  ;; Accepts exactly one of "full", "lower" or "upper".
  (s/def ::corr_type
    (st/spec
     {:type            :string
      :spec            #{"full" "lower" "upper"}
      :swagger/default "full"
      :description     "full (default), lower or upper display."
      :reason          "The corr_type must be one of full, lower, upper."}))
  ;; Request parameter `hc_method`: agglomeration method name.
  ;; The spec is a set of literal strings, so only exact matches validate.
  (s/def ::hc_method
    (st/spec
     {:spec            #{"ward.D" "ward.D2" "single" "complete" "average" "mcquitty" "median" "centroid"}
      :type            :string
      :description     "The agglomeration method to be used in hclust (see ?hclust)."
      :swagger/default "complete"
      ;; The message used to promise that "an unambiguous abbreviation" was
      ;; accepted, which the set spec above does not allow, and it carried an
      ;; accidental line break. It now states the rule actually enforced.
      :reason          "The hc_method must be one of ward.D, ward.D2, single, complete, average, mcquitty, median or centroid."}))
  ;; Request parameter `hc_order`: boolean flag.
  ;;
  ;; NOTE: the spec must be `boolean?`, not the set `#{true false}`. A set used
  ;; as a predicate returns the matched element, so `(#{true false} false)`
  ;; evaluates to `false` and a perfectly valid `false` input would be rejected.
  (s/def ::hc_order
    (st/spec
     {:spec            boolean?
      :type            :bool
      :description     "Logical value. If TRUE, correlation matrix will be hc.ordered using hclust function."
      :swagger/default true
      :reason          "The hc_order must be one of true, false."}))
  ;; Request parameter `sig_level`: a number in the closed interval [0, 1].
  ;;
  ;; The `number?` guard comes first so that non-numeric input (e.g. a string
  ;; or nil) fails validation instead of making the comparison throw a
  ;; ClassCastException / NullPointerException.
  (s/def ::sig_level
    (st/spec
     {:spec            #(and (number? %) (<= 0 % 1))
      :type            :float
      :description     "Significant level, greater than 0 and less than 1."
      :swagger/default 0.05
      ;; Mention the range as well: an out-of-range float is rejected too.
      :reason          "The sig_level must be a number between 0 and 1."}))
  (def corrplot-params-body
    "Spec of the request body: `datafile` and `corr_vars` are required keys,
    while `sig_level`, `hc_order`, `hc_method`, `corr_type` and `method` are
    optional. All keys are unqualified."
    (s/keys
     :req-un [::datafile ::corr_vars]
     :opt-un [::sig_level ::hc_order ::hc_method ::corr_type ::method]))
  66. ;;; ------------------------------------------------ Event Metadata ------------------------------------------------
  ;; Plugin metadata for the "corrplot" plugin, built with make-plugin-metadata.
  ;;
  ;; The :handler receives the request payload (validated against
  ;; corrplot-params-body), fills in defaults for the optional parameters,
  ;; registers a task, prepares the working directory and its log file, and
  ;; publishes a "corrplot" event carrying everything the event handler needs.
  ;; It returns the `response` map describing the files that will be produced.
  (def metadata
    (make-plugin-metadata
     {:name "corrplot"
      :summary "It is used to investigate the dependence between multiple variables at the same time and to highlight the most correlated variables in a data table."
      :params-schema corrplot-params-body
      :handler (fn [{:keys [datafile corr_vars sig_level hc_order hc_method corr_type method plugin-env]
                     :or {sig_level 0.05
                          hc_order true
                          hc_method "complete"
                          corr_type "full"
                          method "square"}
                     :as payload}]
                 ;; log/infof (not log/info) is required for the %s placeholder
                 ;; to be substituted; log/info would print the literal "%s".
                 (log/infof "Make a correlation plot with %s" payload)
                 (let [workdir (ff/get-workdir)
                       log-path (fs-lib/join-paths workdir "log")
                       response {:files [(fs-lib/join-paths workdir "plotly.json")
                                         (fs-lib/join-paths workdir "result.md")]
                                 :log log-path
                                 :response-type :data2files}
                       ;; The task stores the payload as received, i.e. without
                       ;; the defaults applied by the destructuring above.
                       task-id (create-task! {:name (str "corrplot" (u/datetime))
                                              :description "Make a correlation plot."
                                              :payload payload
                                              :plugin-name (:plugin-name plugin-env)
                                              :plugin-type (:plugin-type plugin-env)
                                              :plugin-version (:plugin-version plugin-env)
                                              :response response})]
                   (fs-lib/create-directories! workdir)
                   ;; Seed the log file with a "Running" status before the
                   ;; event is published.
                   (spit log-path (json/write-str {:status "Running" :msg ""}))
                   (update-process! task-id 0)
                   (publish-event! "corrplot"
                                   {:context {:datafile datafile
                                              :corr_vars corr_vars
                                              :sig_level sig_level
                                              :hc_order hc_order
                                              :hc_method hc_method
                                              :corr_type corr_type
                                              :method method
                                              :title "Correlation Plot"}
                                    :template-dir (fs-lib/join-paths (:config-dir plugin-env) "templates")
                                    :env-dir (:env-dir plugin-env)
                                    :dest-dir workdir
                                    :task-id task-id})
                   response))
      :plugin-type :ChartPlugin
      :response-type :data2files}))
  112. ;;; ------------------------------------------------ Event Processing ------------------------------------------------
  (defn- corrplot!
    "Handle one \"corrplot\" event.

    Takes a map with keys `context`, `dest-dir`, `template-dir`, `env-dir` and
    `task-id`. Renders the arguments file from the template, reports 30%
    progress, runs the plot via `corrplot/call-corrplot!`, writes the result as
    JSON to the log file in `dest-dir`, and finally sets the task progress to
    100 when the result status is \"Success\" or to -1 otherwise."
    [{:keys [context dest-dir template-dir env-dir task-id]}]
    (let [template-file  (fs-lib/join-paths template-dir "args.json.template")
          arguments-file (fs-lib/join-paths dest-dir "arguments.json")
          plot-file      (fs-lib/join-paths dest-dir "plotly.json")
          log-file       (fs-lib/join-paths dest-dir "log")]
      ;; Step 1: materialise the arguments file from the template.
      (corrplot/make-args-json! template-file context arguments-file)
      (log/info "Make an argument json file: " arguments-file)
      (update-process! task-id 30)
      ;; Step 2: run the plot and record its outcome.
      (let [outcome    (corrplot/call-corrplot! arguments-file plot-file env-dir)
            succeeded? (= (:status outcome) "Success")]
        (spit log-file (json/write-str outcome))
        (update-process! task-id (if succeeded? 100 -1))
        (if succeeded?
          (log/info "The task is finished, result file is " plot-file)
          (log/error "The task is failed, error msg is located in " log-file)))))
  131. ;;; --------------------------------------------------- Lifecycle ----------------------------------------------------
  (def events-init
    "Event initializer for this plugin: the value returned by `make-events-init`
    when given the \"corrplot\" event name and the `corrplot!` handler.
    Automatically called during startup to start the corrplot event listener."
    (make-events-init
     "corrplot"
     corrplot!))