"
+ exit 0
+ ;;
+esac
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+if [ ${cpu_name} == "aarch64" ]
+then
+ cpu_name="aarch_64"
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
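+# Note: ${!name} is bash indirect expansion -- it expands to the value of the
+# variable whose *name* is stored in $name. For example, if the sourced
+# properties file defines sparkVersion=3.1.1, then ${!spark_version} above
+# expands to 3.1.1 (version value illustrative).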
+
+table_name=$1
+col1=$2
+col2=$3
+colWeight=$4
+weighted=$5
+k=$6
+p=$7
+partition=$8
+save_mode=$9
+save_arg=${10}
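+# Example invocation (script name inferred from the runner class; all
+# argument values below are illustrative placeholders):
+#   bin/graph/closeness_run_hive.sh edge_table src dst weight true 100 0.01 232 saveToHive result_table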
+
+echo "table_name: $table_name"
+echo "col1: $col1"
+echo "col2: $col2"
+echo "colWeight: $colWeight"
+echo "weighted: $weighted"
+echo "k: $k"
+echo "p: $p"
+echo "partition: $partition"
+echo "save_mode: $save_mode"
+echo "save_arg: $save_arg"
+
+spark-submit \
+--class com.bigdata.graph.ClosenessHiveRunner \
+--master yarn \
+--deploy-mode "client" \
+--num-executors 35 \
+--executor-memory "25g" \
+--executor-cores 4 \
+--driver-memory "16g" \
+--conf spark.worker.timeout=3600 \
+--conf spark.driver.maxResultSize=200g \
+--conf spark.rpc.askTimeout=36000 \
+--conf spark.network.timeout=6000s \
+--conf spark.broadcast.blockSize=4m \
+--conf spark.shuffle.manager=SORT \
+--conf spark.shuffle.blockTransferService=nio \
+--conf spark.locality.wait.node=0 \
+--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+--conf spark.rdd.compress=true \
+--conf spark.shuffle.compress=true \
+--conf spark.shuffle.spill.compress=true \
+--conf spark.io.compression.codec=lz4 \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${table_name} ${col1} ${col2} ${colWeight} ${weighted} ${k} ${p} ${partition} ${save_mode} ${save_arg}
diff --git a/tools/kal-test/bin/graph/clusteringcoefficient_run.sh b/tools/kal-test/bin/graph/clusteringcoefficient_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..73c994ed948ff5ec04944c9d90e7fc0c9a8751e8
--- /dev/null
+++ b/tools/kal-test/bin/graph/clusteringcoefficient_run.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: name of dataset: cit_patents,uk_2002,arabic_2005,graph500_22,graph500_23,graph500_24,graph500_25"
+ echo "2nd argument: name of api: lcc,avgcc,globalcc"
+ echo "3nd argument: weight or not: weighted,unweighted"
+ echo "4th argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/clusteringcoefficient/clusteringcoefficient_spark.properties
+
+dataset_name=$1
+api_name=$2
+weight=$3
+is_raw=$4
+
+if [ ${dataset_name} != "cit_patents" ] &&
+ [ ${dataset_name} != "uk_2002" ] &&
+ [ ${dataset_name} != "arabic_2005" ] &&
+ [ ${dataset_name} != "graph500_22" ] &&
+ [ ${dataset_name} != "graph500_23" ] &&
+ [ ${dataset_name} != "graph500_24" ] &&
+ [ ${dataset_name} != "graph500_25" ] ;then
+ echo "invalid dataset name,dataset name:cit_patents,uk_2002,arabic_2005,graph500_22,graph500_23,graph500_24,graph500_25"
+ exit 1
+fi
+if [ ${api_name} != "lcc" ] &&
+ [ ${api_name} != "avgcc" ] &&
+ [ ${api_name} != "globalcc" ] ;then
+ echo "invalid argument value,api name: lcc,avgcc,globalcc"
+ exit 1
+fi
+if [ ${weight} != "weighted" ] && [ ${weight} != "unweighted" ];then
+ echo "invalid argument value,must be: weighted or unweighted"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+num_executors="${dataset_name}_numExecutors_${cpu_name}"
+executor_cores="${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${dataset_name}_executorMemory_${cpu_name}"
+num_partitions="${dataset_name}_numPartitions_${cpu_name}"
+deploy_mode="deployMode"
+driver_memory="driverMemory"
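+# Each property key embeds the dataset name and CPU architecture, e.g.
+# cit_patents_numExecutors_aarch64 (illustrative), so one properties file can
+# hold per-dataset, per-platform tuning; the values are resolved below via
+# bash indirect expansion.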
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+driver_memory_val=${!driver_memory}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+echo "${driver_memory}:${driver_memory_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${num_partitions_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path="${output_path_prefix}/clusteringcoefficient/${is_raw}/${api_name}/${dataset_name}_${weight}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
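+# Writing 3 to /proc/sys/vm/drop_caches frees the page cache plus dentries and
+# inodes on every node (needs root), so each run starts from a cold cache; the
+# 30s sleep lets the cluster settle before the timed job is submitted.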
+
+echo "start to submit spark jobs -- clusteringcoefficient-${api_name}-${weight}-${dataset_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
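+ # The jars are pre-staged on each agent so the absolute paths given to
+ # spark.executor.extraClassPath below resolve locally on every executor host;
+ # --jars additionally ships them with the application, and --driver-class-path
+ # puts them on the driver JVM classpath in client mode.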
+
+ spark-submit \
+ --class com.bigdata.graph.ClusteringCoefficientRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.memoryOverhead=2048 \
+ --conf spark.executor.extraJavaOptions="-Xms12g" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${num_partitions_val} ${weight} ${is_raw} ${data_path_val} ${api_name} ${output_path} | tee ./log/log
+else
+ scp lib/lcc_kaiyuan.jar root@agent1:/opt/graph_classpath/
+ scp lib/lcc_kaiyuan.jar root@agent2:/opt/graph_classpath/
+ scp lib/lcc_kaiyuan.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.ClusteringCoefficientRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --name "clusteringcoefficient_${dataset_name}_${api_name}" \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.rpc.askTimeout=36000 \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.storage.blockManagerSlaveTimeoutMs=600000 \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=25g \
+ --conf spark.network.timeout=1200s \
+ --conf spark.rpc.message.maxSize=2046 \
+ --conf spark.core.connection.ack.wait.timeout=60000s \
+ --conf spark.executor.extraJavaOptions="-Xms35g" \
+ --conf spark.rdd.compress=true \
+ --jars "lib/lcc_kaiyuan.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/lcc_kaiyuan.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/lcc_kaiyuan.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${num_partitions_val} ${weight} ${is_raw} ${data_path_val} ${api_name} ${output_path} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/deepwalk_run.sh b/tools/kal-test/bin/graph/deepwalk_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b2cf0a4c6c31a59e13bb20fc3d85ad70931b8b43
--- /dev/null
+++ b/tools/kal-test/bin/graph/deepwalk_run.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents_deepwalk"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ]; then
+ usage
+ exit 0
+fi
+
+source conf/graph/deepwalk/deepwalk_spark.properties
+
+dataset_name=$1
+is_raw=$2
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${extra_java_options_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+data_path=${dataset_name}
+data_path_val=${!data_path}
+echo "${dataset_name} : ${data_path_val}"
+
+model_conf=${dataset_name}-${cpu_name}
+
+outputPath="/tmp/graph/result/deepwalk/${dataset_name}/${is_raw}"
+hdfs dfs -rm -r -f ${outputPath}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- DeepWalk"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+spark-submit \
+ --class com.bigdata.graph.DeepWalkRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 300g \
+ --conf spark.kryoserializer.buffer.max=2047m \
+ --conf spark.ui.showConsoleProgress=true \
+ --conf spark.driver.maxResultSize=0 \
+ --conf spark.driver.extraJavaOptions="-Xms300G -XX:hashCode=0" \
+ --conf spark.executor.extraJavaOptions="-Xms315G -XX:hashCode=0" \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${outputPath} ${is_raw} | tee ./log/log
+
+else
+
+ walkLength="walkLength_"${dataset_name}_${cpu_name}
+ numWalks="numWalks_"${dataset_name}_${cpu_name}
+ dimension="dimension_"${dataset_name}_${cpu_name}
+ partitions="partitions_"${dataset_name}_${cpu_name}
+ iteration="iteration_"${dataset_name}_${cpu_name}
+ windowSize="windowSize_"${dataset_name}_${cpu_name}
+ splitGraph="splitGraph_"${dataset_name}_${cpu_name}
+
+ walkLength_val=${!walkLength}
+ numWalks_val=${!numWalks}
+ dimension_val=${!dimension}
+ partitions_val=${!partitions}
+ iteration_val=${!iteration}
+ windowSize_val=${!windowSize}
+ splitGraph_val=${!splitGraph}
+
+spark-submit \
+ --class com.nrl.SparkedDeepWalkApp \
+ --master yarn \
+ --num-executors 6 \
+ --executor-memory 95g \
+ --driver-memory 300g \
+ --executor-cores 38 \
+ --driver-cores 80 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer=48m \
+ --conf spark.driver.extraJavaOptions="-Xms300g -XX:hashCode=0" \
+ --conf spark.executor.extraJavaOptions="-Xms95g -XX:hashCode=0" \
+ --conf spark.driver.maxResultSize=0 \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ ./lib/sparked-deepwalk_2.11-1.0.jar "" ${data_path_val} "" "" "" "" ${outputPath} ${walkLength_val} ${numWalks_val} ${dimension_val} ${partitions_val} ${iteration_val} ${windowSize_val} ${splitGraph_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/degree_run.sh b/tools/kal-test/bin/graph/degree_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cddeeac47e48038c58fc28c5a1c8ccfef066371b
--- /dev/null
+++ b/tools/kal-test/bin/graph/degree_run.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: it_2004,twitter7,uk_2007_05,mycielskian20,gap_kron,com_friendster"
+ echo "2nd argument: name of api: degrees,inDegrees,outDegrees"
+ echo "3rd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/degree/degree_spark.properties
+
+dataset_name=$1
+api_name=$2
+is_raw=$3
+
+if [ ${dataset_name} != "it_2004" ] &&
+ [ ${dataset_name} != "twitter7" ] &&
+ [ ${dataset_name} != "uk_2007_05" ] &&
+ [ ${dataset_name} != "mycielskian20" ] &&
+ [ ${dataset_name} != "gap_kron" ] &&
+ [ ${dataset_name} != "com_friendster" ] ;then
+ echo "invalid dataset name,dataset name:it_2004,twitter7,uk_2007_05,mycielskian20,gap_kron,com_friendster"
+ exit 1
+fi
+
+if [ ${api_name} != "degrees" ] &&
+ [ ${api_name} != "inDegrees" ] &&
+ [ ${api_name} != "outDegrees" ];then
+ echo "invalid api name,api name: degrees,inDegrees,outDegrees"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+num_executors="${api_name}_${dataset_name}_numExecutors_${cpu_name}"
+executor_cores="${api_name}_${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${api_name}_${dataset_name}_executorMemory_${cpu_name}"
+num_partitions="${api_name}_${dataset_name}_numPartitions_${cpu_name}"
+split="${dataset_name}_splitGraph"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+extra_java_options_val="-Xms${executor_memory_val}"
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+echo "extra_java_options_val : ${extra_java_options_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${extra_java_options_val}" ] ||
+   [ -z "${num_partitions_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path="${output_path_prefix}/degree/${is_raw}/${dataset_name}_${api_name}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- degree-${api_name}-${dataset_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.DegreeRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 200g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.driver.maxResultSize=0 \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.broadcast.blockSize=1m \
+ --conf spark.reducer.maxSizeInFlight=59mb \
+ --conf spark.shuffle.file.buffer=17k \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.io.compression.codec=lzf \
+ --conf spark.shuffle.compress=true \
+ --conf spark.rdd.compress=false \
+ --conf spark.shuffle.io.preferDirectBufs=true \
+ --conf spark.shuffle.spill.compress=true \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar,lib/boostkit-graph-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${is_raw} ${data_path_val} ${output_path} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.DegreeRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 200g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.driver.maxResultSize=0 \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.broadcast.blockSize=1m \
+ --conf spark.reducer.maxSizeInFlight=59mb \
+ --conf spark.shuffle.file.buffer=17k \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.io.compression.codec=lzf \
+ --conf spark.shuffle.compress=true \
+ --conf spark.rdd.compress=false \
+ --conf spark.shuffle.io.preferDirectBufs=true \
+ --conf spark.shuffle.spill.compress=true \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${is_raw} ${data_path_val} ${output_path} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/fraudar_run.sh b/tools/kal-test/bin/graph/fraudar_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c855eec141c1e47cd34d27908c1e202f7fa58eba
--- /dev/null
+++ b/tools/kal-test/bin/graph/fraudar_run.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: alpha,amazon,otc"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+is_raw=$2
+
+if [ $dataset_name != 'alpha' ] && [ $dataset_name != 'amazon' ] && [ $dataset_name != 'otc' ];
+then
+ echo 'invalid dataset'
+ echo "dataset name: alpha or amazon or otc"
+ exit 0
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${cpu_name}-${is_raw}
+
+source conf/graph/fraudar/fraudar_spark.properties
+num_executors_val="numExecutors_${dataset_name}_${cpu_name}"
+executor_cores_val="executorCores_${dataset_name}_${cpu_name}"
+executor_memory_val="executorMemory_${dataset_name}_${cpu_name}"
+executor_extra_javaopts_val="executorExtraJavaopts_${dataset_name}_${cpu_name}"
+
+master_val="master"
+deploy_mode_val="deployMode"
+driver_memory_val="driverMemory"
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+master=${!master_val}
+driver_memory=${!driver_memory_val}
+deploy_mode=${!deploy_mode_val}
+executor_extra_javaopts=${!executor_extra_javaopts_val}
+if [ -z "${num_executors}" ] \
+    || [ -z "${executor_cores}" ] \
+    || [ -z "${executor_memory}" ] \
+    || [ -z "${master}" ]; then
+  echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+echo "${master_val}:${master}"
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "${executor_extra_javaopts_val}:${executor_extra_javaopts}"
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+
+iset_out_path="/tmp/graph/result/fraudar/${is_raw}/${dataset_name}_i"
+jset_out_path="/tmp/graph/result/fraudar/${is_raw}/${dataset_name}_j"
+echo "${dataset_name}: ${input_path}"
+echo ""outputPath:${iset_out_path},${jset_out_path}""
+echo "start to clean exist output"
+hdfs dfs -rm -r -f ${iset_out_path}
+hdfs dfs -rm -r -f ${jset_out_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- fraudar-${dataset_name}"
+
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.FraudarRunner \
+ --master ${master} \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.rdd.compress=true \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${input_path} ${iset_out_path} ${jset_out_path} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.FraudarRunner \
+ --master ${master} \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.rdd.compress=true \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${iset_out_path} ${jset_out_path} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/graph/inccc_run.sh b/tools/kal-test/bin/graph/inccc_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..aae6d24e1ba2fcc922200c6121f2da62b365f880
--- /dev/null
+++ b/tools/kal-test/bin/graph/inccc_run.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: graph500_26, com_Friendster, webbase_2001"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ]; then
+ usage
+ exit 0
+fi
+
+source conf/graph/inccc/inccc_spark.properties
+
+# Incremental-data simulation: root path of the incremental and base graphs
+inc_data_root_path=/tmp/graph/incCC/data
+# Root path of the connected components precomputed on the base graph
+orgcc_root_path=/tmp/graph/incCC/orgCC
+rate=0.01
+
+dataset_name=$1
+is_raw=$2
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${cpu_name}-${is_raw}
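+# model_conf packs dataset, CPU architecture and raw/optimized mode into one
+# key, which the runner presumably uses to look up per-configuration parameters.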
+
+# concatenate strings as a new variable
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${extra_java_options_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+data_path=${dataset_name}
+data_path_val=${!data_path}
+echo "${dataset_name} : ${data_path_val}"
+
+orgccPath=${orgcc_root_path}/${dataset_name}_${rate}_single
+incgraphPath=${inc_data_root_path}/${dataset_name}_${rate}/inc_${rate}_5
+outputPath="/tmp/graph/result/inccc/${dataset_name}/${is_raw}"
+hdfs dfs -rm -r -f ${outputPath}
+
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- IncConnectedComponents"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+spark-submit \
+ --class com.bigdata.graph.IncConnectedComponentsRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 200g \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.rpc.askTimeout=36000 \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.akka.timeout=3600 \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.network.timeout=6000s \
+ --conf spark.storage.blockManagerSlaveTimeoutMs=600000 \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.default.parallelism=280 \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.akka.frameSize=2046 \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer=48m \
+ --conf spark.core.connection.ack.wait.timeout=60000s \
+ --conf spark.storage.memoryFraction=0.2 \
+ --conf spark.shuffle.memoryFraction=0.6 \
+ --conf spark.rdd.compress=true \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${outputPath} ${orgccPath} ${incgraphPath} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/incpr_run.sh b/tools/kal-test/bin/graph/incpr_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..44f05b20616883cc0f0f594621ec3a29a65e1eff
--- /dev/null
+++ b/tools/kal-test/bin/graph/incpr_run.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: twitter_2010"
+ echo "2nd argument: rate: e.g. 0.001,0.01,0.05"
+ echo "3nd argument: batch: e.g. 1,2,3,4,5"
+ echo "4th argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+rate=$2
+batch=$3
+is_raw=$4
+
+source conf/graph/incpr/incpr_spark.properties
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+num_executors="numExectuors"
+executor_cores="executorCores"
+executor_memory="executorMemory"
+extra_java_options="extraJavaOptions"
+driver_cores="driverCores"
+driver_memory="driverMemory"
+executor_memory_overhead="execMemOverhead"
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${extra_java_options_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
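+# ${!dataset_name} resolves the dataset's base path from the properties file;
+# the _${rate}_batch_${batch} suffix then selects one incremental batch, e.g.
+# .../twitter_2010_0.001_batch_1 (path illustrative).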
+data_path_val=${!dataset_name}_${rate}_batch_${batch}
+output_path="${output_path_prefix}/incpr/${is_raw}/${dataset_name}_${rate}_batch_${batch}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- incpr-${dataset_name}_${rate}_batch_${batch}"
+if [ ${is_raw} == "no" ]; then
+ spark-submit \
+ --class com.bigdata.graph.IncPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 100g \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer.max=2040m \
+ --conf spark.driver.extraJavaOptions="-Xms100G" \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.memory.fraction=0.5 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${output_path} ${is_raw} | tee ./log/log
+else
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.TrillionPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 80g \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer.max=2040m \
+ --conf spark.rdd.compress=true \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${output_path} no | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/katz_run.sh b/tools/kal-test/bin/graph/katz_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7bbe4d6e46edfae1db7d75835f4af5d0f75d9514
--- /dev/null
+++ b/tools/kal-test/bin/graph/katz_run.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents, uk_2002, arabic_2005"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ]; then
+ usage
+ exit 0
+fi
+
+source conf/graph/katz/katz_spark.properties
+
+dataset_name=$1
+is_raw=$2
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${cpu_name}-${is_raw}
+
+# concatenate strings as a new variable
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${extra_java_options_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+data_path=${dataset_name}
+data_path_val=${!data_path}
+echo "${dataset_name} : ${data_path_val}"
+
+outputPath="/tmp/graph/result/katz/${dataset_name}/${is_raw}"
+hdfs dfs -rm -r -f ${outputPath}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- KatzCentrality"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+spark-submit \
+ --class com.bigdata.graph.KatzCentralityRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 200g \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.akka.frameSize=3600 \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer=48m \
+ --conf spark.core.connection.ack.wait.timeout=60000s \
+ --conf spark.storage.memoryFraction=0.2 \
+ --conf spark.shuffle.memoryFraction=0.6 \
+ --conf spark.rdd.compress=true \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.network.timeout=6000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.kryoserializer.buffer.max=2047m \
+ --conf spark.default.parallelism=340 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${outputPath} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/kcore_run.sh b/tools/kal-test/bin/graph/kcore_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fbdd90ef7cf0eedffcbb532bb707a7f9585f3244
--- /dev/null
+++ b/tools/kal-test/bin/graph/kcore_run.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: dataset name: graph500_22, graph500_23, graph500_25, graph500_26"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+is_raw=$2
+
+if [ $dataset_name != 'graph500_22' ] && [ $dataset_name != 'graph500_23' ] && [ $dataset_name != 'graph500_25' ] && [ $dataset_name != 'graph500_26' ];
+then
+ echo 'invalid dataset'
+ echo 'dataset name: graph500_22 or graph500_23 or graph500_25 or graph500_26'
+ exit 0
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+source conf/graph/kcore/kcore_spark.properties
+num_executors_val="numExecutors_${dataset_name}_${cpu_name}"
+executor_cores_val="executorCores_${dataset_name}_${cpu_name}"
+executor_memory_val="executorMemory_${dataset_name}_${cpu_name}"
+executor_extra_javaopts_val="executorExtraJavaopts_${dataset_name}_${cpu_name}"
+executor_memoryOverhead_val="executorMemoryOverhead_${dataset_name}_${cpu_name}"
+
+master_val="master"
+deploy_mode_val="deployMode"
+driver_memory_val="driverMemory"
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+master=${!master_val}
+driver_memory=${!driver_memory_val}
+deploy_mode=${!deploy_mode_val}
+executor_extra_javaopts=${!executor_extra_javaopts_val}
+executor_memoryOverhead=${!executor_memoryOverhead_val}
+if [ -z "${num_executors}" ] \
+    || [ -z "${executor_cores}" ] \
+    || [ -z "${executor_memory}" ] \
+    || [ -z "${master}" ]; then
+  echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+echo "${master_val}:${master}"
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "${executor_extra_javaopts_val}:${executor_extra_javaopts}"
+echo "${executor_emoryOverhead_val}:${executor_emoryOverhead}"
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+output_path="${output_path_prefix}/kcore/${is_raw}/${dataset_name}"
+echo "${dataset_name}: ${input_path},${output_path}"
+
+echo "start to clean exist output"
+hdfs dfs -rm -r -f -skipTrash ${output_path}
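+# -skipTrash deletes the old output immediately instead of moving it to the
+# HDFS trash, which avoids filling the trash quota with large result sets.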
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- kcore-${dataset_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.KCoreDecompositionRunner \
+ --master ${master} \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.executor.memoryOverhead=${executor_memoryOverhead} \
+ --jars "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} ${is_raw} ${cpu_name} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.KCore \
+ --master ${master} \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.executor.memoryOverhead=${executor_memoryOverhead} \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} ${is_raw} ${cpu_name} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/kcore_run_hive.sh b/tools/kal-test/bin/graph/kcore_run_hive.sh
new file mode 100644
index 0000000000000000000000000000000000000000..efc9572e9daa21b99abecfda2a6452ac1c5419c8
--- /dev/null
+++ b/tools/kal-test/bin/graph/kcore_run_hive.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+set -e
+
+case "$1" in
+-h | --help | ?)
+ echo "Usage: "
+ exit 0
+ ;;
+esac
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+if [ ${cpu_name} == "aarch64" ]
+then
+ cpu_name="aarch_64"
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+table_name=$1
+col1=$2
+col2=$3
+partition=$4
+save_mode=$5
+save_arg=$6
+
+echo "table_name: $table_name"
+echo "col1: $col1"
+echo "col2: $col2"
+echo "partition: $partition"
+echo "save_mode: $save_mode"
+echo "save_arg: $save_arg"
+
+spark-submit \
+--class com.bigdata.graph.KCoreDecompositionHiveRunner \
+--deploy-mode "client" \
+--driver-memory "16g" \
+--num-executors 35 \
+--executor-cores 4 \
+--executor-memory "25g" \
+--conf spark.driver.maxResultSize=200g \
+--conf spark.locality.wait.node=0 \
+--conf spark.executor.memoryOverhead=10240 \
+--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+--conf spark.rdd.compress=true \
+--conf spark.shuffle.compress=true \
+--conf spark.shuffle.spill.compress=true \
+--conf spark.io.compression.codec=lz4 \
+--jars "./lib/kal-test_${scala_version_val}-0.1.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${table_name} ${col1} ${col2} ${partition} ${save_mode} ${save_arg}
diff --git a/tools/kal-test/bin/graph/louvain_run.sh b/tools/kal-test/bin/graph/louvain_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2e3421e9b314698a2f10c056278a5d3653bacf2d
--- /dev/null
+++ b/tools/kal-test/bin/graph/louvain_run.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: graph500_22,graph500_24,graph500_25,cit_patents,uk_2002,arabic_2005"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/louvain/louvain_spark.properties
+
+dataset_name=$1
+is_raw=$2
+
+if [ ${dataset_name} != "graph500_22" ] &&
+ [ ${dataset_name} != "graph500_24" ] &&
+ [ ${dataset_name} != "graph500_25" ] &&
+ [ ${dataset_name} != "cit_patents" ] &&
+ [ ${dataset_name} != "uk_2002" ] &&
+ [ ${dataset_name} != "arabic_2005" ];then
+ echo "invalid dataset name,dataset name:graph500_22,graph500_24,graph500_25,cit_patents,uk_2002,arabic_20055"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+num_executors="${dataset_name}_numExecutors_${cpu_name}"
+executor_cores="${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${dataset_name}_executorMemory_${cpu_name}"
+extra_java_options="${dataset_name}_extraJavaOptions_${cpu_name}"
+num_partitions="${dataset_name}_numPartitions_${cpu_name}"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+
+if [ -z "${num_executors_val}" ] ||
+   [ -z "${executor_cores_val}" ] ||
+   [ -z "${executor_memory_val}" ] ||
+   [ -z "${extra_java_options_val}" ] ||
+   [ -z "${num_partitions_val}" ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path="${output_path_prefix}/louvain/${is_raw}/${dataset_name}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- louvain-${dataset_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.LouvainRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 16g \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.rpc.askTimeout=36000 \
+ --conf spark.rdd.compress=true \
+ --conf spark.network.timeout=6000s \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.locality.wait.node=0 \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${num_partitions_val} ${is_raw} ${data_path_val} ${output_path}
+else
+ community_output=${output_path}/community
+ modularity_output=${output_path}/modularity
+
+ spark-submit \
+ --class com.huawei.graph.algorithms.open.LouvainByGraphx \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 16g \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.rpc.askTimeout=36000 \
+ --conf spark.rdd.compress=true \
+ --conf spark.network.timeout=6000s \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.locality.wait.node=0 \
+ ./lib/louvain_2.11-0.1.0_open_sourced.jar yarn ${data_path_val} ${community_output} ${modularity_output} " " ${num_partitions_val} 2000 > louvain_temp.log
+ costTime=$(cat louvain_temp.log |grep "cost_time:" | awk '{print $2}')
+ modularity=$(cat louvain_temp.log |grep "modularity:" | awk '{print $2}')
+ currentTime=$(date "+%Y%m%d_%H%M%S")
+ rm -rf louvain_temp.log
+ echo -e "algorithmName: Louvain\ncostTime: $costTime\ndatasetName: ${dataset_name}\nisRaw: 'yes'\nmodularity: ${modularity}\ntestcaseType: Louvain_opensource_${dataset_name}\n" > ./report/"Louvain_${currentTime}.yml"
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/louvain_run_hive.sh b/tools/kal-test/bin/graph/louvain_run_hive.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c65172599ea849c8a2e6031e40110356cadf7230
--- /dev/null
+++ b/tools/kal-test/bin/graph/louvain_run_hive.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+set -e
+
+case "$1" in
+-h | --help | ?)
+ echo "Usage: "
+ exit 0
+ ;;
+esac
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+if [ ${cpu_name} == "aarch64" ]
+then
+ cpu_name="aarch_64"
+fi
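+# The BoostKit jar names use "aarch_64" where lscpu reports "aarch64".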
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+table_name=$1
+col1=$2
+col2=$3
+colWeight=$4
+iterNum=$5
+isDirected=$6
+partition=$7
+save_mode=$8
+save_arg=$9
+
+echo "table_name: $table_name"
+echo "col1: $col1"
+echo "colWeight: $colWeight"
+echo "iterNum: $iterNum"
+echo "isDirected: $isDirected"
+echo "partition: $partition"
+echo "save_mode: $save_mode"
+echo "save_arg: $save_arg"
+
+spark-submit \
+--class com.bigdata.graph.LouvainHiveRunner \
+--master yarn \
+--deploy-mode "client" \
+--num-executors 35 \
+--executor-memory "25g" \
+--executor-cores 8 \
+--driver-memory "16g" \
+--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+--conf spark.worker.timeout=3600 \
+--conf spark.driver.maxResultSize=200g \
+--conf spark.rpc.askTimeout=36000 \
+--conf spark.network.timeout=6000s \
+--conf spark.broadcast.blockSize=4m \
+--conf spark.shuffle.manager=SORT \
+--conf spark.shuffle.blockTransferService=nio \
+--conf spark.locality.wait.node=0 \
+--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+--conf spark.rdd.compress=true \
+--conf spark.shuffle.compress=true \
+--conf spark.shuffle.spill.compress=true \
+--conf spark.io.compression.codec=lz4 \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${table_name} ${col1} ${col2} ${colWeight} ${iterNum} ${isDirected} ${partition} ${save_mode} ${save_arg}
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/lpa_run.sh b/tools/kal-test/bin/graph/lpa_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b6ec3bebcd2086f9c9633475744e997a864531b4
--- /dev/null
+++ b/tools/kal-test/bin/graph/lpa_run.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: graph500_22,graph500_24,graph500_25"
+ echo "2nd argument: api: run,runConvergence"
+ echo "3rd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+api=$2
+is_raw=$3
+
+if [ $api != "run" ] && [ $api != "runConvergence" ];
+then
+ echo "invalid api."
+ echo "api: run or runConvergence"
+ exit 0
+fi
+
+if [ $dataset_name != 'graph500_22' ] && [ $dataset_name != 'graph500_24' ] && [ $dataset_name != 'graph500_25' ];
+then
+ echo 'invalid dataset'
+ echo "dataset name: graph500_22 or graph500_24 or graph500_25"
+ exit 0
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+source conf/graph/lpa/lpa_spark.properties
+num_executors_val="numExecutors_${dataset_name}_${cpu_name}"
+executor_cores_val="executorCores_${dataset_name}_${cpu_name}"
+executor_memory_val="executorMemory_${dataset_name}_${cpu_name}"
+executor_extra_javaopts_val="executorExtraJavaopts_${dataset_name}_${cpu_name}"
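+# Property keys are composed per dataset and CPU architecture,
+# e.g. numExecutors_graph500_22_aarch64 (assuming lscpu reports aarch64).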
+
+master_val="master"
+deploy_mode_val="deployMode"
+driver_memory_val="driverMemory"
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+master=${!master_val}
+driver_memory=${!driver_memory_val}
+deploy_mode=${!deploy_mode_val}
+executor_extra_javaopts=${!executor_extra_javaopts_val}
+if [ ! ${num_executors} ] \
+ || [ ! ${executor_cores} ] \
+ || [ ! ${executor_memory} ] \
+ || [ ! ${master} ]; then
+ echo "Some values are NUll, please confirm with the property files"
+ exit 0
+fi
+echo "${master_val}:${master}"
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "${executor_extra_javaopts_val}:${executor_extra_javaopts}"
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+output_path="${output_path_prefix}/lpa/${is_raw}/${dataset_name}/${api}"
+echo "${dataset_name}: ${input_path},${output_path}"
+
+echo "start to clean exist output"
+hdfs dfs -rm -r -f -skipTrash ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- lpa-${api}-${dataset_name}"
+
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.LabelPropagationRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} ${api} ${is_raw} ${cpu_name} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.LabelPropagationRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} "run" ${is_raw} ${cpu_name} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/graph/mce_run.sh b/tools/kal-test/bin/graph/mce_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..964698013f537a272f044fd99bd2b4d6b4b9aadd
--- /dev/null
+++ b/tools/kal-test/bin/graph/mce_run.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage:"
+ echo "dataset name: graph500_23, graph500_24, graph500_25"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 1 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+source conf/graph/mce/mce_spark.properties
+num_executors_val="numExecutors_${cpu_name}"
+executor_cores_val="executorCores"
+executor_memory_val="executorMemory_${cpu_name}"
+extra_java_options_val="extraJavaOptions_${cpu_name}"
+deploy_mode_val="deployMode"
+driver_memory_val="driverMemory"
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+extra_java_options=${!extra_java_options_val}
+driver_memory=${!driver_memory_val}
+deploy_mode=${!deploy_mode_val}
+
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${driver_memory_val}:${driver_memory}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "${extra_java_options_val}:${extra_java_options}"
+if [ ! ${num_executors} ] \
+ || [ ! ${executor_cores} ] \
+ || [ ! ${executor_memory} ] \
+ || [ ! ${driver_memory} ] \
+ || [ ! ${extra_java_options} ] \
+ || [ ! ${deploy_mode} ]; then
+ echo "Some values are NUll, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+output_path="${output_path_prefix}/mce/${dataset_name}"
+echo "${dataset_name} : ${input_path}"
+echo "outputPath : ${output_path}"
+
+echo "start to clean existing output"
+hdfs dfs -rm -r -f -skipTrash ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+echo "start to submit spark jobs -- mce-${dataset_name}"
+spark-submit \
+--class com.bigdata.graph.MaximalCliqueEnumerationRunner \
+--deploy-mode ${deploy_mode} \
+--driver-memory ${driver_memory} \
+--num-executors ${num_executors} \
+--executor-cores ${executor_cores} \
+--executor-memory ${executor_memory} \
+--conf "spark.executor.extraJavaOptions=${extra_java_options}" \
+--conf spark.locality.wait=10 \
+--conf spark.rdd.compress=false \
+--conf spark.io.compression.codec=lz4 \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/fastutil-8.3.1.jar:lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} | tee ./log/log
+
+
+
+
+
diff --git a/tools/kal-test/bin/graph/mce_run_hive.sh b/tools/kal-test/bin/graph/mce_run_hive.sh
new file mode 100644
index 0000000000000000000000000000000000000000..59cc0f64d5b3490c627e814febbb5ae49757eb01
--- /dev/null
+++ b/tools/kal-test/bin/graph/mce_run_hive.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+set -e
+case "$1" in
+-h | --help | ?)
+ echo "Usage: "
+ exit 0
+ ;;
+esac
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+if [ ${cpu_name} == "aarch64" ]
+then
+ cpu_name="aarch_64"
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+table_name=$1
+col1=$2
+col2=$3
+minK=$4
+maxDegree=$5
+partition=$6
+save_mode=$7
+save_arg=$8
+
+echo "table_name: $table_name"
+echo "col1: $col1"
+echo "col2: $col2"
+echo "minK: $minK"
+echo "maxDegree: $maxDegree"
+echo "partition: $partition"
+echo "save_mode: $save_mode"
+echo "save_arg: $save_arg"
+
+spark-submit \
+--class com.bigdata.graph.MaximalCliqueEnumerationHiveRunner \
+--master yarn \
+--deploy-mode "client" \
+--driver-memory "80g" \
+--num-executors 59 \
+--executor-cores 4 \
+--executor-memory "15g" \
+--conf spark.locality.wait=10 \
+--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+--conf spark.rdd.compress=true \
+--conf spark.shuffle.compress=true \
+--conf spark.shuffle.spill.compress=true \
+--conf spark.io.compression.codec=lz4 \
+--jars "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${table_name} ${col1} ${col2} ${minK} ${maxDegree} ${partition} ${save_mode} ${save_arg}
+
+
+
+
+
diff --git a/tools/kal-test/bin/graph/modularity_run.sh b/tools/kal-test/bin/graph/modularity_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..168e1eeb724cbec8dd5f7b39de8618b41ca35379
--- /dev/null
+++ b/tools/kal-test/bin/graph/modularity_run.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "please input 1 argument: "
+ echo "1st argument: name of dataset: graph500_23, graph500_25, graph500_26, uk_2002, arabic_2005, twitter"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+is_raw=$2
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+source conf/graph/modularity/modularity_spark.properties
+# concatenate strings as a new variable
+num_executors="${dataset_name}_numExectuors_${cpu_name}"
+executor_cores="${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${dataset_name}_executorMemory_${cpu_name}"
+extra_java_options="${dataset_name}_extraJavaOptions_${cpu_name}"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${extra_java_options_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+input_community="${dataset_name}_community"
+input_community_val=${!input_community}
+echo "${dataset_name} : ${data_path_val}"
+echo "input_community : ${input_community_val}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- modularity-${dataset_name}"
+
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.ModularityRunner \
+ --driver-memory 80g \
+ --master yarn \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${input_community_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.huawei.graph.algorithms.ModularityComputeByNovel \
+ --master yarn \
+ --deploy-mode client \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 200g \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.rpc.message.maxSize=2046 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=4m \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer=48m \
+ --conf spark.core.connection.ack.wait.timeout=60000s \
+ --conf spark.storage.memoryFraction=0.2 \
+ --conf spark.shuffle.memoryFraction=0.6 \
+ --conf spark.rdd.compress=true \
+ ./lib/modularity_2.11-0.1.0_open.jar yarn ${data_path_val} ${input_community_val} " " "," 500 false false > modularity_temp.log
+ costTime=$(cat modularity_temp.log |grep "cost_time:" | awk '{print $2}')
+ modularity=$(cat modularity_temp.log |grep "modularity:" | awk '{print $2}')
+ currentTime=$(date "+%Y%m%d_%H%M%S")
+ rm -rf modularity_temp.log
+ echo -e "algorithmName: Modularity\ncostTime: $costTime\ndatasetName: ${dataset_name}\nisRaw: 'yes'\nModularity: ${modularity}\ntestcaseType: Modularity_opensource_${dataset_name}\n" > ./report/"Modularity_${currentTime}.yml"
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/mssp_run.sh b/tools/kal-test/bin/graph/mssp_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..92dd05694e02634188981ef0b9585d306e314161
--- /dev/null
+++ b/tools/kal-test/bin/graph/mssp_run.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: soc_liveJournal,uk_2002,arabic_2005"
+ echo "2nd argument: source number: 5/50"
+ echo "3rd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+source_num=$2
+is_raw=$3
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+source conf/graph/mssp/mssp_spark.properties
+num_executors_val=${numExectuors}
+executor_cores_val=${executorCores}
+executor_memory_val=${executorMemory}
+driver_memory_val=${driverMemory}
+extra_java_options_val=${extraJavaOptions}
+compute_partition_val=${computePartition}
+split=${splitGraph}
+
+echo "numExectuors : ${num_executors_val}"
+echo "executorCores: ${executor_cores_val}"
+echo "executorMemory : ${executor_memory_val}"
+echo "driverMemory : ${driver_memory_val}"
+echo "extraJavaOptions : ${extra_java_options_val}"
+echo "computePartition : ${compute_partition_val}"
+echo "splitGraph : ${split}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${driver_memory_val} ] ||
+ [ ! ${extra_java_options_val} ] ||
+ [ ! ${compute_partition_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${split} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+source_path=${dataset_name}_${source_num}
+source_path_val=${!source_path}
+output_path="${output_path_prefix}/mssp/${is_raw}/${dataset_name}_${source_num}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "${dataset_name} : ${data_path_val}"
+echo "${source_path} : ${source_path_val}"
+echo "outputPath : ${output_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- mssp-${dataset_name0}-${source_num}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.MSSPRunner \
+ --master yarn \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.driver.maxResultSize=100g \
+ --jars "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name}_${source_num} ${compute_partition_val} ${data_path_val} ${output_path} ${source_path_val} ${split} ${is_raw} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.MSSPRunner \
+ --master yarn \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.driver.maxResultSize=100g \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name}_${source_num} ${compute_partition_val} ${data_path_val} ${output_path} ${source_path_val} ${split} ${is_raw} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/graph/node2vec_run.sh b/tools/kal-test/bin/graph/node2vec_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..eeaf4cdb8e6a0e230fec02ca7d029c89dca189ba
--- /dev/null
+++ b/tools/kal-test/bin/graph/node2vec_run.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents,soc_liveJournal,uk_2002"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+ echo "3rd argument: verify result: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/node2vec/node2vec_spark.properties
+
+dataset_name=$1
+is_raw=$2
+is_check=$3
+
+if [ ${dataset_name} != "cit_patents" ] &&
+ [ ${dataset_name} != "soc_liveJournal" ] &&
+ [ ${dataset_name} != "uk_2002" ] ;then
+ echo "invalid dataset name,dataset name:cit_patents,soc_liveJournal,uk_2002"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${cpu_name}
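+# model_conf selects the per-dataset, per-architecture parameter block passed to the runner,
+# e.g. cit_patents-aarch64.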
+
+# concatenate strings as a new variable
+master_="master"
+deploy_mode="deployMode"
+driver_memory="driverMemory"
+driver_cores="driverCores_${cpu_name}"
+executor_cores="executorCores_${cpu_name}"
+executor_memory="executorMemory_${cpu_name}"
+num_executors="numExecutors_${cpu_name}"
+
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+driver_memory_val=${!driver_memory}
+driver_cores_val=${!driver_cores}
+
+echo "${cpu_name}"
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${driver_memory}:${driver_memory_val}"
+echo "${driver_cores}:${driver_cores_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${driver_memory_val} ] ||
+ [ ! ${driver_cores_val} ] ; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+gt_path="${dataset_name}_negEdge"
+data_path_val=${!dataset_name}
+gt_path_val=${!gt_path}
+output_path="${output_path_prefix}/node2vec/${is_raw}/${dataset_name}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+echo "gt_path : ${gt_path_val}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs--node2vec_${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.Node2VecRunner \
+ --master ${master_val} \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --driver-cores ${driver_cores_val} \
+ --conf spark.kryoserializer.buffer.max=2047m \
+ --conf spark.ui.showConsoleProgress=true \
+ --conf spark.driver.maxResultSize=0 \
+ --conf spark.driver.extraJavaOptions="-Xms300G -XX:hashCode=0" \
+ --conf spark.executor.extraJavaOptions="-Xms315G -XX:hashCode=0" \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --jars "lib/smile-core-2.5.3.jar,lib/smile-math-2.5.3.jar,lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar,lib/boostkit-graph-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${output_path} ${gt_path_val} ${is_check} | tee ./log/log
+else
+ spark-submit \
+ --class vn.five9.Main \
+ --master ${master_val} \
+ --name "Node2Vec_${model_conf}" \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --driver-cores ${driver_cores_val} \
+ --conf spark.kryoserializer.buffer.max=2047m \
+ --conf spark.ui.showConsoleProgress=true \
+ --conf spark.driver.maxResultSize=0 \
+ --conf spark.driver.extraJavaOptions="-Xms300G -XX:hashCode=0" \
+ --conf spark.executor.extraJavaOptions="-Xms315G -XX:hashCode=0" \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --jars "lib/fastutil-8.3.1.jar,lib/spark-mllib_2.11-2.3.2.jar,lib/smile-core-2.5.3.jar,lib/scopt_2.11-3.5.0.jar,lib/smile-math-2.5.3.jar" \
+ ./lib/node2vec-baseline.jar \
+ --cmd node2vec --indexed true --directed true --degree 1000000000 \
+ --p 1.0 --q 1.0 --walkLength 5 --numWalks 10 \
+ --input ${data_path_val} --output ${output_path} > node2vec_tmp.log
+
+ CostTime=$(cat node2vec_tmp.log |grep "total time" | awk '{print $7}')
+ currentTime=$(date "+%Y%m%d_%H%M%S")
+ rm -rf node2vec_tmp.log
+ echo -e "algorithmName: Node2vec\ncostTime: $CostTime\ndatasetName: ${dataset_name}\nisRaw: 'yes'\ntestcaseType: Node2vec_opensource_${dataset_name}\n" > ./report/"Node2vec_${currentTime}.yml"
+ if [ $? -eq 0 ];then
+ echo "Exec Successful: end." > ./log/log
+ else
+ echo "Exec Failure: please check the code" > ./log/log
+ fi
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/ppr_run.sh b/tools/kal-test/bin/graph/ppr_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..09649a01c7331ffa19ff3e0b2ebc76eff1cf5774
--- /dev/null
+++ b/tools/kal-test/bin/graph/ppr_run.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents,uk_2002,arabic_2005"
+ echo "2nd argument: name of api: fixMS,fixSS,conSS"
+ echo "3rd argument: optimization algorithm or raw: no/yes"
+ echo "4th argument: sourceCnt or null: 1,5,10,50,100"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ] && [ $# -ne 4 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/ppr/ppr_spark.properties
+
+dataset_name=$1
+api_name=$2
+is_raw=$3
+
+if [ ${dataset_name} != "cit_patents" ] &&
+ [ ${dataset_name} != "uk_2002" ] &&
+ [ ${dataset_name} != "arabic_2005" ] ;then
+ echo "invalid dataset name,dataset name:cit_patents,uk_2002,arabic_2005"
+ exit 1
+fi
+if [ ${api_name} != "fixMS" ] &&
+ [ ${api_name} != "fixSS" ] &&
+ [ ${api_name} != "conSS" ] ;then
+ echo "invalid argument value,api name: fixMS,fixSS,conSS"
+ exit 1
+fi
+
+if [ $# -eq 4 ]; then
+ src=$4
+ if [ ${src} != "1" ] &&
+ [ ${src} != "5" ] &&
+ [ ${src} != "10" ] &&
+ [ ${src} != "50" ] &&
+ [ ${src} != "100" ] ;then
+ echo "invalid argument value,must be: 1, 5, 10, 50 or 100"
+ exit 1
+ fi
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings as a new variable
+num_executors="${api_name}_${dataset_name}_numExecutors_${cpu_name}"
+executor_cores="${api_name}_${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${api_name}_${dataset_name}_executorMemory_${cpu_name}"
+num_partitions="${api_name}_${dataset_name}_numPartitions_${cpu_name}"
+extra_Java_Options="${api_name}_${dataset_name}_extraJavaOptions_${cpu_name}"
+deploy_mode="deployMode"
+driver_memory="driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+driver_memory_val=${!driver_memory}
+extra_Java_Options_val=${!extra_Java_Options}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+echo "${driver_memory}:${driver_memory_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${extra_Java_Options_val} ] ||
+ [ ! ${num_partitions_val} ] ; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+function clean_cache() {
+ echo "start to clean cache and sleep 30s"
+ ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+ sleep 30
+}
+
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+fi
+
+if [ ${api_name} == "fixMS" ]; then
+ output_path="${output_path_prefix}/ppr/${is_raw}/${dataset_name}/${api_name}_${src}"
+ hdfs dfs -rm -r -f ${output_path}
+ clean_cache
+ echo "start to submit spark jobs -- ppr-${api_name}_${dataset_name}_${src}"
+ if [ ${is_raw} == "no" ]; then
+ spark-submit \
+ --class com.bigdata.graph.PersonalizedPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.driver.extraJavaOptions="-Xms80G" \
+ --conf spark.locality.wait.node=0 \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${data_path_val} ${is_raw} ${src} ${output_path} | tee ./log/log
+ else
+ spark-submit \
+ --class com.bigdata.graph.PersonalizedPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.driver.extraJavaOptions="-Xms80G" \
+ --conf spark.locality.wait.node=0 \
+ --jars "lib/fastutil-8.3.1.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${data_path_val} ${is_raw} ${src} ${output_path} | tee ./log/log
+ fi
+else
+ source conf/graph/ppr/ppr_source_id.properties
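+ # Setting IFS=, makes the unquoted expansion below split the comma-separated source-ID list into an array.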
+ IFS=,
+ source_ids="${dataset_name}_SourceID"
+ source_ids_val=${!source_ids}
+ source_ids_arr=($source_ids_val)
+
+ echo "${source_ids}:${source_ids_val}"
+ for source_id in ${source_ids_arr[@]}
+ do
+ output_path="${output_path_prefix}/ppr/${is_raw}/${dataset_name}/${api_name}_${source_id}"
+ hadoop fs -rm -r -f ${output_path}
+ clean_cache
+ echo "start to submit spark jobs -- ppr-${api_name}_${dataset_name}_${source_id}"
+ if [ ${is_raw} == "no" ]; then
+ spark-submit \
+ --class com.bigdata.graph.PersonalizedPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.driver.extraJavaOptions="-Xms80G" \
+ --conf spark.locality.wait.node=0 \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${data_path_val} ${is_raw} ${source_id} ${output_path} | tee ./log/log
+ else
+ spark-submit \
+ --class com.bigdata.graph.PersonalizedPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.driver.extraJavaOptions="-Xms80G" \
+ --conf spark.locality.wait.node=0 \
+ --jars "lib/fastutil-8.3.1.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${data_path_val} ${is_raw} ${source_id} ${output_path} | tee ./log/log
+ fi
+ done
+fi
diff --git a/tools/kal-test/bin/graph/pr_run.sh b/tools/kal-test/bin/graph/pr_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..726181932b5f4adb3f7103f4494f710b5dbe9f1d
--- /dev/null
+++ b/tools/kal-test/bin/graph/pr_run.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents,uk_2002,arabic_2005"
+ echo "2nd argument: name of api: run,runUntilConvergence"
+ echo "3rd argument: optimization algorithm or raw: no/yes"
+}
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ usage
+ exit 0
+fi
+
+source conf/graph/pr/pr_spark.properties
+
+dataset_name=$1
+api_name=$2
+is_raw=$3
+
+if [ ${dataset_name} != "cit_patents" ] && [ ${dataset_name} != "uk_2002" ] && [ ${dataset_name} != "arabic_2005" ];then
+ echo "invalid dataset name,dataset name:cit_patents,or uk_2002,or arabic_2005"
+ exit 1
+fi
+if [ ${api_name} != "run" ] && [ ${api_name} != "runUntilConvergence" ];then
+ echo "invalid api name,api name: run or runUntilConvergence"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+prefix="run"
+if [ ${api_name} == "runUntilConvergence" ]
+then
+ prefix="convergence"
+fi
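+# Property keys for the convergence API are prefixed with "convergence";
+# the fixed-iteration API uses the "run" prefix.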
+
+# concatenate strings as a new variable
+num_executors="${prefix}_${dataset_name}_numExecutors_${cpu_name}"
+executor_cores="${prefix}_${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${prefix}_${dataset_name}_executorMemory_${cpu_name}"
+extra_java_options="${prefix}_${dataset_name}_extraJavaOptions_${cpu_name}"
+num_partitions="${prefix}_${dataset_name}_numPartitions_${cpu_name}"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${extra_java_options_val} ] ||
+ [ ! ${num_partitions_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path="${output_path_prefix}/pr/${is_raw}/${dataset_name}_${api_name}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- pr-${dataset_name}-${api_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.PageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 100g \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.driver.extraJavaOptions="-Xms100G" \
+ --conf spark.locality.wait.node=0 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${is_raw} ${data_path_val} ${output_path} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.PageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 100g \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.driver.extraJavaOptions="-Xms100G" \
+ --conf spark.locality.wait.node=0 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${api_name} ${num_partitions_val} ${is_raw} ${data_path_val} ${output_path} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/pr_run_hive.sh b/tools/kal-test/bin/graph/pr_run_hive.sh
new file mode 100644
index 0000000000000000000000000000000000000000..81f4bb370d28d4f3cf50609785d6b15c4399e1e7
--- /dev/null
+++ b/tools/kal-test/bin/graph/pr_run_hive.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+set -e
+
+case "$1" in
+-h | --help | ?)
+ echo "Usage: "
+ exit 0
+ ;;
+esac
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+if [ ${cpu_name} == "aarch64" ]
+then
+ cpu_name="aarch_64"
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+table_name=$1
+col1=$2
+col2=$3
+api=$4
+tol=$5
+resetProb=$6
+numIter=$7
+partition=$8
+save_mode=$9
+save_arg=${10}
+
+echo "table_name: $table_name"
+echo "col1: $col1"
+echo "col2: $col2"
+echo "api: $api"
+echo "tol: $tol"
+echo "resetProb: $resetProb"
+echo "numIter: $numIter"
+echo "partition: $partition"
+echo "save_mode: $save_mode"
+echo "save_arg: $save_arg"
+
+spark-submit \
+--class com.bigdata.graph.PageRankHiveRunner \
+--master yarn \
+--deploy-mode "client" \
+--num-executors 36 \
+--executor-memory "25g" \
+--executor-cores 4 \
+--driver-memory 100g \
+--conf spark.driver.maxResultSize=200g \
+--conf spark.driver.extraJavaOptions="-Xms100G" \
+--conf spark.locality.wait.node=0 \
+--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+--conf spark.rdd.compress=true \
+--conf spark.shuffle.compress=true \
+--conf spark.shuffle.spill.compress=true \
+--conf spark.io.compression.codec=lz4 \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${table_name} ${col1} ${col2} ${api} ${tol} ${resetProb} ${numIter} ${partition} ${save_mode} ${save_arg}
diff --git a/tools/kal-test/bin/graph/scc_run.sh b/tools/kal-test/bin/graph/scc_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5c59caa1b0c3c459b19e1bffa31a569506586205
--- /dev/null
+++ b/tools/kal-test/bin/graph/scc_run.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents,enwiki_2018,arabic_2005"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+is_raw=$2
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+num_executors_val="numExecutors_${cpu_name}"
+executor_cores_val="executorCores_${cpu_name}"
+executor_memory_val="executorMemory_${cpu_name}"
+if [ ${dataset_name} == "arabic_2005" ]
+then
+ num_executors_val="numExecutors_${cpu_name}_arabic_2005"
+ executor_cores_val="executorCores_${cpu_name}_arabic_2005"
+ executor_memory_val="executorMemory_${cpu_name}_arabic_2005"
+fi
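+# arabic_2005 gets a dedicated resource profile, presumably because it needs more
+# executors/memory than the other datasets.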
+master_val="master"
+deploy_mode_val="deployMode"
+driver_memory_val="driverMemory"
+source conf/graph/scc/scc_spark.properties
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+master=${!master_val}
+driver_memory=${!driver_memory_val}
+deploy_mode=${!deploy_mode_val}
+if [ ! ${num_executors} ] \
+ || [ ! ${executor_cores} ] \
+ || [ ! ${executor_memory} ] \
+ || [ ! ${master} ]; then
+ echo "Some values are NUll, please confirm with the property files"
+ exit 0
+fi
+echo "${master_val}:${master}"
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "executor_extra_javaopts:${executor_extra_javaopts}"
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+output_path="${output_path_prefix}/scc/${is_raw}/${dataset_name}"
+echo "${dataset_name}: ${input_path},${output_path}"
+
+echo "start to clean exist output"
+hdfs dfs -rm -r -f -skipTrash ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- scc-${dataset_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.StronglyConnectedComponentsRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.ui.showConsoleProgress=false \
+ --conf spark.driver.extraJavaOptions="-Xms12g -XX:hashCode=0" \
+ --conf spark.executor.extraJavaOptions="${executor_extra_javaopts}" \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.memory.fraction=0.24939583270092516 \
+ --conf spark.memory.storageFraction=0.5849745294783253 \
+ --conf spark.broadcast.blockSize=1m \
+ --conf spark.reducer.maxSizeInFlight=59mb \
+ --conf spark.shuffle.file.buffer=17k \
+ --conf spark.io.compression.codec=lzf \
+ --conf spark.shuffle.compress=true \
+ --conf spark.rdd.compress=false \
+ --conf spark.shuffle.io.preferDirectBufs=true \
+ --conf spark.shuffle.spill.compress=true \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} "run" ${is_raw} ${cpu_name} 300 | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.StronglyConnectedComponentsRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.ui.showConsoleProgress=false \
+ --conf spark.driver.extraJavaOptions="-Xms12g -XX:hashCode=0" \
+ --conf spark.executor.extraJavaOptions="${executor_extra_javaopts}" \
+ --conf spark.rpc.askTimeout=1000000s \
+ --conf spark.network.timeout=1000000s \
+ --conf spark.executor.heartbeatInterval=100000s \
+ --conf spark.rpc.message.maxSize=1000 \
+ --conf spark.memory.fraction=0.24939583270092516 \
+ --conf spark.memory.storageFraction=0.5849745294783253 \
+ --conf spark.broadcast.blockSize=1m \
+ --conf spark.reducer.maxSizeInFlight=59mb \
+ --conf spark.shuffle.file.buffer=17k \
+ --conf spark.io.compression.codec=lzf \
+ --conf spark.shuffle.compress=true \
+ --conf spark.rdd.compress=false \
+ --conf spark.shuffle.io.preferDirectBufs=true \
+ --conf spark.shuffle.spill.compress=true \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} "run" ${is_raw} ${cpu_name} 400 | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/sgm_run.sh b/tools/kal-test/bin/graph/sgm_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d2f5c3689fcc183715088ccb4aa79a8f99849b55
--- /dev/null
+++ b/tools/kal-test/bin/graph/sgm_run.sh
@@ -0,0 +1,237 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: name of dataset: graph500_19,liveJournal,com_orkut"
+ echo "2nd argument: name of queryGraph: for Identical: 4dgn/4sqr/5tree/6star; for unIdentical: 4dgn/4clique/5clique/6clique"
+ echo "3rd argument: match mode:Identical,unIdentical"
+ echo "4th argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+queryGraph=$2
+match_mode=$3
+is_raw=$4
+
+if [ ${dataset_name} != "graph500_19" ] &&
+ [ ${dataset_name} != "liveJournal" ] &&
+ [ ${dataset_name} != "com_orkut" ] ;then
+ echo "invalid dataset name,dataset name:graph500_19,liveJournal,com_orkut"
+ exit 1
+fi
+if [ ${match_mode} != "Identical" ] &&
+ [ ${match_mode} != "unIdentical" ] ;then
+ echo "invalid argument value,match mode:identical or unidentical"
+ exit 1
+fi
+if [ ${match_mode} == "Identical" ] ; then
+ if [ ${queryGraph} != "4dgn" ] &&
+ [ ${queryGraph} != "4sqr" ] &&
+ [ ${queryGraph} != "5tree" ] &&
+ [ ${queryGraph} != "6star" ] ; then
+ echo "invalid queryGraph,queryGraph name:4dgn,4sqr,5tree,6star"
+ exit 1
+ fi
+elif [ ${match_mode} == "unIdentical" ]; then
+ if [ ${queryGraph} != "4dgn" ] &&
+ [ ${queryGraph} != "4clique" ] &&
+ [ ${queryGraph} != "5clique" ] &&
+ [ ${queryGraph} != "6clique" ] ; then
+ echo "invalid queryGraph,queryGraph name:4dgn,4clique,5clique,6clique"
+ exit 1
+ fi
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings to form the per-case property-key names
+source conf/graph/sgm/sgm_spark.properties
+num_executors="${dataset_name}_${queryGraph}_${match_mode}_numExecutors_${cpu_name}"
+executor_cores="${dataset_name}_${queryGraph}_${match_mode}_executorCores_${cpu_name}"
+executor_memory="${dataset_name}_${queryGraph}_${match_mode}_executorMemory_${cpu_name}"
+num_partitions="${dataset_name}_${queryGraph}_${match_mode}_numPartitions_${cpu_name}"
+extra_Java_Options="${dataset_name}_${queryGraph}_${match_mode}_executorExtraJavaOptions_${cpu_name}"
+num_Task="${dataset_name}_${queryGraph}_${match_mode}_numberTask_${cpu_name}"
+deploy_mode="deployMode"
+driver_memory="driverMemory"
+rpc_askTime="rpcAskTime"
+scheduler_maxRegisteredResourcesWaitingTime="schedulerMaxRegisteredResourcesWaitingTime"
+worker_timeout="workerTimeout"
+network_timeout="networkTimeout"
+storage_blockManagerSlaveTimeoutMs="storageBlockManagerSlaveTimeoutMs"
+shuffle_blockTransferService="shuffleBlockTransferService"
+driver_maxResultSize="driverMaxResultSize"
+shuffle_manager="shuffleManager"
+broadcast_blockSize="broadcastBlockSize"
+rpc_message_maxSize="rpcMessageMaxSize"
+core_connection_ack_wait_timeout="coreConnectionAckWaitTimeout"
+storage_memoryFraction="storageMemoryFraction"
+shuffle_memoryFraction="shuffleMemoryFraction"
+rdd_compress="rddCompress"
+memory_useLegacyMode="memoryUseLegacyMode"
+num_Colors="numberColors"
+graph_Split="${dataset_name}_split"
+
+if [ ${is_raw} == "yes" ]; then
+ num_executors="numExecutors"
+ executor_cores="executorCores"
+ executor_memory="executorMemory"
+ num_partitions="numPartitions"
+ extra_Java_Options="executorExtraJavaOptions"
+fi
+
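+# Bash indirect expansion: ${!name} expands to the value of the variable whose
+# name is stored in $name. With the keys built above, e.g. (illustrative values)
+#   key="graph500_19_4dgn_Identical_numExecutors_aarch64"
+#   graph500_19_4dgn_Identical_numExecutors_aarch64=12
+#   echo "${!key}"   # -> 12
+# each *_val below therefore picks up the matching entry from sgm_spark.properties.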
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+num_task_val=${!num_Task}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+extra_Java_Options_val=${!extra_Java_Options}
+driver_memory_val=${!driver_memory}
+rpc_askTime_val=${!rpc_askTime}
+scheduler_maxRegisteredResourcesWaitingTime_val=${!scheduler_maxRegisteredResourcesWaitingTime}
+worker_timeout_val=${!worker_timeout}
+network_timeout_val=${!network_timeout}
+storage_blockManagerSlaveTimeoutMs_val=${!storage_blockManagerSlaveTimeoutMs}
+shuffle_blockTransferService_val=${!shuffle_blockTransferService}
+driver_maxResultSize_val=${!driver_maxResultSize}
+shuffle_manager_val=${!shuffle_manager}
+broadcast_blockSize_val=${!broadcast_blockSize}
+rpc_message_maxSize_val=${!rpc_message_maxSize}
+core_connection_ack_wait_timeout_val=${!core_connection_ack_wait_timeout}
+storage_memoryFraction_val=${!storage_memoryFraction}
+shuffle_memoryFraction_val=${!shuffle_memoryFraction}
+rdd_compress_val=${!rdd_compress}
+memory_useLegacyMode_val=${!memory_useLegacyMode}
+num_colors_val=${!num_Colors}
+graph_split_val=${!graph_Split}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+echo "${driver_memory}:${driver_memory_val}"
+echo "${extra_Java_Options}:${extra_Java_Options_val}"
+echo "${num_Task}:${num_task_val}"
+echo "${num_Colors}:${num_colors_val}"
+echo "${graph_Split}:${graph_split_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${num_partitions_val} ] ||
+ [ ! ${num_task_val} ] ||
+ [ ! ${num_colors_val} ] ||
+ [ ! ${graph_split_val} ] ||
+ [ ! ${extra_Java_Options_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+queryGraph_path="query_${queryGraph}"
+queryGraph_path_val=${!queryGraph_path}
+echo "${dataset_name} : ${data_path_val}"
+echo "${queryGraph_path} : ${queryGraph_path_val}"
+
+output_path="${output_path_prefix}/sgm/${is_raw}/${dataset_name}_${queryGraph}_${match_mode}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs--SGM_${dataset_name}_${queryGraph}_${match_mode}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
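+ # The jars are staged at a fixed path on every agent so that
+ # spark.executor.extraClassPath below can resolve them locally on the executors.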
+ spark-submit \
+ --class com.bigdata.graph.SubgraphMatchingRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+ --conf spark.rpc.askTime=${rpc_askTime_val} \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=${scheduler_maxRegisteredResourcesWaitingTime_val} \
+ --conf spark.worker.timeout=${worker_timeout_val} \
+ --conf spark.network.timeout=${network_timeout_val} \
+ --conf spark.storage.blockManagerSlaveTimeoutMs=${storage_blockManagerSlaveTimeoutMs_val} \
+ --conf spark.shuffle.blockTransferService=${shuffle_blockTransferService_val} \
+ --conf spark.driver.maxResultSize=${driver_maxResultSize_val} \
+ --conf spark.shuffle.manager=${shuffle_manager_val} \
+ --conf spark.broadcast.blockSize=${broadcast_blockSize_val} \
+ --conf spark.rpc.message.maxSize=${rpc_message_maxSize_val} \
+ --conf spark.core.connection.ack.wait.timeout=${core_connection_ack_wait_timeout_val} \
+ --conf spark.storage.memoryFraction=${storage_memoryFraction_val} \
+ --conf spark.shuffle.memoryFraction=${shuffle_memoryFraction_val} \
+ --conf spark.rdd.compress=${rdd_compress_val} \
+ --conf spark.memory.useLegacyMode=${memory_useLegacyMode_val} \
+ --conf spark.executor.memoryOverhead=5g \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${queryGraph} ${is_raw} ${match_mode} ${output_path} ${data_path_val} ${num_partitions_val} ${num_task_val} ${queryGraph_path_val} | tee ./log/log
+else
+ spark-submit \
+ --class pegasus.spark.subgraph.TestOriginal \
+ --name "SGM_${dataset_name}_${queryGraph}_opensource" \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+ --conf spark.rpc.askTime=${rpc_askTime_val} \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=${scheduler_maxRegisteredResourcesWaitingTime_val} \
+ --conf spark.worker.timeout=${worker_timeout_val} \
+ --conf spark.network.timeout=${network_timeout_val} \
+ --conf spark.storage.blockManagerSlaveTimeoutMs=${storage_blockManagerSlaveTimeoutMs_val} \
+ --conf spark.shuffle.blockTransferService=${shuffle_blockTransferService_val} \
+ --conf spark.driver.maxResultSize=${driver_maxResultSize_val} \
+ --conf spark.shuffle.manager=${shuffle_manager_val} \
+ --conf spark.broadcast.blockSize=${broadcast_blockSize_val} \
+ --conf spark.rpc.message.maxSize=${rpc_message_maxSize_val} \
+ --conf spark.core.connection.ack.wait.timeout=${core_connection_ack_wait_timeout_val} \
+ --conf spark.storage.memoryFraction=${storage_memoryFraction_val} \
+ --conf spark.shuffle.memoryFraction=${shuffle_memoryFraction_val} \
+ --conf spark.rdd.compress=${rdd_compress_val} \
+ --conf spark.memory.useLegacyMode=${memory_useLegacyMode_val} \
+ ./lib/pegasus-spark_2.11-0.1.0-SNAPSHOT_openSource.jar yarn ${data_path_val} ${output_path} ${queryGraph_path_val} ${num_colors_val} 232 "," ${graph_split_val} 10000 > sgm_temp.log
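+ # The grep/awk below assume the open-source runner prints tab-separated
+ # "number of matched subgraphs<TAB><count>" and "cost time<TAB><seconds>" lines.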
+ num_subgraphs=$(grep "number of matched subgraphs" sgm_temp.log | awk -F '[\t]' '{print $2}')
+ costTime=$(grep "cost time" sgm_temp.log | awk -F '[\t]' '{print $2}')
+ currentTime=$(date "+%Y%m%d_%H%M%S")
+ rm -rf sgm_temp.log
+ echo -e "algorithmName: SGM\ncostTime: $costTime\ndatasetName: ${dataset_name}\nisRaw: 'yes'\nnum_subgraphs: $num_subgraphs\ntestcaseType: SGM_opensource_${1}_${2}_opensource\n" > ./report/"SGM_${currentTime}.yml"
+ echo "Exec Successful: End." > ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/tc_run.sh b/tools/kal-test/bin/graph/tc_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f85cde62ec6f4a83af1e876363cf3d6f34396ba7
--- /dev/null
+++ b/tools/kal-test/bin/graph/tc_run.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: graph500_22, graph500_23, graph500_24, graph500_25, graph500_26"
+ echo "2nd argument: name of api: run, preCanonical"
+ echo "3rd argument: optimization algorithm or raw: no, yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+api_name=$2
+is_raw=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+source conf/graph/tc/tc_spark.properties
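+# Property keys are suffixed with the CPU architecture so that aarch64 and
+# x86_64 tunings can coexist in the same properties file.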
+num_executors_val="numExecutors_${cpu_name}"
+executor_cores_val="executorCores"
+executor_memory_val="executorMemory_${cpu_name}"
+extra_java_options_val="extraJavaOptions_${cpu_name}"
+master_val="master"
+deploy_mode_val="deployMode"
+driver_cores_val="driverCores"
+driver_memory_val="driverMemory"
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+extra_java_options=${!extra_java_options_val}
+master=${!master_val}
+deploy_mode=${!deploy_mode_val}
+driver_cores=${!driver_cores_val}
+driver_memory=${!driver_memory_val}
+if [ ! ${num_executors} ] \
+ || [ ! ${executor_cores} ] \
+ || [ ! ${executor_memory} ] \
+ || [ ! ${deploy_mode} ] \
+ || [ ! ${driver_cores} ] \
+ || [ ! ${driver_memory} ] \
+ || [ ! ${master} ]; then
+ echo "Some values are NUll,please confirm with the property files"
+ exit 0
+fi
+echo "${master_val}:${master}"
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "${extra_java_options_val}:${extra_java_options}"
+echo "${driver_memory_val}:${driver_memory}"
+echo "${driver_cores_val}:${driver_cores}"
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+output_path="${output_path_prefix}/tc/${is_raw}/${dataset_name}_${api_name}"
+echo "${dataset_name} : ${input_path}"
+echo "outputPath : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- tc-${dataset_name}-${api_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.TriangleCountRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-cores ${driver_cores} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options} -XX:SurvivorRatio=4 -XX:ParallelGCThreads=6" \
+ --conf spark.rpc.askTimeout=36000 \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.network.timeout=6000s \
+ --conf spark.storage.blockManagerSlaveTimeoutMs=600000 \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=25g \
+ --conf spark.rpc.message.maxSize=2046 \
+ --conf spark.core.connection.ack.wait.timeout=60000s \
+ --conf spark.storage.memoryFraction=0.2 \
+ --conf spark.shuffle.memoryFraction=0.6 \
+ --conf spark.rdd.compress=true \
+ --jars "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} ${api_name} ${is_raw} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.TriangleCountRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-cores ${driver_cores} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options} -XX:SurvivorRatio=4 -XX:ParallelGCThreads=6" \
+ --conf spark.rpc.askTimeout=36000 \
+ --conf spark.scheduler.maxRegisteredResourcesWaitingTime=3600000 \
+ --conf spark.worker.timeout=3600 \
+ --conf spark.network.timeout=6000s \
+ --conf spark.storage.blockManagerSlaveTimeoutMs=600000 \
+ --conf spark.shuffle.blockTransferService=nio \
+ --conf spark.driver.maxResultSize=100g \
+ --conf spark.shuffle.manager=SORT \
+ --conf spark.broadcast.blockSize=25g \
+ --conf spark.rpc.message.maxSize=2046 \
+ --conf spark.core.connection.ack.wait.timeout=60000s \
+ --conf spark.storage.memoryFraction=0.2 \
+ --conf spark.shuffle.memoryFraction=0.6 \
+ --conf spark.rdd.compress=true \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} ${api_name} ${is_raw} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/tpr_run.sh b/tools/kal-test/bin/graph/tpr_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e76c4153757c3f36e16d950b67440d91f72fb199
--- /dev/null
+++ b/tools/kal-test/bin/graph/tpr_run.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: twitter_tpr"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/tpr/tpr_spark.properties
+
+dataset_name=$1
+is_raw=$2
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings to form the per-case property-key names
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${extra_java_options_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path=${dataset_name}
+data_path_val=${!data_path}
+echo "${dataset_name} : ${data_path_val}"
+
+output_path="${output_path_prefix}/tpr/${is_raw}/${dataset_name}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- TrillionPageRank"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.TrillionPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 80g \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer.max=2040m \
+ --conf spark.rdd.compress=true \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${output_path} ${is_raw} | tee ./log/log
+else
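+ # The raw run still ships the kal-test jar itself to every agent, since the
+ # executor classpath below references it from /opt/graph_classpath/.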
+ scp lib/kal-test_${scala_version_val}-0.1.jar root@agent1:/opt/graph_classpath/
+ scp lib/kal-test_${scala_version_val}-0.1.jar root@agent2:/opt/graph_classpath/
+ scp lib/kal-test_${scala_version_val}-0.1.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.TrillionPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 80g \
+ --conf spark.driver.maxResultSize=80g \
+ --conf spark.locality.wait.node=0 \
+ --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+ --conf spark.kryoserializer.buffer.max=2040m \
+ --conf spark.rdd.compress=true \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/kal-test_${scala_version_val}-0.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${output_path} ${is_raw} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph/tr_run.sh b/tools/kal-test/bin/graph/tr_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d88a27d372a387f72f1ac62533558efeef36c45f
--- /dev/null
+++ b/tools/kal-test/bin/graph/tr_run.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cit_patents,uk_2002,arabic_2005"
+ echo "2nd argument: name of api: run,runUntilConvergence"
+ echo "3nd argument: seeds count: 100,500,1000"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ alg_usage
+ exit 0
+fi
+
+source conf/graph/tr/tr_spark.properties
+
+dataset_name=$1
+api_name=$2
+seedsCount=$3
+
+if [ ${dataset_name} != "cit_patents" ] &&
+ [ ${dataset_name} != "uk_2002" ] &&
+ [ ${dataset_name} != "arabic_2005" ] ;then
+ echo "invalid dataset name,dataset name:cit_patents,uk_2002,arabic_2005"
+ exit 1
+fi
+if [ ${api_name} != "run" ] &&
+ [ ${api_name} != "runUntilConvergence" ] ;then
+ echo "invalid argument value,api name: run,runUntilConvergence"
+ exit 1
+fi
+if [ ${seedsCount} != "100" ] && [ ${seedsCount} != "500" ] && [ ${seedsCount} != "1000" ];then
+ echo "invalid argument value,must be: 100 or 500 or 1000"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings to form the per-case property-key names
+num_executors="${api_name}_${dataset_name}_${seedsCount}_numExecutors_${cpu_name}"
+executor_cores="${api_name}_${dataset_name}_${seedsCount}_executorCores_${cpu_name}"
+executor_memory="${api_name}_${dataset_name}_${seedsCount}_executorMemory_${cpu_name}"
+num_partitions="${api_name}_${dataset_name}_${seedsCount}_numPartitions_${cpu_name}"
+extra_Java_Options="${api_name}_${dataset_name}_${seedsCount}_extraJavaOptions_${cpu_name}"
+deploy_mode="deployMode"
+driver_memory="driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+deploy_mode_val=${!deploy_mode}
+num_partitions_val=${!num_partitions}
+extra_Java_Options_val=${!extra_Java_Options}
+driver_memory_val=${!driver_memory}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${num_partitions} : ${num_partitions_val}"
+echo "${driver_memory}:${driver_memory_val}"
+echo "${extra_Java_Options}:${extra_Java_Options_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${num_partitions_val} ] ; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path="${output_path_prefix}/tr/${dataset_name}_${seedsCount}_${api_name}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- tr-${dataset_name}-${api_name}-${seedsCount}"
+spark-submit \
+--class com.bigdata.graph.TrustRankRunner \
+--master yarn \
+--deploy-mode ${deploy_mode_val} \
+--num-executors ${num_executors_val} \
+--executor-memory ${executor_memory_val} \
+--executor-cores ${executor_cores_val} \
+--driver-memory ${driver_memory_val} \
+--conf spark.executor.extraJavaOptions=${extra_Java_Options_val} \
+--conf spark.driver.maxResultSize=200g \
+--conf spark.driver.extraJavaOptions="-Xms100G" \
+--conf spark.locality.wait.node=0 \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${num_partitions_val} "no" ${data_path_val} ${api_name} ${seedsCount} ${output_path} | tee ./log/log
diff --git a/tools/kal-test/bin/graph/wce_run.sh b/tools/kal-test/bin/graph/wce_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4eb0cfc559bd4f55963ad8e6d333436e179ab7a3
--- /dev/null
+++ b/tools/kal-test/bin/graph/wce_run.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "please input 1 argument: "
+ echo "1st argument: name of dataset: graph500_24, graph500_25, graph500_26"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 1 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+
+source conf/graph/wce/wce_spark.properties
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings to form the per-case property-key names
+num_executors="${dataset_name}_numExectuors_${cpu_name}"
+executor_cores="${dataset_name}_executorCores_${cpu_name}"
+executor_memory="${dataset_name}_executorMemory_${cpu_name}"
+extra_java_options="${dataset_name}_extraJavaOptions_${cpu_name}"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${extra_java_options_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path="${output_path_prefix}/wce/${dataset_name}"
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path}"
+hdfs dfs -rm -r -f ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+echo "start to submit spark jobs -- wce-${dataset_name}"
+spark-submit \
+--class com.bigdata.graph.WCERunner \
+--driver-memory 80g \
+--master yarn \
+--num-executors ${num_executors_val} \
+--executor-cores ${executor_cores_val} \
+--executor-memory ${executor_memory_val} \
+--conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/fastutil-8.3.1.jar:lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${output_path} | tee ./log/log
diff --git a/tools/kal-test/bin/graph/wce_run_hive.sh b/tools/kal-test/bin/graph/wce_run_hive.sh
new file mode 100644
index 0000000000000000000000000000000000000000..474217b73406b783df21c62c14fed31cb532a43b
--- /dev/null
+++ b/tools/kal-test/bin/graph/wce_run_hive.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+set -e
+
+case "$1" in
+-h | --help | ?)
+ echo "Usage: "
+ exit 0
+ ;;
+esac
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+if [ ${cpu_name} == "aarch64" ]
+then
+ cpu_name="aarch_64"
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+table_name=$1
+col1=$2
+col2=$3
+maxIter=$4
+maxDegree=$5
+save_mode=$6
+save_arg=$7
+
+spark-submit \
+--class com.bigdata.graph.WCEHiveRunner \
+--driver-memory 80g \
+--master yarn \
+--num-executors 35 \
+--executor-cores 8 \
+--executor-memory "25g" \
+--conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
+--conf spark.rdd.compress=true \
+--conf spark.shuffle.compress=true \
+--conf spark.shuffle.spill.compress=true \
+--conf spark.io.compression.codec=lz4 \
+--jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+--conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+./lib/kal-test_${scala_version_val}-0.1.jar ${table_name} ${col1} ${col2} ${maxIter} ${maxDegree} ${save_mode} ${save_arg}
diff --git a/tools/kal-test/bin/graph/wlpa_run.sh b/tools/kal-test/bin/graph/wlpa_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fdf399ac1238882e11343b063be9f9882f9488e2
--- /dev/null
+++ b/tools/kal-test/bin/graph/wlpa_run.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: enwiki_2018,arabic_2005,GAP_twitter"
+ echo "2nd argument: optimization algorithm or raw: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ];then
+ alg_usage
+ exit 0
+fi
+
+dataset_name=$1
+is_raw=$2
+
+if [ $dataset_name != 'enwiki_2018' ] && [ $dataset_name != 'arabic_2005' ] && [ $dataset_name != 'GAP_twitter' ];
+then
+ echo "invalid dataset name, dataset name: enwiki_2018,arabic_2005,GAP_twitter"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
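+# model_conf bundles dataset, architecture and mode into one key; the optimized
+# runner below receives it as its first argument, presumably to select the
+# matching tuning profile inside the runner.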
+model_conf=${dataset_name}-${cpu_name}-${is_raw}
+
+source conf/graph/wlpa/wlpa_spark.properties
+num_executors_val="numExecutors_${dataset_name}_${cpu_name}"
+executor_cores_val="executorCores_${dataset_name}_${cpu_name}"
+executor_memory_val="executorMemory_${dataset_name}_${cpu_name}"
+executor_extra_javaopts_val="executorExtraJavaopts_${dataset_name}_${cpu_name}"
+
+master_val="master"
+deploy_mode_val="deployMode"
+driver_memory_val="driverMemory"
+num_executors=${!num_executors_val}
+executor_cores=${!executor_cores_val}
+executor_memory=${!executor_memory_val}
+master=${!master_val}
+driver_memory=${!driver_memory_val}
+deploy_mode=${!deploy_mode_val}
+executor_extra_javaopts=${!executor_extra_javaopts_val}
+if [ ! ${num_executors} ] \
+ || [ ! ${executor_cores} ] \
+ || [ ! ${executor_memory} ] \
+ || [ ! ${master} ]; then
+ echo "Some values are NUll, please confirm with the property files"
+ exit 0
+fi
+echo "${master_val}:${master}"
+echo "${deploy_mode_val}:${deploy_mode}"
+echo "${num_executors_val}:${num_executors}"
+echo "${executor_cores_val}:${executor_cores}"
+echo "${executor_memory_val}:${executor_memory}"
+echo "${executor_extra_javaopts_val}:${executor_extra_javaopts}"
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+input_path=${!dataset_name}
+output_path="/tmp/graph/result/wlpa/${is_raw}/${dataset_name}"
+echo "${dataset_name}: ${input_path},${output_path}"
+
+echo "start to clean exist output"
+hdfs dfs -rm -r -f -skipTrash ${output_path}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs -- wlpa-${dataset_name}"
+
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.WeightedLablePropagationRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/fastutil-8.3.1.jar:/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${input_path} ${output_path} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.graph.WeightedLablePropagationRunner \
+ --deploy-mode ${deploy_mode} \
+ --driver-memory ${driver_memory} \
+ --num-executors ${num_executors} \
+ --executor-cores ${executor_cores} \
+ --executor-memory ${executor_memory} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_javaopts}" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${input_path} ${output_path} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/graph/wpr_run.sh b/tools/kal-test/bin/graph/wpr_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c24781cf99669dadb2e302320e8b4c2525e60f74
--- /dev/null
+++ b/tools/kal-test/bin/graph/wpr_run.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: cage14, GAP_road, GAP_twitter"
+ echo "2nd argument: name of api: static, convergence"
+ echo "3rd argument: optimization algorithm or raw: no, yes"
+}
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ];then
+ usage
+ exit 0
+fi
+
+dataset_name=$1
+api_name=$2
+is_raw=$3
+
+if [ ${dataset_name} != "GAP_road" ] && [ ${dataset_name} != "cage14" ] && [ ${dataset_name} != "GAP_twitter" ];then
+ echo "invalid dataset name, dataset name:GAP_road, cage14, GAP_twitter"
+ exit 1
+fi
+if [ ${api_name} != "static" ] && [ ${api_name} != "convergence" ];then
+ echo "invalid api name,api name: static or convergence"
+ exit 1
+fi
+
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+prefix="run"
+if [ ${api_name} == "runUntilConvergence" ]
+then
+ prefix="convergence"
+fi
+
+source conf/graph/wpr/wpr_spark.properties
+# concatenate strings to form the per-case property-key names
+deploy_mode="deployMode"
+num_executors="numExecutors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+split="split_graph"
+
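+# The raw branch targets the open-source SoundCloud PageRank runner, which
+# additionally needs a partition count, an iteration limit and a tolerance,
+# so api-specific keys are loaded for it here.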
+if [ ${is_raw} != "no" ]; then
+ num_executors=${api_name}_${dataset_name}_numExecutors
+ executor_cores=${api_name}_${dataset_name}_executorCores
+ executor_memory=${api_name}_${dataset_name}_executorMemory
+ extra_java_options=${api_name}_${dataset_name}_extraJavaOptions
+ partition=${api_name}_${dataset_name}_partition
+ iter=${api_name}_iter
+ tolerance=${api_name}_tolerance
+
+ iter_val=${!iter}
+ tolerance_val=${!tolerance}
+ partition_val=${!partition}
+fi
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+deploy_mode_val=${!deploy_mode}
+split_val=${!split}
+
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "split : ${split_val}"
+
+if [ ! ${num_executors_val} ] ||
+ [ ! ${executor_cores_val} ] ||
+ [ ! ${executor_memory_val} ] ||
+ [ ! ${extra_java_options_val} ] ||
+ [ ! ${split_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/graph/graph_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+output_path_val=${output_path_prefix}/wpr/${is_raw}/${dataset_name}_${api_name}
+echo "${dataset_name} : ${data_path_val}"
+echo "output_path : ${output_path_val}"
+hdfs dfs -rm -r -f ${output_path_val}
+
+echo "start to clean cache and sleep 3s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 3
+
+echo "start to submit spark jobs -- wpr-${dataset_name}-${api_name}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/graph_classpath/
+ scp lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/graph_classpath/
+
+ spark-submit \
+ --class com.bigdata.graph.WeightedPageRankRunner \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 100g \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.driver.extraJavaOptions="-Xms100G" \
+ --conf spark.locality.wait.node=0 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.shuffle.blockTransferService=nio \
+ --jars "lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/graph_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${dataset_name} ${data_path_val} ${output_path_val} ${api_name} ${is_raw} ${split_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.soundcloud.spark.pagerank.SparkPageRankTest \
+ --master yarn \
+ --deploy-mode ${deploy_mode_val} \
+ --num-executors ${num_executors_val} \
+ --executor-memory ${executor_memory_val} \
+ --executor-cores ${executor_cores_val} \
+ --driver-memory 100g \
+ --conf spark.driver.maxResultSize=200g \
+ --conf spark.driver.extraJavaOptions="-Xms100G" \
+ --conf spark.locality.wait.node=0 \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf spark.shuffle.blockTransferService=nio \
+ ./lib/spark-pagerank-1.0-SNAPSHOT.jar yarn ${data_path_val} ${split_val} ${partition_val} ${output_path_val} 0.15 ${iter_val} ${tolerance_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/graph_workflow.sh b/tools/kal-test/bin/graph_workflow.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5b6b1f2b1368c21ae38f22d7cdb0295ec1c6b81e
--- /dev/null
+++ b/tools/kal-test/bin/graph_workflow.sh
@@ -0,0 +1,312 @@
+#!/bin/bash
+set -e
+
+function alg_usage() {
+ echo "Usage: "
+ echo "1st argument: optimization algorithm or raw: no/yes"
+ echo "2nd argument: verify result: no/yes"
+}
+
+case "$1" in
+-h | --help | ?)
+ alg_usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ]; then
+ alg_usage
+ exit 0
+fi
+
+is_raw=$1
+is_check=$2
+
+type=opt
+if [ $is_raw == "yes" ]; then
+ type=raw
+fi
+
+function createDir() {
+ dir=$1
+ if [ ! -d $dir ]; then
+ mkdir $dir
+ fi
+}
+createDir logs
+createDir log
+createDir report
+
+graph_classpath=/opt/graph_classpath/
+function ssh_mkdir() {
+ server=$1
+ dir=$2
+ ssh $server "mkdir -p $dir"
+}
+ssh_mkdir agent1 $graph_classpath
+ssh_mkdir agent2 $graph_classpath
+ssh_mkdir agent3 $graph_classpath
+
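+# Each case appends stdout and stderr to its own log via `2>&1 | tee -a`. Note
+# that the pipe makes tee's exit status the pipeline's status, so a failed case
+# does not trip set -e and the workflow continues with the next case.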
+# betweenness
+./bin/graph/betweenness_run.sh cit_patents ${is_raw} ${is_check} 2>&1 | tee -a logs/betweenness_cit_patents_${type}.log
+./bin/graph/betweenness_run.sh enwiki_2018 ${is_raw} ${is_check} 2>&1 | tee -a logs/betweenness_enwiki_2018_${type}.log
+./bin/graph/betweenness_run.sh uk_2002 ${is_raw} ${is_check} 2>&1 | tee -a logs/betweenness_uk_2002_${type}.log
+
+# bfs
+./bin/graph/bfs_run.sh cit_patents ${is_raw} 2>&1 | tee -a logs/bfs_cit_patents_${type}.log
+./bin/graph/bfs_run.sh enwiki_2018 ${is_raw} 2>&1 | tee -a logs/bfs_enwiki_2018_${type}.log
+./bin/graph/bfs_run.sh arabic_2005 ${is_raw} 2>&1 | tee -a logs/bfs_arabic_2005_${type}.log
+./bin/graph/bfs_run.sh graph500_22 ${is_raw} 2>&1 | tee -a logs/bfs_graph500_22_${type}.log
+./bin/graph/bfs_run.sh graph500_23 ${is_raw} 2>&1 | tee -a logs/bfs_graph500_23_${type}.log
+./bin/graph/bfs_run.sh graph500_25 ${is_raw} 2>&1 | tee -a logs/bfs_graph500_25_${type}.log
+
+# cc
+./bin/graph/cc_run.sh graph500_25 ${is_raw} 2>&1 | tee -a logs/cc_graph500_25_${type}.log
+./bin/graph/cc_run.sh graph500_26 ${is_raw} 2>&1 | tee -a logs/cc_graph500_26_${type}.log
+./bin/graph/cc_run.sh liveJournal ${is_raw} 2>&1 | tee -a logs/cc_liveJournal_${type}.log
+
+# cd
+./bin/graph/cd_run.sh simulate1 2>&1 | tee -a logs/cd_simulate1.log
+./bin/graph/cd_run.sh simulate2 2>&1 | tee -a logs/cd_simulate2.log
+./bin/graph/cd_run.sh usaRoad 2>&1 | tee -a logs/cd_usaRoad.log
+
+# closeness
+./bin/graph/closeness_run.sh cit_patents weighted ${is_check} 2>&1 | tee -a logs/closeness_cit_patents_weighted_${type}.log
+./bin/graph/closeness_run.sh uk_2002 weighted ${is_check} 2>&1 | tee -a logs/closeness_uk_2002_weighted_${type}.log
+
+./bin/graph/closeness_run.sh cit_patents unweighted ${is_check} 2>&1 | tee -a logs/closeness_cit_patents_unweighted_${type}.log
+./bin/graph/closeness_run.sh uk_2002 unweighted ${is_check} 2>&1 | tee -a logs/closeness_uk_2002_unweighted_${type}.log
+
+# clusteringcoefficient
+./bin/graph/clusteringcoefficient_run.sh cit_patents lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_cit_patents_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh uk_2002 lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_uk_2002_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh arabic_2005 lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_arabic_2005_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh cit_patents lcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_cit_patents_lcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh uk_2002 lcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_uk_2002_lcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh arabic_2005 lcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_arabic_2005_lcc_unweighted_${type}.log
+
+./bin/graph/clusteringcoefficient_run.sh graph500_22 lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_22_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_23 lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_23_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_24 lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_24_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_25 lcc weighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_25_lcc_weighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_22 lcc unweighted ${is_raw} 2>&1 | tee -a logs/clusteringcoefficient_graph500_22_lcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_23 lcc unweighted ${is_raw} 2>&1 | tee -a logs/clusteringcoefficient_graph500_23_lcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_24 lcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_24_lcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_25 lcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_25_lcc_unweighted_${type}.log
+
+./bin/graph/clusteringcoefficient_run.sh graph500_22 avgcc unweighted ${is_raw} 2>&1 | tee -a logs/clusteringcoefficient_graph500_22_avgcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_23 avgcc unweighted ${is_raw} 2>&1 | tee -a logs/clusteringcoefficient_graph500_23_avgcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_24 avgcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_24_avgcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_25 avgcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_25_avgcc_unweighted_${type}.log
+
+./bin/graph/clusteringcoefficient_run.sh graph500_22 globalcc unweighted ${is_raw} 2>&1 | tee -a logs/clusteringcoefficient_graph500_22_globalcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_23 globalcc unweighted ${is_raw} 2>&1 | tee -a logs/clusteringcoefficient_graph500_23_globalcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_24 globalcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_24_globalcc_unweighted_${type}.log
+./bin/graph/clusteringcoefficient_run.sh graph500_25 globalcc unweighted no 2>&1 | tee -a logs/clusteringcoefficient_graph500_25_globalcc_unweighted_${type}.log
+
+# degree
+./bin/graph/degree_run.sh mycielskian20 degrees ${is_raw} 2>&1 | tee -a logs/degree_mycielskian20_degrees_${type}.log
+./bin/graph/degree_run.sh gap_kron degrees ${is_raw} 2>&1 | tee -a logs/degree_gap_kron_degrees_${type}.log
+./bin/graph/degree_run.sh com_friendster degrees ${is_raw} 2>&1 | tee -a logs/degree_com_friendster_degrees_${type}.log
+
+./bin/graph/degree_run.sh it_2004 inDegrees ${is_raw} 2>&1 | tee -a logs/degree_it_2004_inDegrees_${type}.log
+./bin/graph/degree_run.sh twitter7 inDegrees ${is_raw} 2>&1 | tee -a logs/degree_twitter7_inDegrees_${type}.log
+./bin/graph/degree_run.sh uk_2007_05 inDegrees ${is_raw} 2>&1 | tee -a logs/degree_uk_2007_05_inDegrees_${type}.log
+
+./bin/graph/degree_run.sh it_2004 outDegrees ${is_raw} 2>&1 | tee -a logs/degree_it_2004_outDegrees_${type}.log
+./bin/graph/degree_run.sh twitter7 outDegrees ${is_raw} 2>&1 | tee -a logs/degree_twitter7_outDegrees_${type}.log
+./bin/graph/degree_run.sh uk_2007_05 outDegrees ${is_raw} 2>&1 | tee -a logs/degree_uk_2007_05_outDegrees_${type}.log
+
+# incpr
+./bin/graph/incpr_run.sh twitter_2010 0.001 1 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.001_1_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.001 2 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.001_2_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.001 3 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.001_3_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.001 4 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.001_4_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.001 5 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.001_5_${type}.log
+
+./bin/graph/incpr_run.sh twitter_2010 0.01 1 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.01_1_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.01 2 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.01_2_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.01 3 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.01_3_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.01 4 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.01_4_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.01 5 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.01_5_${type}.log
+
+./bin/graph/incpr_run.sh twitter_2010 0.05 1 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.05_1_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.05 2 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.05_2_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.05 3 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.05_3_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.05 4 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.05_4_${type}.log
+./bin/graph/incpr_run.sh twitter_2010 0.05 5 ${is_raw} 2>&1 | tee -a logs/incpr_twitter_2010_0.05_5_${type}.log
+
+# kcore
+./bin/graph/kcore_run.sh graph500_22 ${is_raw} 2>&1 | tee -a logs/kcore_graph500_22_${type}.log
+./bin/graph/kcore_run.sh graph500_23 ${is_raw} 2>&1 | tee -a logs/kcore_graph500_23_${type}.log
+./bin/graph/kcore_run.sh graph500_25 ${is_raw} 2>&1 | tee -a logs/kcore_graph500_25_${type}.log
+./bin/graph/kcore_run.sh graph500_26 ${is_raw} 2>&1 | tee -a logs/kcore_graph500_26_${type}.log
+
+# louvain
+./bin/graph/louvain_run.sh graph500_22 ${is_raw} 2>&1 | tee -a logs/louvain_graph500_22_${type}.log
+./bin/graph/louvain_run.sh graph500_24 ${is_raw} 2>&1 | tee -a logs/louvain_graph500_24_${type}.log
+./bin/graph/louvain_run.sh graph500_25 ${is_raw} 2>&1 | tee -a logs/louvain_graph500_25_${type}.log
+
+./bin/graph/louvain_run.sh cit_patents no 2>&1 | tee -a logs/louvain_cit_patents_${type}.log
+./bin/graph/louvain_run.sh uk_2002 no 2>&1 | tee -a logs/louvain_uk_2002_${type}.log
+./bin/graph/louvain_run.sh arabic_2005 no 2>&1 | tee -a logs/louvain_arabic_2005_${type}.log
+
+# lpa
+./bin/graph/lpa_run.sh graph500_22 runConvergence no 2>&1 | tee -a logs/lpa_graph500_22_runConvergence_${type}.log
+./bin/graph/lpa_run.sh graph500_24 runConvergence no 2>&1 | tee -a logs/lpa_graph500_24_runConvergence_${type}.log
+./bin/graph/lpa_run.sh graph500_25 runConvergence no 2>&1 | tee -a logs/lpa_graph500_25_runConvergence_${type}.log
+
+./bin/graph/lpa_run.sh graph500_22 run ${is_raw} 2>&1 | tee -a logs/lpa_graph500_22_run_${type}.log
+./bin/graph/lpa_run.sh graph500_24 run ${is_raw} 2>&1 | tee -a logs/lpa_graph500_24_run_${type}.log
+./bin/graph/lpa_run.sh graph500_25 run ${is_raw} 2>&1 | tee -a logs/lpa_graph500_25_run_${type}.log
+
+# mce
+./bin/graph/mce_run.sh graph500_23 2>&1 | tee -a logs/mce_graph500_23_${type}.log
+./bin/graph/mce_run.sh graph500_24 2>&1 | tee -a logs/mce_graph500_24_${type}.log
+./bin/graph/mce_run.sh graph500_25 2>&1 | tee -a logs/mce_graph500_25_${type}.log
+
+# modularity
+./bin/graph/modularity_run.sh graph500_23 ${is_raw} 2>&1 | tee -a logs/modularity_graph500_23_${type}.log
+./bin/graph/modularity_run.sh graph500_25 ${is_raw} 2>&1 | tee -a logs/modularity_graph500_25_${type}.log
+./bin/graph/modularity_run.sh graph500_26 ${is_raw} 2>&1 | tee -a logs/modularity_graph500_26_${type}.log
+
+./bin/graph/modularity_run.sh uk_2002 no 2>&1 | tee -a logs/modularity_uk_${type}.log
+./bin/graph/modularity_run.sh arabic_2005 no 2>&1 | tee -a logs/modularity_arabic_${type}.log
+./bin/graph/modularity_run.sh twitter no 2>&1 | tee -a logs/modularity_twitter_${type}.log
+
+# mssp
+./bin/graph/mssp_run.sh soc_liveJournal 5 ${is_raw} 2>&1 | tee -a logs/mssp_liveJournal_5_${type}.log
+./bin/graph/mssp_run.sh uk_2002 5 ${is_raw} 2>&1 | tee -a logs/mssp_uk_2002_5_${type}.log
+./bin/graph/mssp_run.sh arabic_2005 5 ${is_raw} 2>&1 | tee -a logs/mssp_arabic_2005_5_${type}.log
+
+./bin/graph/mssp_run.sh soc_liveJournal 50 ${is_raw} 2>&1 | tee -a logs/mssp_liveJournal_50_${type}.log
+./bin/graph/mssp_run.sh uk_2002 50 ${is_raw} 2>&1 | tee -a logs/mssp_uk_2002_50_${type}.log
+./bin/graph/mssp_run.sh arabic_2005 50 ${is_raw} 2>&1 | tee -a logs/mssp_arabic_2005_50_${type}.log
+
+# node2vec
+./bin/graph/node2vec_run.sh cit_patents ${is_raw} ${is_check} 2>&1 | tee -a logs/node2vec_cit_patents_${type}.log
+./bin/graph/node2vec_run.sh soc_liveJournal no ${is_check} 2>&1 | tee -a logs/node2vec_soc_liveJournal_${type}.log
+./bin/graph/node2vec_run.sh uk_2002 no ${is_check} 2>&1 | tee -a logs/node2vec_uk_2002_${type}.log
+
+# ppr
+./bin/graph/ppr_run.sh cit_patents fixMS ${is_raw} 1 2>&1 | tee -a logs/ppr_cit_patents_fixMS_1_${type}.log
+./bin/graph/ppr_run.sh cit_patents fixMS ${is_raw} 5 2>&1 | tee -a logs/ppr_cit_patents_fixMS_5_${type}.log
+./bin/graph/ppr_run.sh cit_patents fixMS ${is_raw} 10 2>&1 | tee -a logs/ppr_cit_patents_fixMS_10_${type}.log
+./bin/graph/ppr_run.sh cit_patents fixMS ${is_raw} 50 2>&1 | tee -a logs/ppr_cit_patents_fixMS_50_${type}.log
+./bin/graph/ppr_run.sh cit_patents fixMS ${is_raw} 100 2>&1 | tee -a logs/ppr_cit_patents_fixMS_100_${type}.log
+
+./bin/graph/ppr_run.sh uk_2002 fixMS ${is_raw} 1 2>&1 | tee -a logs/ppr_uk_2002_fixMS_1_${type}.log
+./bin/graph/ppr_run.sh uk_2002 fixMS ${is_raw} 5 2>&1 | tee -a logs/ppr_uk_2002_fixMS_5_${type}.log
+./bin/graph/ppr_run.sh uk_2002 fixMS ${is_raw} 10 2>&1 | tee -a logs/ppr_uk_2002_fixMS_10_${type}.log
+./bin/graph/ppr_run.sh uk_2002 fixMS ${is_raw} 50 2>&1 | tee -a logs/ppr_uk_2002_fixMS_50_${type}.log
+./bin/graph/ppr_run.sh uk_2002 fixMS ${is_raw} 100 2>&1 | tee -a logs/ppr_uk_2002_fixMS_100_${type}.log
+
+./bin/graph/ppr_run.sh arabic_2005 fixMS ${is_raw} 1 2>&1 | tee -a logs/ppr_arabic_2005_fixMS_1_${type}.log
+./bin/graph/ppr_run.sh arabic_2005 fixMS ${is_raw} 5 2>&1 | tee -a logs/ppr_arabic_2005_fixMS_5_${type}.log
+./bin/graph/ppr_run.sh arabic_2005 fixMS ${is_raw} 10 2>&1 | tee -a logs/ppr_arabic_2005_fixMS_10_${type}.log
+./bin/graph/ppr_run.sh arabic_2005 fixMS ${is_raw} 50 2>&1 | tee -a logs/ppr_arabic_2005_fixMS_50_${type}.log
+./bin/graph/ppr_run.sh arabic_2005 fixMS ${is_raw} 100 2>&1 | tee -a logs/ppr_arabic_2005_fixMS_100_${type}.log
+
+./bin/graph/ppr_run.sh cit_patents fixSS ${is_raw} 2>&1 | tee -a logs/ppr_cit_patents_fixSS_${type}.log
+./bin/graph/ppr_run.sh uk_2002 fixSS ${is_raw} 2>&1 | tee -a logs/ppr_uk_2002_fixSS_${type}.log
+./bin/graph/ppr_run.sh arabic_2005 fixSS ${is_raw} 2>&1 | tee -a logs/ppr_arabic_2005_fixSS_${type}.log
+
+./bin/graph/ppr_run.sh cit_patents conSS ${is_raw} 2>&1 | tee -a logs/ppr_cit_patents_conSS_${type}.log
+./bin/graph/ppr_run.sh uk_2002 conSS ${is_raw} 2>&1 | tee -a logs/ppr_uk_2002_conSS_${type}.log
+./bin/graph/ppr_run.sh arabic_2005 conSS ${is_raw} 2>&1 | tee -a logs/ppr_arabic_2005_conSS_${type}.log
+
+# pr
+./bin/graph/pr_run.sh cit_patents run ${is_raw} 2>&1 | tee -a logs/pr_cit_patents_run_${type}.log
+./bin/graph/pr_run.sh uk_2002 run ${is_raw} 2>&1 | tee -a logs/pr_uk_2002_run_${type}.log
+./bin/graph/pr_run.sh arabic_2005 run ${is_raw} 2>&1 | tee -a logs/pr_arabic_2005_run_${type}.log
+
+./bin/graph/pr_run.sh cit_patents runUntilConvergence ${is_raw} 2>&1 | tee -a logs/pr_cit_patents_runUntilConvergence_${type}.log
+./bin/graph/pr_run.sh uk_2002 runUntilConvergence ${is_raw} 2>&1 | tee -a logs/pr_uk_2002_runUntilConvergence_${type}.log
+./bin/graph/pr_run.sh arabic_2005 runUntilConvergence ${is_raw} 2>&1 | tee -a logs/pr_arabic_2005_runUntilConvergence_${type}.log
+
+# scc
+./bin/graph/scc_run.sh cit_patents ${is_raw} 2>&1 | tee -a logs/scc_cit_patents_${type}.log
+./bin/graph/scc_run.sh enwiki_2018 ${is_raw} 2>&1 | tee -a logs/scc_enwiki_2018_${type}.log
+./bin/graph/scc_run.sh arabic_2005 ${is_raw} 2>&1 | tee -a logs/scc_arabic_2005_${type}.log
+
+# sgm
+./bin/graph/sgm_run.sh graph500_19 4dgn Identical no 2>&1 | tee -a logs/sgm_graph500_19_4dgn_Identical_${type}.log
+./bin/graph/sgm_run.sh graph500_19 4sqr Identical no 2>&1 | tee -a logs/sgm_graph500_19_4sqr_Identical_${type}.log
+./bin/graph/sgm_run.sh graph500_19 5tree Identical no 2>&1 | tee -a logs/sgm_graph500_19_5tree_Identical_${type}.log
+./bin/graph/sgm_run.sh graph500_19 6star Identical no 2>&1 | tee -a logs/sgm_graph500_19_6star_Identical_${type}.log
+
+./bin/graph/sgm_run.sh liveJournal 4dgn Identical no 2>&1 | tee -a logs/sgm_liveJournal_4dgn_Identical_${type}.log
+./bin/graph/sgm_run.sh liveJournal 4sqr Identical no 2>&1 | tee -a logs/sgm_liveJournal_4sqr_Identical_${type}.log
+./bin/graph/sgm_run.sh liveJournal 5tree Identical no 2>&1 | tee -a logs/sgm_liveJournal_5tree_Identical_${type}.log
+./bin/graph/sgm_run.sh liveJournal 6star Identical no 2>&1 | tee -a logs/sgm_liveJournal_6star_Identical_${type}.log
+
+./bin/graph/sgm_run.sh com_orkut 4dgn Identical no 2>&1 | tee -a logs/sgm_com_orkut_4dgn_Identical_${type}.log
+./bin/graph/sgm_run.sh com_orkut 4sqr Identical no 2>&1 | tee -a logs/sgm_com_orkut_4sqr_Identical_${type}.log
+./bin/graph/sgm_run.sh com_orkut 5tree Identical no 2>&1 | tee -a logs/sgm_com_orkut_5tree_Identical_${type}.log
+./bin/graph/sgm_run.sh com_orkut 6star Identical no 2>&1 | tee -a logs/sgm_com_orkut_6star_Identical_${type}.log
+
+./bin/graph/sgm_run.sh graph500_19 4dgn unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_graph500_19_unIdentical_4dgn_${type}.log
+./bin/graph/sgm_run.sh graph500_19 4clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_graph500_19_unIdentical_4clique_${type}.log
+./bin/graph/sgm_run.sh graph500_19 5clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_graph500_19_unIdentical_5clique_${type}.log
+./bin/graph/sgm_run.sh graph500_19 6clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_graph500_19_unIdentical_6clique_${type}.log
+
+./bin/graph/sgm_run.sh liveJournal 4dgn unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_liveJournal_unIdentical_4dgn_${type}.log
+./bin/graph/sgm_run.sh liveJournal 4clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_liveJournal_unIdentical_4clique_${type}.log
+./bin/graph/sgm_run.sh liveJournal 5clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_liveJournal_unIdentical_5clique_${type}.log
+./bin/graph/sgm_run.sh liveJournal 6clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_liveJournal_unIdentical_6clique_${type}.log
+
+./bin/graph/sgm_run.sh com_orkut 4dgn unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_com_orkut_unIdentical_4dgn_${type}.log
+./bin/graph/sgm_run.sh com_orkut 4clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_com_orkut_unIdentical_4clique_${type}.log
+./bin/graph/sgm_run.sh com_orkut 5clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_com_orkut_unIdentical_5clique_${type}.log
+./bin/graph/sgm_run.sh com_orkut 6clique unIdentical ${is_raw} 2>&1 | tee -a logs/sgm_com_orkut_unIdentical_6clique_${type}.log
+
+# tc
+./bin/graph/tc_run.sh graph500_22 run ${is_raw} 2>&1 | tee -a logs/tc_graph500_22_run_${type}.log
+./bin/graph/tc_run.sh graph500_23 run ${is_raw} 2>&1 | tee -a logs/tc_graph500_23_run_${type}.log
+./bin/graph/tc_run.sh graph500_24 run ${is_raw} 2>&1 | tee -a logs/tc_graph500_24_run_${type}.log
+./bin/graph/tc_run.sh graph500_25 run ${is_raw} 2>&1 | tee -a logs/tc_graph500_25_run_${type}.log
+./bin/graph/tc_run.sh graph500_26 run ${is_raw} 2>&1 | tee -a logs/tc_graph500_26_run_${type}.log
+
+./bin/graph/tc_run.sh graph500_22 preCanonical ${is_raw} 2>&1 | tee -a logs/tc_graph500_22_preCanonical_${type}.log
+./bin/graph/tc_run.sh graph500_23 preCanonical ${is_raw} 2>&1 | tee -a logs/tc_graph500_23_preCanonical_${type}.log
+./bin/graph/tc_run.sh graph500_24 preCanonical ${is_raw} 2>&1 | tee -a logs/tc_graph500_24_preCanonical_${type}.log
+./bin/graph/tc_run.sh graph500_25 preCanonical ${is_raw} 2>&1 | tee -a logs/tc_graph500_25_preCanonical_${type}.log
+./bin/graph/tc_run.sh graph500_26 preCanonical ${is_raw} 2>&1 | tee -a logs/tc_graph500_26_preCanonical_${type}.log
+
+# tpr
+./bin/graph/tpr_run.sh twitter_tpr ${is_raw} 2>&1 | tee -a logs/tpr_twitter_${type}.log
+
+# tr
+./bin/graph/tr_run.sh cit_patents run 100 2>&1 | tee -a logs/tr_cit_patents_run_100_${type}.log
+./bin/graph/tr_run.sh cit_patents run 500 2>&1 | tee -a logs/tr_cit_patents_run_500_${type}.log
+./bin/graph/tr_run.sh cit_patents run 1000 2>&1 | tee -a logs/tr_cit_patents_run_1000_${type}.log
+./bin/graph/tr_run.sh uk_2002 run 100 2>&1 | tee -a logs/tr_uk_2002_run_100_${type}.log
+./bin/graph/tr_run.sh uk_2002 run 500 2>&1 | tee -a logs/tr_uk_2002_run_500_${type}.log
+./bin/graph/tr_run.sh uk_2002 run 1000 2>&1 | tee -a logs/tr_uk_2002_run_1000_${type}.log
+./bin/graph/tr_run.sh arabic_2005 run 100 2>&1 | tee -a logs/tr_arabic_2005_run_100_${type}.log
+./bin/graph/tr_run.sh arabic_2005 run 500 2>&1 | tee -a logs/tr_arabic_2005_run_500_${type}.log
+./bin/graph/tr_run.sh arabic_2005 run 1000 2>&1 | tee -a logs/tr_arabic_2005_run_1000_${type}.log
+
+./bin/graph/tr_run.sh cit_patents runUntilConvergence 100 2>&1 | tee -a logs/tr_cit_patents_runUntilConvergence_100_${type}.log
+./bin/graph/tr_run.sh cit_patents runUntilConvergence 500 2>&1 | tee -a logs/tr_cit_patents_runUntilConvergence_500_${type}.log
+./bin/graph/tr_run.sh cit_patents runUntilConvergence 1000 2>&1 | tee -a logs/tr_cit_patents_runUntilConvergence_1000_${type}.log
+./bin/graph/tr_run.sh uk_2002 runUntilConvergence 100 2>&1 | tee -a logs/tr_uk_2002_runUntilConvergence_100_${type}.log
+./bin/graph/tr_run.sh uk_2002 runUntilConvergence 500 2>&1 | tee -a logs/tr_uk_2002_runUntilConvergence_500_${type}.log
+./bin/graph/tr_run.sh uk_2002 runUntilConvergence 1000 2>&1 | tee -a logs/tr_uk_2002_runUntilConvergence_1000_${type}.log
+./bin/graph/tr_run.sh arabic_2005 runUntilConvergence 100 2>&1 | tee -a logs/tr_arabic_2005_runUntilConvergence_100_${type}.log
+./bin/graph/tr_run.sh arabic_2005 runUntilConvergence 500 2>&1 | tee -a logs/tr_arabic_2005_runUntilConvergence_500_${type}.log
+./bin/graph/tr_run.sh arabic_2005 runUntilConvergence 1000 2>&1 | tee -a logs/tr_arabic_2005_runUntilConvergence_1000_${type}.log
+
+# wce
+./bin/graph/wce_run.sh graph500_24 2>&1 | tee -a logs/wce_graph500_24_${type}.log
+./bin/graph/wce_run.sh graph500_25 2>&1 | tee -a logs/wce_graph500_25_${type}.log
+./bin/graph/wce_run.sh graph500_26 2>&1 | tee -a logs/wce_graph500_26_${type}.log
+
+# wpr
+./bin/graph/wpr_run.sh cage14 static ${is_raw} 2>&1 | tee -a logs/wpr_cage14_static_${type}.log
+./bin/graph/wpr_run.sh GAP_road static ${is_raw} 2>&1 | tee -a logs/wpr_GAP_road_static_${type}.log
+./bin/graph/wpr_run.sh GAP_twitter static ${is_raw} 2>&1 | tee -a logs/wpr_GAP_twitter_static_${type}.log
+
+./bin/graph/wpr_run.sh cage14 convergence ${is_raw} 2>&1 | tee -a logs/wpr_cage14_convergence_${type}.log
+./bin/graph/wpr_run.sh GAP_road convergence ${is_raw} 2>&1 | tee -a logs/wpr_GAP_road_convergence_${type}.log
+./bin/graph/wpr_run.sh GAP_twitter convergence ${is_raw} 2>&1 | tee -a logs/wpr_GAP_twitter_convergence_${type}.log
diff --git a/tools/kal-test/bin/ml/als_run.sh b/tools/kal-test/bin/ml/als_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c4bac6cee7f9dbb14261ba8e2744515b34b4a66e
--- /dev/null
+++ b/tools/kal-test/bin/ml/als_run.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of data structure: [dataframe/rdd]"
+ echo "2nd argument: name of dataset: e.g. als/alsbs/alsh"
+ echo "3rd argument: name of API: e.g. fit/fit1/fit2/fit3; for rdd: train"
+ echo "4th argument: optimization algorithm or raw: [no/yes]"
+ echo "5th argument: Whether to Compare Results [no/yes]"
+}
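+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/als_run.sh dataframe als fit no no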
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 5 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/als/als_spark.properties
+data_structure=$1
+dataset_name=$2
+api_name=$3
+is_raw=$4
+if_check=$5
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${data_structure}-${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to form the property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
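+# ${!name} is bash indirect expansion: with cpu_name=aarch_64, for example,
+# ${!num_executors} reads the value of the numExectuors_aarch_64 key defined
+# in the sourced properties file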
+
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+mkdir -p log
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- als-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.ALSRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --driver-java-options "-Xms20g -Xss5g" \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=-Xms20g -Xss5g" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --jars "lib/snakeyaml-1.19.jar,lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar" \
+ --class com.bigdata.ml.ALSRunner \
+ --driver-java-options "-Xms15g -Xss5g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=-Xms20g -Xss5g" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --jars "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/bo_run.sh b/tools/kal-test/bin/ml/bo_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cc276213d9fe17e62b9b62e1f9f92b4f55b1d67a
--- /dev/null
+++ b/tools/kal-test/bin/ml/bo_run.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset: [BostonHousing/TitanicRf/TitanicGBT]"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
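+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/bo_run.sh BostonHousing no no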
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/bo/bo_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form the property key names
+num_executors=${cpu_name}"_numExecutors"
+executor_cores=${cpu_name}"_executorCores"
+executor_memory=${cpu_name}"_executorMemory"
+driver_cores=${cpu_name}"_driverCores"
+driver_memory=${cpu_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master} ] \
+ || [ ! ${deployMode} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- BayesianOptimization-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/snakeyaml-1.19.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/snakeyaml-1.19.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/snakeyaml-1.19.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.BORunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf spark.locality.wait=0s \
+ --conf spark.scheduler.minRegisteredResourcesRatio=1 \
+ --conf spark.driver.maxResultSize=10g \
+ --conf spark.network.timeout=60000s \
+ --conf spark.rpc.askTimeout=60000s \
+ --conf spark.executor.heartbeatInterval=600s \
+ --conf spark.eventLog.enabled=false \
+ --jars "lib/fastutil-8.3.1.jar,lib/snakeyaml-1.19.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/kal-test_${scala_version_val}-0.1.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.tencent.angel.spark.automl.AngelBayesianOptimization \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf spark.locality.wait=0s \
+ --conf spark.scheduler.minRegisteredResourcesRatio=1 \
+ --conf spark.driver.maxResultSize=10g \
+ --conf spark.network.timeout=60000s \
+ --conf spark.rpc.askTimeout=60000s \
+ --conf spark.executor.heartbeatInterval=600s \
+ --conf spark.eventLog.enabled=false \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/cov_run.sh b/tools/kal-test/bin/ml/cov_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..109c27f193e8c315bb823490b19fff3f93f9bc4b
--- /dev/null
+++ b/tools/kal-test/bin/ml/cov_run.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. CP10M1K/CP2M5K/CP1M10K"
+ echo "2nd argument: optimization algorithm or raw: [no/yes]"
+ echo "3rd argument: Whether to Compare Results [no/yes]"
+}
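+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/cov_run.sh CP10M1K no no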
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/cov/cov_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form the property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+executor_memory_overhead="executorMemOverhead_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_max_result_size="driverMaxResultSize"
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_memory_overhead_val=${!executor_memory_overhead}
+extra_java_options_val=${!extra_java_options}
+driver_max_result_size_val=${!driver_max_result_size}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo ${cpu_name}
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_memory_overhead} : ${executor_memory_overhead_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${driver_max_result_size} : ${driver_max_result_size_val}"
+
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${executor_memory_overhead_val} ] \
+ || [ ! ${driver_max_result_size_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+
+echo "start to submit spark jobs --- Cov-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.CovRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=${driver_max_result_size_val}" \
+ --conf "spark.network.timeout=3600s" \
+ --conf "spark.executor.heartbeatInterval=1000s" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ scp lib/fastutil-8.3.1.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.CovRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=${driver_max_result_size_val}" \
+ --jars "lib/fastutil-8.3.1.jar" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
+
diff --git a/tools/kal-test/bin/ml/dbscan_run.sh b/tools/kal-test/bin/ml/dbscan_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..650e092685dc08ceeefa8a582246e11c7e792aa5
--- /dev/null
+++ b/tools/kal-test/bin/ml/dbscan_run.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. bremenSmall/farm/house"
+ echo "2nd argument: optimization algorithm or raw: [no/yes]"
+}
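+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/dbscan_run.sh bremenSmall no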
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/dbscan/dbscan_spark.properties
+dataset_name=$1
+is_raw=$2
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}
+type=opt
+if [ $is_raw == "yes" ]; then
+ type=raw
+fi
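+# ${type} becomes the suffix of the property keys looked up below, so the
+# optimized kernel and the open-source baseline read separate tuning values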
+
+# concatenate strings to form the property key names
+num_executors="numExectuors_"${type}
+executor_cores="executorCores_"${type}
+executor_memory="executorMemory_"${type}
+extra_java_options="extraJavaOptions_"${type}
+driver_cores="driverCores_"${type}
+driver_memory="driverMemory_"${type}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo ${cpu_name}
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+
+
+if [ ${is_raw} == "yes" ]; then
+ driver_max_result_size="driverMaxResultSize_"${type}
+ epsilon="epsilon_"${dataset_name}_${type}
+ min_points="minPoints_"${dataset_name}_${type}
+
+ driver_max_result_size_val=${!driver_max_result_size}
+ epsilon_val=${!epsilon}
+ min_points_val=${!min_points}
+
+ echo "${driver_max_result_size} : ${driver_max_result_size_val}"
+ echo "${epsilon} : ${epsilon_val}"
+ echo "${min_points} : ${min_points_val}"
+
+
+ if [ ! ${driver_max_result_size_val} ] \
+ || [ ! ${epsilon_val} ] \
+ || [ ! ${min_points_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+ fi
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+outputPath="${save_resultPath_val}/dbscan/alitoukaDBSCAN/output_${dataset_name}"
+hdfsJarPath="hdfs:///tmp/ml/test/dbscan"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+mkdir -p log
+echo "start to submit spark jobs --- DBSCAN-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class org.apache.spark.ml.clustering.DBSCANRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.task.maxFailures=1" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} | tee ./log/log
+else
+ hdfs dfs -rm -r -f ${outputPath}
+ hdfs dfs -mkdir -p ${hdfsJarPath}
+ # run the listing inside the if-condition so "set -e" cannot abort the script
+ if hdfs dfs -ls ${hdfsJarPath} >/dev/null 2>&1; then
+ hdfs dfs -rm -r -f ${hdfsJarPath}/alitouka_dbscan_2.11-0.1.jar
+ hdfs dfs -put ./lib/alitouka_dbscan_2.11-0.1.jar ${hdfsJarPath}
+ fi
+
+ spark-submit \
+ --jars "lib/scopt_2.11-3.5.0.jar" \
+ --class org.alitouka.spark.dbscan.DbscanDriver \
+ --deploy-mode ${deploy_mode_val} \
+ --name "alitouka_DBSCAN_${model_conf}" \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.driver.maxResultSize=${driver_max_result_size_val}" \
+ ${hdfsJarPath}/alitouka_dbscan_2.11-0.1.jar --ds-master ${master_val} --ds-jar ${hdfsJarPath}/alitouka_dbscan_2.11-0.1.jar --ds-input ${data_path_val} --ds-output ${outputPath} --eps ${epsilon_val} --numPts ${min_points_val} >dbscan_tmp.log
+ CostTime=$(grep "train total" dbscan_tmp.log | awk '{print $3}')
+ currentTime=$(date "+%Y%m%d_%H%M%S")
+ rm -f dbscan_tmp.log
+ mkdir -p report
+ echo -e "algorithmName: DBSCAN\ncostTime: ${CostTime}\ndatasetName: ${dataset_name}\nisRaw: 'yes'\ntestcaseType: DBSCAN_opensource_${dataset_name}\n" > ./report/"DBSCAN_${dataset_name}_raw_${currentTime}.yml"
+ echo "Exec Successful: costTime: ${CostTime}" > ./log/log
+fi
+
diff --git a/tools/kal-test/bin/ml/dt_run.sh b/tools/kal-test/bin/ml/dt_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1c5b79a88db696b7bc0055982e54a76bf64f87ad
--- /dev/null
+++ b/tools/kal-test/bin/ml/dt_run.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of algorithm: [classification/regression]"
+ echo "2nd argument: type of data structure: [dataframe/rdd]"
+ echo "3rd argument: name of dataset: [epsilon/higgs/mnist8m]"
+ echo "4th argument: name of API: [for dataframe: fit/fit1/fit2/fit3; for rdd: trainClassifier/trainRegressor]"
+ echo "5th argument: optimization algorithm or raw: [no/yes]"
+ echo "6th argument: Whether to Compare Results [no/yes]"
+}
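+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/dt_run.sh classification dataframe higgs fit no no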
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 6 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/dt/dt_spark.properties
+algorithm_type=$1
+data_structure=$2
+dataset_name=$3
+api_name=$4
+is_raw=$5
+if_check=$6
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${algorithm_type}-${data_structure}-${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to form the property key names
+num_executors=${cpu_name}_${algorithm_type}"_"${dataset_name}"_numExectuors"
+executor_cores=${cpu_name}_${algorithm_type}"_"${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${algorithm_type}"_"${dataset_name}"_executorMemory"
+extra_java_options=${cpu_name}_${algorithm_type}"_"${dataset_name}"_extraJavaOptions"
+driver_cores="driverCores"
+driver_memory="driverMemory"
+master_="master"
+deploy_mode="deployMode"
+max_failures="maxFailures"
+compress_="compress"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+max_failures_val=${!max_failures}
+compress_val=${!compress_}
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${max_failures} : ${max_failures_val}"
+echo "${compress_} : ${compress_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ] \
+ || [ ! ${max_failures_val} ] \
+ || [ ! ${compress_val} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- dt-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar lib/fastutil-8.3.1.jar root@agent1:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar lib/fastutil-8.3.1.jar root@agent2:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar lib/fastutil-8.3.1.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.DTRunner \
+ --driver-java-options "-Xms15g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.DTRunner \
+ --driver-java-options "-Xms15g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/ml/dtb_run.sh b/tools/kal-test/bin/ml/dtb_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f18aa27202fefda4145fd4936cdd82b134325734
--- /dev/null
+++ b/tools/kal-test/bin/ml/dtb_run.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: higgs/mnist8m"
+ echo "2nd argument: name of API: fit/fit1/fit2/fit3"
+ echo "3rd argument: save or verify result: save/verify"
+ echo "4th argument: optimization algorithm or raw: no/yes"
+}
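+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/dtb_run.sh higgs fit save no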
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/dtb/dtb_spark.properties
+dataset_name=$1
+api_name=$2
+verify=$3
+is_raw=$4
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+
+# concatenate strings to form the property key names
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+driver_cores="driverCores"
+driver_memory="driverMemory"
+master_="master"
+deploy_mode="deployMode"
+max_failures="maxFailures"
+compress_="compress"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+max_failures_val=${!max_failures}
+compress_val=${!compress_}
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${max_failures} : ${max_failures_val}"
+echo "${compress_} : ${compress_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ] \
+ || [ ! ${max_failures_val} ] \
+ || [ ! ${compress_val} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
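+# HDFS directory for the bucketized reference results; a "save" run is expected
+# to write them here and a "verify" run to read them back (see the 3rd argument)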
+bucketedResPath="/tmp/ml/res/DTB_ref_bucketedRes/${is_raw}/${spark_version_val}/${dataset_name}"
+hdfs dfs -mkdir -p ${bucketedResPath}
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+model_conf=${dataset_name}-${api_name}-${verify}-${bucketedResPath}
+echo "start to submit spark jobs --- dtb-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.DTBRunner \
+ --driver-java-options "-Xms15g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --jars "lib/snakeyaml-1.19.jar,lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${is_raw} ${spark_conf} | tee ./log/log
+else
+ scp lib/boostkit-ml-kernel-client_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar root@agent1:/opt/ml_classpath/
+ scp lib/boostkit-ml-kernel-client_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar root@agent2:/opt/ml_classpath/
+ scp lib/boostkit-ml-kernel-client_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.DTBRunner \
+ --driver-java-options "-Xms15g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --jars "lib/snakeyaml-1.19.jar,lib/boostkit-ml-kernel-client-${scala_version_val}-${kal_version_val}-${spark_version_val}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${is_raw} ${spark_conf} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/ml/encoder_run.sh b/tools/kal-test/bin/ml/encoder_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3a99061284bbd6b58597475dcb74c4700f41be2e
--- /dev/null
+++ b/tools/kal-test/bin/ml/encoder_run.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset: e.g. encoder_400m,encoder_800m"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
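+# example invocation (one valid argument combination from the usage above):
+#   ./bin/ml/encoder_run.sh encoder_400m no no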
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+
+source conf/ml/encoder/encoder_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form the property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+executor_extra_java_options=${cpu_name}_${dataset_name}"_extraJavaOptions"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_extra_java_options_val=${!executor_extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_extra_java_options} : ${executor_extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${executor_extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master} ] \
+ || [ ! ${deployMode} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+mkdir -p /data/data1/tmp/encoder
+localSavePath=/data/data1/tmp/encoder
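+# the HDFS input path and the local save directory are passed to the runner as
+# a single comma-separated argument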
+path_conf=${data_path_val},${localSavePath}
+
+echo "start to clean cache and sleep 3s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 3
+
+echo "start to submit spark jobs --- encoder-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ mkdir -p log
+ spark-submit \
+ --class com.bigdata.ml.EncoderRunner \
+ --master ${master} \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --conf spark.executor.extraJavaOptions=${executor_extra_java_options_val} \
+ --conf spark.rdd.compress=false \
+ --conf spark.eventLog.enabled=true \
+ --conf spark.driver.maxResultSize=40g \
+ --conf spark.network.timeout=60s \
+ --conf "spark.driver.extraJavaOptions=-Xss5g -Dlog4j.configuration=file:./log4j.properties" \
+ --driver-java-options "-Xms15g" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --jars "lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${path_conf} ${cpu_name} ${save_resultPath_val}| tee ./log/log
+else
+ mkdir -p log
+ spark-submit \
+ --class com.bigdata.ml.EncoderRunner \
+ --master ${master} \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --conf spark.executor.extraJavaOptions=${executor_extra_java_options_val} \
+ --conf spark.rdd.compress=false \
+ --conf spark.eventLog.enabled=true \
+ --conf spark.driver.maxResultSize=40g \
+ --conf spark.network.timeout=60s \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ --conf "spark.driver.extraJavaOptions=-Xss5g -Dlog4j.configuration=file:./log4j.properties" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${path_conf} ${cpu_name} ${save_resultPath_val}| tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/fm_run.sh b/tools/kal-test/bin/ml/fm_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8ae5603ba396f0c803327f535882a2a92d245272
--- /dev/null
+++ b/tools/kal-test/bin/ml/fm_run.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of algorithm: [classification/regression]"
+ echo "2nd argument: name of dataset: [epsilon/higgs/avazu/kdda]"
+ echo "3rd argument: name of API: [fit/fit1/fit2/fit3]"
+ echo "4th argument: optimization algorithm or raw: [no/yes]"
+ echo "5th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 5 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/fm/fm_spark.properties
+algorithm_type=$1
+dataset_name=$2
+api_name=$3
+is_raw=$4
+if_check=$5
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
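+# lscpu's Architecture field (e.g. aarch64, x86_64) selects the per-CPU property keys
+# and the matching BoostKit kernel jar.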
+model_conf=${algorithm_type}-${dataset_name}-${api_name}-${is_raw}-${if_check}
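+# model_conf folds the CLI choices into the single hyphen-separated argument passed to the runner.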
+
+# concatenate strings to form property key names
+num_executors=${cpu_name}_${algorithm_type}_${dataset_name}"_numExectuors"
+executor_cores=${cpu_name}_${algorithm_type}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${algorithm_type}_${dataset_name}"_executorMemory"
+extra_java_options=${cpu_name}_${algorithm_type}_${dataset_name}"_extraJavaOptions"
+driver_cores="driverCores"
+driver_memory="driverMemory"
+master_="master"
+deploy_mode="deployMode"
+
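+# ${!name} is Bash indirect expansion: it resolves the variable whose name is stored
+# in "name", so the per-CPU/per-dataset keys built above are looked up in the sourced
+# properties file.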
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master_val}" ] \
+ || [ -z "${cpu_name}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
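+# spark_conf packs the submit settings into one underscore-separated string that is
+# passed through to the runner (e.g. for tagging results with the Spark configuration).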
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- fm-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/json4s-ext_2.12-3.2.11.jar lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/json4s-ext_2.12-3.2.11.jar lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/json4s-ext_2.12-3.2.11.jar lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.FMRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --driver-java-options "-Dlog4j.configuration=file:./log4j.properties -Dhdp.version=3.1.0.0-78" \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.network.timeout=3600s" \
+ --jars "lib/json4s-ext_2.12-3.2.11.jar,lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val}| tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.FMRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.network.timeout=3600s" \
+ --conf "spark.driver.maxResultSize=2G" \
+ --driver-class-path "lib/json4s-ext_2.12-3.2.11.jar:lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/fpg_run.sh b/tools/kal-test/bin/ml/fpg_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e5e7f470cb54bcf4961e8870eaba644822bbd8fd
--- /dev/null
+++ b/tools/kal-test/bin/ml/fpg_run.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset: [Kosarak,Kosarak25,IBM700]"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/fpg/fpg_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+executor_extra_java_options=${cpu_name}_${dataset_name}"_extraJavaOptions"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_extra_java_options_val=${!executor_extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_extra_java_options} : ${executor_extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${executor_extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master}" ] \
+ || [ -z "${deployMode}" ] \
+ || [ -z "${cpu_name}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- fpg-${model_conf}"
+if [ ${is_raw} == "no" ]; then
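+ # Ship the BoostKit jars to every agent so the spark.executor.extraClassPath entries
+ # below resolve to local copies under /opt/ml_classpath/.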
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.FPGRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val}" \
+ --driver-java-options "-Dlog4j.configuration=file:./log4j.properties" \
+ --conf "spark.task.maxFailures=1" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.FPGRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val}" \
+ --conf "spark.task.maxFailures=1" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/gbdt_run.sh b/tools/kal-test/bin/ml/gbdt_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b0897973325c9e58c5531aa62b382a10d22f160d
--- /dev/null
+++ b/tools/kal-test/bin/ml/gbdt_run.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of algorithm: [classification/regression]"
+ echo "2nd argument: type of data structure: [dataframe/rdd]"
+ echo "3rd argument: name of dataset: [epsilon/rcv/D10M4096libsvm]"
+ echo "4th argument: name of API: [for dataframe: fit/fit1/fit2/fit3; for rdd: rdd/javardd]"
+ echo "5th argument: optimization algorithm or raw: [no/yes]"
+ echo "6th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 6 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/gbdt/gbdt_spark.properties
+algorithm_type=$1
+data_structure=$2
+dataset_name=$3
+api_name=$4
+is_raw=$5
+if_check=$6
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${algorithm_type}-${data_structure}-${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors="numExectuors"
+executor_cores=${dataset_name}"_executorCores_"${cpu_name}
+executor_memory="executorMemory"
+extra_java_options="extraJavaOptions"
+driver_cores="driverCores"
+driver_memory="driverMemory"
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo ${cpu_name}
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master_val}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- gbdt-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.GBDTRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf spark.dynamicAllocation.enabled=false \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.GBDTRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/hdb_run.sh b/tools/kal-test/bin/ml/hdb_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..644c72b6def8bca9f4c24600657af51db74a5e0a
--- /dev/null
+++ b/tools/kal-test/bin/ml/hdb_run.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset:Hibench1m_100,Hibench1m_200 "
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/hdb/hdb_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+executor_extra_java_options=${cpu_name}_${dataset_name}"_extraJavaOptions"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_extra_java_options_val=${!executor_extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_extra_java_options} : ${executor_extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${executor_extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master}" ] \
+ || [ -z "${deployMode}" ] \
+ || [ -z "${cpu_name}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- hdb-${model_conf}"
+if [ ${is_raw} == "no" ]; then
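+ # hdb additionally places the BoostKit graph kernel on the classpath, so it is
+ # shipped to the agents together with the ml jars.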
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.HDBRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf "spark.rpc.message.maxSize=1024" \
+ --conf "spark.driver.maxResultSize=4g" \
+ --conf "spark.task.maxFailures=100" \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val}" \
+ --driver-java-options "-Dlog4j.configuration=file:./log4j.properties" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar:lib/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar:/opt/ml_classpath/boostkit-graph-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.HDBRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/idf_run.sh b/tools/kal-test/bin/ml/idf_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ecc8c54bb347fe0408bcdaede5ed49abdeabf233
--- /dev/null
+++ b/tools/kal-test/bin/ml/idf_run.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset: e.g. D10m200m"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+
+source conf/ml/idf/idf_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+executor_extra_java_options=${cpu_name}_${dataset_name}"_extraJavaOptions"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_extra_java_options_val=${!executor_extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_extra_java_options} : ${executor_extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${executor_extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master}" ] \
+ || [ -z "${deployMode}" ] \
+ || [ -z "${cpu_name}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+
+echo "start to clean cache and sleep 3s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 3
+
+echo "start to submit spark jobs --- idf-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ mkdir -p log
+ spark-submit \
+ --class com.bigdata.ml.IDFRunner \
+ --master ${master} \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --conf spark.executor.extraJavaOptions=${executor_extra_java_options_val} \
+ --driver-java-options "-Xms15g" \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --jars "lib/snakeyaml-1.19.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val}| tee ./log/log
+else
+ mkdir -p log
+ spark-submit \
+ --class com.bigdata.ml.IDFRunner \
+ --master ${master} \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --conf spark.executor.extraJavaOptions=${executor_extra_java_options_val} \
+ --driver-java-options "-Xms15g" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ --jars "lib/snakeyaml-1.19.jar,lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val}| tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/if_run.sh b/tools/kal-test/bin/ml/if_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..973ae805d705e21a503ea953b1c1576ff0e0e34e
--- /dev/null
+++ b/tools/kal-test/bin/ml/if_run.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset: [if_40M_1k/if_1M_1k]"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/if/if_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master}" ] \
+ || [ -z "${deployMode}" ] \
+ || [ -z "${cpu_name}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- if-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
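+ # The LinkedIn isolation-forest jar rides along via --jars and --driver-class-path
+ # below; unlike the BoostKit jars it is not staged under /opt/ml_classpath.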
+ spark-submit \
+ --class com.bigdata.ml.IFRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --driver-java-options "-Xms15g -Dlog4j.configuration=file:./log4j.properties" \
+ --conf "spark.driver.maxResultSize=2g" \
+ --conf "spark.sophon.isolationForest.parLevel=100" \
+ --jars "lib/isolation-forest_3.1.1_2.12-2.0.8.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/isolation-forest_3.1.1_2.12-2.0.8.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/kal-test_${scala_version_val}-0.1.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.IFRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --driver-java-options "-Xms15g -Dlog4j.configuration=file:./log4j.properties" \
+ --conf "spark.driver.maxResultSize=2g" \
+ --driver-class-path "lib/isolation-forest_3.1.1_2.12-2.0.8.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ --jars "lib/isolation-forest_3.1.1_2.12-2.0.8.jar,lib/boostkit-ml-kernel-client-${scala_version_val}-${kal_version_val}-${spark_version_val}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/kmeans_run.sh b/tools/kal-test/bin/ml/kmeans_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3d54c501ba7dba9d198c908c25b6bc95d0890e09
--- /dev/null
+++ b/tools/kal-test/bin/ml/kmeans_run.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of data structure: [dataframe/rdd]"
+ echo "2nd argument: name of dataset: e.g. D200M20"
+ echo "3rd argument: name of API: e.g. fit/fit1/fit2/fit3"
+ echo "4th argument: optimization algorithm or raw: [no/yes]"
+ echo "5th argument: Whether to Compare Results [no/yes]"
+}
+
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 5 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/kmeans/kmeans_spark.properties
+data_structure=$1
+dataset_name=$2
+api_name=$3
+is_raw=$4
+if_check=$5
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${data_structure}-${dataset_name}-${api_name}-${cpu_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+driver_java_options="driverJavaOptions_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_java_options_val=${!driver_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo ${cpu_name}
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${driver_java_options} : ${driver_java_options_val}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master_val}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+echo "start to submit spark jobs --- KMeans-${model_conf}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- kmeans-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/mahout-core-0.9.jar lib/mahout-math-0.9.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/mahout-core-0.9.jar lib/mahout-math-0.9.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/mahout-core-0.9.jar lib/mahout-math-0.9.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.KMeansRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --driver-java-options ${driver_java_options_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/mahout-core-0.9.jar,lib/mahout-math-0.9.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/mahout-math-0.9.jar:lib/mahout-core-0.9.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/mahout-core-0.9.jar:/opt/ml_classpath/mahout-math-0.9.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${save_resultPath_val}| tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.KMeansRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --driver-java-options ${driver_java_options_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/mahout-core-0.9.jar,lib/mahout-math-0.9.jar" \
+ --driver-class-path "lib/mahout-math-0.9.jar:lib/mahout-core-0.9.jar:lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=lib/mahout-core-0.9.jar:lib/mahout-math-0.9.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${save_resultPath_val}| tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/knn_run.sh b/tools/kal-test/bin/ml/knn_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a1ad21d3e6fca8176b584f19fd2d3f9357ea33eb
--- /dev/null
+++ b/tools/kal-test/bin/ml/knn_run.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. glove/gist/deep1b"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "if u want to compare result, pls execute {./bin/compare/ml/KNNVerify.sh } "
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 2 ]; then
+ usage
+ exit 0
+fi
+
+
+source conf/ml/knn/knn_spark.properties
+dataset_name=$1
+is_raw=$2
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}
+
+# concatenate strings to form property key names
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+driver_cores="driverCores_"${dataset_name}_${cpu_name}
+driver_memory="driverMemory_"${dataset_name}_${cpu_name}
+memory_overhead="execMemOverhead_"${dataset_name}_${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+compress_="compress"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+memory_overhead_val=${!memory_overhead}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+compress_val=${!compress_}
+
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${memory_overhead} : ${memory_overhead_val}"
+echo "${compress_} : ${compress_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master_val}" ] \
+ || [ -z "${memory_overhead_val}" ] \
+ || [ -z "${compress_val}" ] \
+ || [ -z "${cpu_name}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+mkdir -p log
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- KNN-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.KNNRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.executor.memory_overhead=${memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.KNNRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.executor.memory_overhead=${memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/ml/lda_run.sh b/tools/kal-test/bin/ml/lda_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..8e15ba5cb602ee84a45deee5f6c3e471ec2435b1
--- /dev/null
+++ b/tools/kal-test/bin/ml/lda_run.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of data structure: [dataframe/rdd]"
+ echo "2nd argument: name of dataset: e.g. nytimes/pubmed/D20M200K"
+ echo "3rd argument: name of API: e.g. fit/fit1/fit2/fit3/run"
+ echo "4th argument: optimization algorithm or raw: [no/yes]"
+ echo "5th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 5 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/lda/lda_spark.properties
+data_structure=$1
+dataset_name=$2
+api_name=$3
+is_raw=$4
+if_check=$5
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${data_structure}-${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to form property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo ${cpu_name}
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ -z "${num_executors_val}" ] \
+ || [ -z "${executor_cores_val}" ] \
+ || [ -z "${executor_memory_val}" ] \
+ || [ -z "${extra_java_options_val}" ] \
+ || [ -z "${driver_cores_val}" ] \
+ || [ -z "${driver_memory_val}" ] \
+ || [ -z "${master_val}" ]; then
+ echo "Some values are NULL, please check the property files"
+ exit 1
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+mkdir -p log
+echo "start to submit spark jobs --- LDA-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
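+ # The boostkit jars are staged under /opt/ml_classpath/ on every agent so that
+ # spark.executor.extraClassPath (set below) can resolve them locally.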
+
+ spark-submit \
+ --class com.bigdata.ml.LDARunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.driver.cores=${driver_cores_val}" \
+ --conf "spark.task.cpus=${executor_cores_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --jars "lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.LDARunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/lgbm_run.sh b/tools/kal-test/bin/ml/lgbm_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b9e86227477a8646415ca619e6eb7b053c733c2e
--- /dev/null
+++ b/tools/kal-test/bin/ml/lgbm_run.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of algorithm: [classification/regression]"
+ echo "2nd argument: name of dataset:mnist8m, higgs,criteo "
+ echo "3rd argument: optimization algorithm or raw: [no/yes]"
+ echo "4th argument: Whether to Compare Results [no/yes]"
+}
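+# Example (arguments as described in usage above):
+#   bash bin/ml/lgbm_run.sh classification higgs no no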
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/lgbm/lgbm_spark.properties
+algorithm_type=$1
+dataset_name=$2
+is_raw=$3
+if_check=$4
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${algorithm_type}-${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+executor_extra_java_options=${cpu_name}_${dataset_name}"_extraJavaOptions"
+executor_memory_overhead=${cpu_name}_${dataset_name}"_executorMemOverhead"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_extra_java_options_val=${!executor_extra_java_options}
+executor_memory_overhead_val=${!executor_memory_overhead}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_memory_overhead} : ${executor_memory_overhead_val}"
+echo "${executor_extra_java_options} : ${executor_extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${executor_memory_overhead_val} ] \
+ || [ ! ${executor_extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master} ] \
+ || [ ! ${deployMode} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- lgbm-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/lightgbmlib.jar lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar lib/boostkit-lightgbm-kernel_${scala_version_val}-1.3.0.jar lib/fastutil-8.3.1.jar root@agent1:/opt/ml_classpath/
+ scp lib/lightgbmlib.jar lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar lib/boostkit-lightgbm-kernel_${scala_version_val}-1.3.0.jar lib/fastutil-8.3.1.jar root@agent2:/opt/ml_classpath/
+ scp lib/lightgbmlib.jar lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar lib/boostkit-lightgbm-kernel_${scala_version_val}-1.3.0.jar lib/fastutil-8.3.1.jar root@agent3:/opt/ml_classpath/
+
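+ # --files distributes the native LightGBM library (lib_lightgbm_close.so) to
+ # each executor's working directory for the submitted job.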
+ spark-submit \
+ --class com.bigdata.ml.LightGBMRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --master ${master} \
+ --files=lib/lib_lightgbm_close.so \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val}" \
+ --jars "lib/lightgbmlib.jar,lib/fastutil-8.3.1.jar,lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar,lib/boostkit-lightgbm-kernel_${scala_version_val}-1.3.0.jar" \
+ --driver-class-path "lib/lightgbmlib.jar:lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar:lib/boostkit-lightgbm-kernel_${scala_version_val}-1.3.0.jar:lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/lightgbmlib.jar:/opt/ml_classpath/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar:/opt/ml_classpath/boostkit-lightgbm-kernel_${scala_version_val}-1.3.0.jar:/opt/ml_classpath/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.LightGBMRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --jars "lib/lightgbmlib.jar,lib/snakeyaml-1.19.jar,lib/fastutil-8.3.1.jar,lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar" \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val}" \
+ --driver-class-path "lib/lightgbmlib.jar,lib/snakeyaml-1.19.jar,lib/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/lightgbmlib.jar:/opt/ml_classpath/mmlspark_${scala_version_val}_spark3.1.2-0.0.0+79-09152193.jar:/opt/ml_classpath/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/linR_run.sh b/tools/kal-test/bin/ml/linR_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3a8182a6525739cf288a446c85bd3e1221b0d2ee
--- /dev/null
+++ b/tools/kal-test/bin/ml/linR_run.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. mnist8m/Twitter/rcv"
+ echo "2nd argument: name of API: e.g. fit/fit1/fit2/fit3"
+ echo "3th argument: optimization algorithm or raw: [no/yes]"
+ echo "4th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/linR/linR_spark.properties
+dataset_name=$1
+api_name=$2
+is_raw=$3
+if_check=$4
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+max_failures="maxFailures"
+compress_="compress"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+max_failures_val=${!max_failures}
+compress_val=${!compress_}
+
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${max_failures} : ${max_failures_val}"
+echo "${compress_} : ${compress_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ] \
+ || [ ! ${max_failures_val} ] \
+ || [ ! ${compress_val} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
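+# spark_conf condenses the submit parameters into one underscore-separated token
+# that is forwarded to LinRRunner alongside the data path and save path.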
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- LinR-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.LinRRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.LinRRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/logR_run.sh b/tools/kal-test/bin/ml/logR_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7ef64f05c52453c125aa40343614d596ee90f1a3
--- /dev/null
+++ b/tools/kal-test/bin/ml/logR_run.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. mnist8m/Twitter/rcv"
+ echo "2nd argument: name of API: e.g. fit/fit1/fit2/fit3"
+ echo "3th argument: optimization algorithm or raw: [no/yes]"
+ echo "4th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/logR/logR_spark.properties
+dataset_name=$1
+api_name=$2
+is_raw=$3
+if_check=$4
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo "cpu_name : ${cpu_name}"
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- LogR-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.LogRRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.LogRRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/nmf_run.sh b/tools/kal-test/bin/ml/nmf_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6fe737663a9d0b454a01743be0e99adace817a26
--- /dev/null
+++ b/tools/kal-test/bin/ml/nmf_run.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1rd argument: name of dataset:CSJ, MT, Books, HibenchRating50mx10mx500m "
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/nmf/nmf_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors=${cpu_name}_${dataset_name}"_numExecutors"
+executor_cores=${cpu_name}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${dataset_name}"_executorMemory"
+executor_memory_overhead=${cpu_name}_${dataset_name}"_executorMemOverhead"
+executor_extra_java_options=${cpu_name}_${dataset_name}"_extraJavaOptions"
+driver_cores=${cpu_name}_${dataset_name}"_driverCores"
+driver_memory=${cpu_name}_${dataset_name}"_driverMemory"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_memory_overhead_val=${!executor_memory_overhead}
+executor_extra_java_options_val=${!executor_extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+
+echo "master : ${master}"
+echo "deployMode : ${deployMode}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_memory_overhead} : ${executor_memory_overhead_val}"
+echo "${executor_extra_java_options} : ${executor_extra_java_options_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${executor_memory_overhead_val} ] \
+ || [ ! ${executor_extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master} ] \
+ || [ ! ${deployMode} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path=${!dataset_name}
+
+echo "${dataset_name} : ${data_path}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- nmf-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
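+ # -Xss512m (appended to the executor JVM options below) enlarges the thread
+ # stack size beyond the JVM default, which this workload evidently needs.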
+ spark-submit \
+ --class com.bigdata.ml.NMFRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val} -Xss512m" \
+ --driver-java-options "-Dlog4j.configuration=file:./log4j.properties" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.NMFRunner \
+ --deploy-mode ${deployMode} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master} \
+ --conf "spark.executor.extraJavaOptions=${executor_extra_java_options_val} -Xss512m" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ --jars "lib/boostkit-ml-kernel-client-${scala_version_val}-${kal_version_val}-${spark_version_val}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/pca_run.sh b/tools/kal-test/bin/ml/pca_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..76d6db39d7adcfbf5b6b6e7d1bcebf1b8a1e7ec3
--- /dev/null
+++ b/tools/kal-test/bin/ml/pca_run.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of data structure: [dataframe/rdd]"
+ echo "2nd argument: name of dataset: e.g. D10M1K/D1M10K/MESH"
+ echo "3rd argument: name of API:[for dataframe: fit/fit1/fit2/fit3; for rdd: train]"
+ echo "4th argument: optimization algorithm or raw: [no/yes]"
+ echo "5th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 5 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/pca/pca_spark.properties
+data_structure=$1
+dataset_name=$2
+api_name=$3
+is_raw=$4
+if_check=$5
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${data_structure}-${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
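+# Unlike lda_run.sh, these keys are additionally scoped by dataset name,
+# e.g. D10M1K_numExectuors_aarch64 (spelling matches the properties file).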
+num_executors=${dataset_name}"_numExectuors_"${cpu_name}
+executor_cores=${dataset_name}"_executorCores_"${cpu_name}
+executor_memory=${dataset_name}"_executorMemory_"${cpu_name}
+extra_java_options=${dataset_name}"_extraJavaOptions_"${cpu_name}
+driver_cores=${dataset_name}"_driverCores_"${cpu_name}
+driver_memory=${dataset_name}"_driverMemory_"${cpu_name}
+executor_memory_overhead=${dataset_name}"_executorMemoryOverhead_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+executor_memory_overhead_val=${!executor_memory_overhead}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo "cpu_name : ${cpu_name}"
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_memory_overhead} : ${executor_memory_overhead_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${executor_memory_overhead_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- PCA-${model_conf}"
+
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.PCARunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.PCARunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/pearson_run.sh b/tools/kal-test/bin/ml/pearson_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c5a4ddc8ba187a1a8a348e3bc4e9eedd3d6ac216
--- /dev/null
+++ b/tools/kal-test/bin/ml/pearson_run.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of data structure: [dataframe/rdd]"
+ echo "2nd argument: name of dataset: e.g. CP10M1K/CP2M5K/CP1M10K"
+ echo "3nd argument: optimization algorithm or raw: [no/yes]"
+ echo "4rd argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/pearson/pearson_spark.properties
+data_structure=$1
+dataset_name=$2
+is_raw=$3
+if_check=$4
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${data_structure}-${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+memory_overhead="execMemOverhead_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+dataset_output_=${dataset_name}"_output"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+memory_overhead_val=${!memory_overhead}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${memory_overhead} : ${memory_overhead_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${memory_overhead_val} ] \
+ || [ ! ${master_val} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
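+# Remove any previous output for this dataset/architecture before submitting;
+# the *_output key is resolved from the properties file via indirect expansion.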
+data_path_output=${!dataset_output_}
+hdfs dfs -rm -r -f "${data_path_output}_${cpu_name}_${is_raw}"
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+mkdir -p log
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- Pearson-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.PearsonRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.executor.memoryOverhead=${memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.PearsonRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.executor.memoryOverhead=${memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ --jars "lib/snakeyaml-1.19.jar,lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/ml/ps_run.sh b/tools/kal-test/bin/ml/ps_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..85f4324968685cd982e8c996ee893af63096bd50
--- /dev/null
+++ b/tools/kal-test/bin/ml/ps_run.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. kosarak/IBM10M47/IBM100M47"
+ echo "2nd argument: optimization algorithm or raw: [no/yes]"
+ echo "3rd argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/ps/ps_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+
+echo "cpu_name : ${cpu_name}"
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+mkdir -p log
+echo "start to submit spark jobs --- PrefixSpan-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.PrefixSpanRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.rdd.compress=false" \
+ --conf "spark.network.timeout=600s" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ scp lib/fastutil-8.3.1.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.PrefixSpanRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.rdd.compress=false" \
+ --conf "spark.network.timeout=600s" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --driver-class-path "lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ --jars "lib/fastutil-8.3.1.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/ml/rf_run.sh b/tools/kal-test/bin/ml/rf_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..14da3c4ee23fd250a32c734a8fa87042b47574d4
--- /dev/null
+++ b/tools/kal-test/bin/ml/rf_run.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: type of algorithm: [classification/regression]"
+ echo "2nd argument: type of data structure: [dataframe/rdd]"
+ echo "3rd argument: name of dataset: [epsilon/higgs/mnist8m/rcv]"
+ echo "4th argument: name of API: [for dataframe: fit/fit1/fit2/fit3; for rdd: train/train1/train2]"
+ echo "5th argument: optimization algorithm or raw: [no/yes]"
+ echo "6th argument: Whether to Compare Results [no/yes]"
+}
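+# Example (arguments as described in usage above):
+#   bash bin/ml/rf_run.sh classification dataframe higgs fit no no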
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 6 ]; then
+ usage
+ exit 0
+fi
+
+source conf/ml/rf/rf_spark.properties
+algorithm_type=$1
+data_structure=$2
+dataset_name=$3
+api_name=$4
+is_raw=$5
+if_check=$6
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${algorithm_type}-${data_structure}-${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate strings to build the property key names
+num_executors=${cpu_name}_${algorithm_type}_${dataset_name}"_numExectuors"
+executor_cores=${cpu_name}_${algorithm_type}_${dataset_name}"_executorCores"
+executor_memory=${cpu_name}_${algorithm_type}_${dataset_name}"_executorMemory"
+extra_java_options=${cpu_name}_${algorithm_type}_${dataset_name}"_extraJavaOptions"
+driver_cores="driverCores"
+driver_memory="driverMemory"
+master_="master"
+deploy_mode="deployMode"
+max_failures="maxFailures"
+compress_="compress"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+max_failures_val=${!max_failures}
+compress_val=${!compress_}
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${max_failures} : ${max_failures_val}"
+echo "${compress_} : ${compress_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ ! ${num_executors_val} ] \
+ || [ ! ${executor_cores_val} ] \
+ || [ ! ${executor_memory_val} ] \
+ || [ ! ${extra_java_options_val} ] \
+ || [ ! ${driver_cores_val} ] \
+ || [ ! ${driver_memory_val} ] \
+ || [ ! ${master_val} ] \
+ || [ ! ${max_failures_val} ] \
+ || [ ! ${compress_val} ] \
+ || [ ! ${cpu_name} ]; then
+ echo "Some values are NULL, please confirm with the property files"
+ exit 0
+fi
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- rf-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.RFRunner \
+ --driver-java-options "-Xms15g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.RFRunner \
+ --driver-java-options "-Xms15g" \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.taskmaxFailures=${max_failures_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/simrank_run.sh b/tools/kal-test/bin/ml/simrank_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..38d4dbda9a1dc4bea3291353c10e1af1be511e33
--- /dev/null
+++ b/tools/kal-test/bin/ml/simrank_run.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. simrank3w"
+ echo "2th argument: optimization algorithm or raw: [no/yes]"
+ echo "3th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 3 ]; then
+ usage
+ exit 1
+fi
+
+source conf/ml/simrank/simrank_spark.properties
+dataset_name=$1
+is_raw=$2
+if_check=$3
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${is_raw}-${if_check}
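+# model_conf packs the CLI options into a single hyphen-joined token handed to
+# the Scala runner as its first argument (which presumably splits it back out).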
+
+# concatenate the per-CPU property keys as new variable names
+num_executors="numExectuors_"${cpu_name}
+executor_cores="executorCores_"${cpu_name}
+executor_memory="executorMemory_"${cpu_name}
+extra_java_options="extraJavaOptions_"${cpu_name}
+driver_cores="driverCores_"${cpu_name}
+driver_memory="driverMemory_"${cpu_name}
+executor_memory_overhead="execMemOverhead_"${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+executor_memory_overhead_val=${!executor_memory_overhead}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
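+# ${!name} is bash indirect expansion: it expands to the value of the variable
+# whose name is stored in $name, e.g. numExectuors_aarch64 as sourced from the
+# properties file (key names must match that file exactly, spelling included).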
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${executor_memory_overhead} : ${executor_memory_overhead_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+
+if [ -z "${num_executors_val}" ] \
+  || [ -z "${executor_cores_val}" ] \
+  || [ -z "${executor_memory_val}" ] \
+  || [ -z "${extra_java_options_val}" ] \
+  || [ -z "${driver_cores_val}" ] \
+  || [ -z "${driver_memory_val}" ] \
+  || [ -z "${master_val}" ]; then
+  echo "Some values are NULL, please check the property files"
+  exit 1
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+mkdir -p log
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- SimRank-${model_conf}"
+if [[ ${is_raw} == "no" ]]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.SimRankRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+else
+ scp lib/fastutil-8.3.1.jar lib/kal-test_${scala_version_val}-0.1.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/kal-test_${scala_version_val}-0.1.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/kal-test_${scala_version_val}-0.1.jar root@agent3:/opt/ml_classpath/
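+ # Raw (baseline) mode skips the BoostKit jars but still pre-stages fastutil
+ # and the test jar on each agent to satisfy spark.executor.extraClassPath below.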
+
+ spark-submit \
+ --class com.bigdata.ml.SimRankRunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.memoryOverhead=${executor_memory_overhead_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/kal-test_${scala_version_val}-0.1.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/kal-test_${scala_version_val}-0.1.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${save_resultPath_val} | tee ./log/log
+fi
\ No newline at end of file
diff --git a/tools/kal-test/bin/ml/spca_run.sh b/tools/kal-test/bin/ml/spca_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c849fd1eb932fbdb3d601e89c80b3af91eb0b5ef
--- /dev/null
+++ b/tools/kal-test/bin/ml/spca_run.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: "
+ echo "1st argument: name of dataset: e.g. Kemelmacher/mesh_deform/wathen100/MOLIERE"
+ echo "2th argument: name of API: e.g. fit/fit1/fit2/fit3"
+ echo "3th argument: optimization algorithm or raw: [no/yes]"
+ echo "4th argument: Whether to Compare Results [no/yes]"
+}
+
+case "$1" in
+-h | --help | ?)
+ usage
+ exit 0
+ ;;
+esac
+
+if [ $# -ne 4 ]; then
+ usage
+ exit 1
+fi
+
+source conf/ml/spca/spca_spark.properties
+dataset_name=$1
+api_name=$2
+is_raw=$3
+if_check=$4
+cpu_name=$(lscpu | grep Architecture | awk '{print $2}')
+model_conf=${dataset_name}-${api_name}-${is_raw}-${if_check}
+
+# concatenate the per-dataset, per-CPU property keys as new variable names
+num_executors="numExectuors_"${dataset_name}_${cpu_name}
+executor_cores="executorCores_"${dataset_name}_${cpu_name}
+executor_memory="executorMemory_"${dataset_name}_${cpu_name}
+extra_java_options="extraJavaOptions_"${dataset_name}_${cpu_name}
+driver_cores="driverCores_"${dataset_name}_${cpu_name}
+driver_memory="driverMemory_"${dataset_name}_${cpu_name}
+memory_overhead="execMemOverhead_"${dataset_name}_${cpu_name}
+master_="master"
+deploy_mode="deployMode"
+compress_="compress"
+
+num_executors_val=${!num_executors}
+executor_cores_val=${!executor_cores}
+executor_memory_val=${!executor_memory}
+extra_java_options_val=${!extra_java_options}
+driver_cores_val=${!driver_cores}
+driver_memory_val=${!driver_memory}
+memory_overhead_val=${!memory_overhead}
+master_val=${!master_}
+deploy_mode_val=${!deploy_mode}
+compress_val=${!compress_}
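+# Unlike simrank, these keys are parameterized by dataset as well as CPU
+# (e.g. numExectuors_Kemelmacher_aarch64), so each dataset carries its own tuning.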
+
+
+echo "${master_} : ${master_val}"
+echo "${deploy_mode} : ${deploy_mode_val}"
+echo "${driver_cores} : ${driver_cores_val}"
+echo "${driver_memory} : ${driver_memory_val}"
+echo "${num_executors} : ${num_executors_val}"
+echo "${executor_cores}: ${executor_cores_val}"
+echo "${executor_memory} : ${executor_memory_val}"
+echo "${extra_java_options} : ${extra_java_options_val}"
+echo "${memory_overhead} : ${memory_overhead_val}"
+echo "${compress_} : ${compress_val}"
+echo "cpu_name : ${cpu_name}"
+
+if [ -z "${num_executors_val}" ] \
+  || [ -z "${executor_cores_val}" ] \
+  || [ -z "${executor_memory_val}" ] \
+  || [ -z "${extra_java_options_val}" ] \
+  || [ -z "${driver_cores_val}" ] \
+  || [ -z "${driver_memory_val}" ] \
+  || [ -z "${master_val}" ] \
+  || [ -z "${memory_overhead_val}" ] \
+  || [ -z "${compress_val}" ] \
+  || [ -z "${cpu_name}" ]; then
+  echo "Some values are NULL, please check the property files"
+  exit 1
+fi
+
+
+source conf/ml/ml_datasets.properties
+spark_version=sparkVersion
+spark_version_val=${!spark_version}
+kal_version=kalVersion
+kal_version_val=${!kal_version}
+scala_version=scalaVersion
+scala_version_val=${!scala_version}
+save_resultPath=saveResultPath
+save_resultPath_val=${!save_resultPath}
+data_path_val=${!dataset_name}
+echo "${dataset_name} : ${data_path_val}"
+
+
+spark_conf=${master_val}_${deploy_mode_val}_${num_executors_val}_${executor_cores_val}_${executor_memory_val}
+
+mkdir -p log
+echo "start to clean cache and sleep 30s"
+ssh server1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent1 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent2 "echo 3 > /proc/sys/vm/drop_caches"
+ssh agent3 "echo 3 > /proc/sys/vm/drop_caches"
+sleep 30
+
+echo "start to submit spark jobs --- SPCA-${model_conf}"
+if [ ${is_raw} == "no" ]; then
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent1:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent2:/opt/ml_classpath/
+ scp lib/fastutil-8.3.1.jar lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar root@agent3:/opt/ml_classpath/
+
+ spark-submit \
+ --class com.bigdata.ml.SPCARunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.executor.memory_overhead=${memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --jars "lib/fastutil-8.3.1.jar,lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar,lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --driver-class-path "lib/kal-test_${scala_version_val}-0.1.jar:lib/fastutil-8.3.1.jar:lib/snakeyaml-1.19.jar:lib/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ --conf "spark.executor.extraClassPath=/opt/ml_classpath/fastutil-8.3.1.jar:/opt/ml_classpath/boostkit-ml-acc_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-core_${scala_version_val}-${kal_version_val}-${spark_version_val}.jar:/opt/ml_classpath/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+else
+ spark-submit \
+ --class com.bigdata.ml.SPCARunner \
+ --deploy-mode ${deploy_mode_val} \
+ --driver-cores ${driver_cores_val} \
+ --driver-memory ${driver_memory_val} \
+ --num-executors ${num_executors_val} \
+ --executor-cores ${executor_cores_val} \
+ --executor-memory ${executor_memory_val} \
+ --master ${master_val} \
+ --jars "lib/snakeyaml-1.19.jar,lib/boostkit-ml-kernel-client-${scala_version_val}-${kal_version_val}-${spark_version_val}.jar" \
+ --conf "spark.executor.extraJavaOptions=${extra_java_options_val}" \
+ --conf "spark.executor.instances=${num_executors_val}" \
+ --conf "spark.executor.memory_overhead=${memory_overhead_val}" \
+ --conf "spark.driver.maxResultSize=256G" \
+ --conf "spark.rdd.compress=${compress_val}" \
+ --driver-class-path "lib/snakeyaml-1.19.jar:lib/fastutil-8.3.1.jar:lib/boostkit-ml-kernel-${scala_version_val}-${kal_version_val}-${spark_version_val}-${cpu_name}.jar" \
+ ./lib/kal-test_${scala_version_val}-0.1.jar ${model_conf} ${data_path_val} ${cpu_name} ${spark_conf} ${save_resultPath_val} | tee ./log/log
+fi
diff --git a/tools/kal-test/bin/ml/spearman_run.sh b/tools/kal-test/bin/ml/spearman_run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d23cc4b6607c9965565bce0554222374a6908447
--- /dev/null
+++ b/tools/kal-test/bin/ml/spearman_run.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+set -e
+
+function usage() {
+ echo "Usage: