diff --git a/graph_net/sample_pass/fusible_subgraph_ranges_generator.py b/graph_net/sample_pass/fusible_subgraph_ranges_generator.py index ad3fe8c78..69595a5f2 100644 --- a/graph_net/sample_pass/fusible_subgraph_ranges_generator.py +++ b/graph_net/sample_pass/fusible_subgraph_ranges_generator.py @@ -4,6 +4,7 @@ import json from itertools import groupby from dataclasses import dataclass +from collections import defaultdict class FusibleSubgraphRangesGenerator(SamplePass, ResumableSamplePassMixin): @@ -91,7 +92,6 @@ def analyze(self): naive_proposal_fused_num_ops_lists = [ sorted(set(num_ops_list)) for _, num_ops_list in num_kernels_and_num_ops_list - if len(set(num_ops_list)) > 1 ] proposal_fused_num_ops_lists = self._merge_all_decreasing_num_ops_lists( analysis_ctx, naive_proposal_fused_num_ops_lists @@ -114,6 +114,7 @@ def _merge_all_decreasing_num_ops_lists(self, analysis_ctx, num_ops_lists): break dead_loop_detect_cnt += 1 assert dead_loop_detect_cnt < kLimit, f"{dead_loop_detect_cnt=}" + num_ops_lists = [op_list for op_list in num_ops_lists if len(op_list) > 1] return num_ops_lists def _merge_one_decreasing_num_ops_lists(self, analysis_ctx, num_ops_lists): @@ -138,7 +139,6 @@ def get_next_head_num_kernels(i): return analysis_ctx.num_kernels4num_ops(num_ops_lists[i + 1][0]) for i in range(len(num_ops_lists) - 1): - assert len(num_ops_lists[i]) > 1 if get_cur_tail_num_kernels(i) >= get_next_head_num_kernels(i): return i return None @@ -152,14 +152,14 @@ def is_a_range(int_list): assert len(int_list) > 1 return (int_list[-1] + 1) - int_list[0] == len(int_list) - def have_any_increasing(num_ops_list: list[int]): - for i, cur_num_ops in enumerate(num_ops_list): - if i == 0: - continue - cur_num_kernels = analysis_ctx.num_kernels4num_ops(cur_num_ops) - last_num_kernels = analysis_ctx.num_kernels4num_ops(num_ops_list[i - 1]) - if cur_num_kernels > last_num_kernels: + def have_tail_increasing(num_ops_list: list[int]): + for i in range(len(num_ops_list) - 1, 0, -1): + cur_num_kernels = analysis_ctx.num_kernels4num_ops(num_ops_list[i]) + prev_num_kernels = analysis_ctx.num_kernels4num_ops(num_ops_list[i - 1]) + if cur_num_kernels > prev_num_kernels: return True + elif cur_num_kernels < prev_num_kernels: + return False return False def head_eq_tail(num_ops_list: list[int]): @@ -174,9 +174,9 @@ def head_gt_tail(num_ops_list: list[int]): def valid_fused_ops(num_ops_list: list[int]): if head_gt_tail(num_ops_list): - return True + return not have_tail_increasing(num_ops_list) if head_eq_tail(num_ops_list): - return not have_any_increasing(num_ops_list) + return not have_tail_increasing(num_ops_list) return False proposal_fused_num_ops_lists = [ @@ -243,7 +243,44 @@ def get_num_ops(pair): (num_kernels, [num_ops for _, num_ops in group]) for num_kernels, group in grouped_num_kernels_and_num_ops ] - return num_kernels_and_num_ops_list + + num_kernels_to_indexes = defaultdict(list) + + for i, (num_kernels, _) in enumerate(num_kernels_and_num_ops_list): + num_kernels_to_indexes[num_kernels].append(i) + + num_kernels_and_num_ops_closure_list = [ + (num_kernels, num_ops) + for num_kernels, indexes in num_kernels_to_indexes.items() + for i in range(min(indexes), max(indexes) + 1) + for num_ops in num_kernels_and_num_ops_list[i][1] + ] + num_kernels_and_num_ops_closure_list = sorted( + num_kernels_and_num_ops_closure_list, key=lambda pair: pair[1] + ) + num_ops_and_grouped_num_kernels_list = groupby( + num_kernels_and_num_ops_closure_list, key=lambda pair: pair[1] + ) + + min_num_kernels_and_num_ops_list = [ + ( + min(num_kernels for num_kernels, _, in num_kernels_and_num_ops_list), + num_ops, + ) + for num_ops, num_kernels_and_num_ops_list in num_ops_and_grouped_num_kernels_list + ] + + min_num_kernels_and_num_ops = sorted( + min_num_kernels_and_num_ops_list, key=lambda pair: pair[1] + ) + grouped_min_num_kernels_and_num_ops = groupby( + min_num_kernels_and_num_ops, key=lambda pair: pair[0] + ) + min_num_kernels_and_num_ops_list = [ + (num_kernels, [num_ops for _, num_ops in group]) + for num_kernels, group in grouped_min_num_kernels_and_num_ops + ] + return min_num_kernels_and_num_ops_list @dataclass diff --git a/graph_net/tools/generate_single_op_dataset.sh b/graph_net/tools/generate_single_op_dataset.sh index 8781913bf..e8d61c673 100644 --- a/graph_net/tools/generate_single_op_dataset.sh +++ b/graph_net/tools/generate_single_op_dataset.sh @@ -1,6 +1,8 @@ #!/bin/bash set -e +export CUDA_VISIBLE_DEVICES="0" + # ============================================================================== # Configuration Area # ============================================================================== @@ -18,12 +20,11 @@ if [ -z "$GRAPH_NET_ROOT" ]; then exit 1 fi -RESUME="false" +RESUME="true" # Workspace Setup -TIMESTAMP=$(date +%Y%m%d_%H%M) -WORKSPACE="/tmp/single_op_workspace_${TIMESTAMP}" -MODEL_LIST="${MODEL_LIST:-${GRAPH_NET_ROOT}/graph_net/config/small100_torch_samples_list.txt}" +WORKSPACE="/tmp/single_op_workspace" +MODEL_LIST="${GRAPH_NET_ROOT}/graph_net/config/small100_torch_samples_list.txt" # Output Directories OP_NAMES_DIR="${WORKSPACE}/01_op_names" @@ -31,6 +32,13 @@ RANGES_DIR="${WORKSPACE}/02_ranges" RAW_SUBGRAPH_DIR="${WORKSPACE}/03_raw_subgraphs" RENAMED_DIR="${WORKSPACE}/04_renamed" DEDUPLICATED_DIR="${WORKSPACE}/05_deduplicated" +DTYPE_GENERALIZED_SUBGRAPH_DIR="${WORKSPACE}/06_dtype_generalized_subgraphs" + +if [[ "$MODEL_LIST" == *"/torch_samples_list.txt" ]]; then + USE_SUBPROCESS_ARGS="--use-subprocess" +else + USE_SUBPROCESS_ARGS="" +fi mkdir -p "$WORKSPACE" @@ -42,73 +50,96 @@ echo ">>> Starting Pipeline..." echo " Python: $PYTHON_EXEC" echo " Root: $GRAPH_NET_ROOT" -# 1. Prepare Data if [ ! -f "$MODEL_LIST" ]; then echo "Error: Model list not found at $MODEL_LIST" exit 1 fi -# 2. Stage 1: Op Names -echo ">>> Running Stage 1: Op Names..." -python3 -m graph_net.model_path_handler \ - --model-path-list "${MODEL_LIST}" \ - --handler-config=$(base64 -w 0 <>> Generate subgraph_sample_list for samples under ${target_dir}." + echo ">>>" + find ${target_dir} -name "model.py" \ + | xargs dirname \ + | xargs realpath --relative-to=${target_dir} \ + | tee $sample_list +} + +function generate_op_names() { + # Stage 1: Op Names + echo ">>> Running Stage 1: Op Names..." + python3 -m graph_net.model_path_handler \ + --model-path-list "${MODEL_LIST}" \ + --handler-config=$(base64 -w 0 <>> Running Stage 2: Ranges..." -python3 -m graph_net.apply_sample_pass \ - --model-path-list "${MODEL_LIST}" \ - --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/sample_pass/op_extract_points_generator.py" \ - --sample-pass-class-name "OpExtractPointsGenerator" \ - --sample-pass-config=$(base64 -w 0 <>> Running Stage 2: Ranges..." + python3 -m graph_net.apply_sample_pass \ + --model-path-list "${MODEL_LIST}" \ + --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/sample_pass/op_extract_points_generator.py" \ + --sample-pass-class-name "OpExtractPointsGenerator" \ + --sample-pass-config=$(base64 -w 0 <>> Running Stage 3: Decompose..." -python3 -m graph_net.model_path_handler \ - --model-path-list "${MODEL_LIST}" \ - --handler-config=$(base64 -w 0 <>> Running Stage 3: Decompose..." + python3 -m graph_net.model_path_handler $USE_SUBPROCESS_ARGS \ + --model-path-list "${MODEL_LIST}" \ + --handler-config=$(base64 -w 0 <>> Generating generated_subgraphs_list.txt..." -find ${RAW_SUBGRAPH_DIR} -name "model.py" \ - | xargs dirname \ - | xargs realpath --relative-to=${RAW_SUBGRAPH_DIR} \ - > "${WORKSPACE}/generated_subgraphs_list.txt" - -# 6. Post-processing: Rename -echo ">>> Running Post-processing: Rename..." -python3 -m graph_net.model_path_handler \ - --model-path-list "${WORKSPACE}/generated_subgraphs_list.txt" \ - --handler-config=$(base64 -w 0 <>> Running Post-processing: Rename..." + python3 -m graph_net.model_path_handler \ + --model-path-list "${WORKSPACE}/generated_subgraphs_list.txt" \ + --handler-config=$(base64 -w 0 <>> Running Post-processing: Deduplicate..." + if [ -d "${DEDUPLICATED_DIR}" ]; then rm -rf "${DEDUPLICATED_DIR}"; fi + + python3 -m graph_net.tools.deduplicated \ + --samples-dir ${RENAMED_DIR} \ + --target-dir ${DEDUPLICATED_DIR} +} + +function dtype_generalizer() { + # Stage 6: Dtype generalization + echo ">>> Data type generalizer for samples under ${DEDUPLICATED_DIR}." + echo ">>>" + python3 -m graph_net.apply_sample_pass \ + --use-subprocess \ + --model-path-list ${WORKSPACE}/deduplicated_subgraphs_list.txt \ + --sample-pass-file-path "$GRAPH_NET_ROOT/graph_net/torch/sample_pass/dtype_generalizer.py" \ + --sample-pass-class-name ApplyDataTypeGeneralizationPasses \ + --sample-pass-config $(base64 -w 0 <>> Running Post-processing: Deduplicate..." -if [ -d "${DEDUPLICATED_DIR}" ]; then rm -rf "${DEDUPLICATED_DIR}"; fi +function main() { + TIMESTAMP=$(date +%Y%m%d_%H%M) -python3 -m graph_net.tools.deduplicated \ - --samples-dir ${RENAMED_DIR} \ - --target-dir ${DEDUPLICATED_DIR} + generate_op_names 2>&1 | tee ${WORKSPACE}/log_op_names_${TIMESTAMP}.txt + extract_op_points 2>&1 | tee ${WORKSPACE}/log_extract_op_points_${TIMESTAMP}.txt + generate_subgraphs 2>&1 | tee ${WORKSPACE}/log_generate_subgraphs_${TIMESTAMP}.txt + generate_generalized_subgraph_list ${RAW_SUBGRAPH_DIR} ${WORKSPACE}/generated_subgraphs_list.txt + + rename_subgraphs 2>&1 | tee ${WORKSPACE}/log_rename_subgraphs_${TIMESTAMP}.txt + deduplicate_subgraphs 2>&1 | tee ${WORKSPACE}/log_deduplicated_subgraphs_${TIMESTAMP}.txt + generate_generalized_subgraph_list ${DEDUPLICATED_DIR} ${WORKSPACE}/deduplicated_subgraphs_list.txt -# Copy generated_subgraphs_list.txt to final output -cp "${WORKSPACE}/generated_subgraphs_list.txt" "${DEDUPLICATED_DIR}/" + dtype_generalizer 2>&1 | tee ${WORKSPACE}/log_dtype_generalizer_${TIMESTAMP}.txt + + echo ">>> ALL DONE. Final dataset located at: ${DEDUPLICATED_DIR}" +} -echo ">>> ALL DONE. Final dataset located at: ${DEDUPLICATED_DIR}" -echo ">>> generated_subgraphs_list.txt also saved to: ${DEDUPLICATED_DIR}/generated_subgraphs_list.txt" +main \ No newline at end of file diff --git a/graph_net/tools/generate_subgraph_dataset.sh b/graph_net/tools/generate_subgraph_dataset.sh index b381ea711..b729d5453 100755 --- a/graph_net/tools/generate_subgraph_dataset.sh +++ b/graph_net/tools/generate_subgraph_dataset.sh @@ -13,6 +13,9 @@ GRAPH_NET_ROOT=$(python3 -c "import graph_net; import os; print(os.path.dirname( RESUME="true" DECOMPOSE_WORKSPACE=/tmp/subgraph_dataset_workspace +OUTPUT_DIR=$DECOMPOSE_WORKSPACE/outputs +DB_PATH=$OUTPUT_DIR/GraphNet.db + DEVICE_REWRITED_SAMPLE_DIR=$DECOMPOSE_WORKSPACE/01_device_rewrited_samples DIM_GENERALIZED_SAMPLE_DIR=$DECOMPOSE_WORKSPACE/02_dimension_generalized_samples SAMPLE_OP_NAMES_DIR=$DECOMPOSE_WORKSPACE/03_sample_op_names @@ -29,19 +32,19 @@ DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/11_dimension_generaliz RENAMED_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/12_renamed_dimension_generalized_fusible_subgraphs DEDUP_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/13_deduplicated_dimension_generalized_fusible_subgraphs DTYPE_GENERALIZED_FUSIBLE_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/14_dtype_generalized_fusible_subgraphs -FUSIBLE_SUBGRAPH_UNITTEST_DIR=$DECOMPOSE_WORKSPACE/15_fusible_subgraphs_unittests +# FUSIBLE_SUBGRAPH_UNITTEST_DIR=$DECOMPOSE_WORKSPACE/15_fusible_subgraphs_unittests # typical_subgraphs DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/2-08_dimension_generalized_typical_subgraphs RENAMED_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/2-09_renamed_dimension_generalized_typical_subgraphs DEDUP_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/2-10_deduplicated_dimension_generalized_typical_subgraphs DTYPE_GENERALIZED_TYPICAL_SUBGRAPH_DIR=$DECOMPOSE_WORKSPACE/2-11_dtype_generalized_typical_subgraphs -TYPICAL_SUBGRAPH_UNITTEST_DIR=$DECOMPOSE_WORKSPACE/2-12_typical_kernelbench_unittests +# TYPICAL_SUBGRAPH_UNITTEST_DIR=$DECOMPOSE_WORKSPACE/2-12_typical_kernelbench_unittests -mkdir -p "$DECOMPOSE_WORKSPACE" +mkdir -p $DECOMPOSE_WORKSPACE +mkdir -p $OUTPUT_DIR -model_list="$GRAPH_NET_ROOT/graph_net/config/torch_samples_list.txt" -DB_PATH=$DECOMPOSE_WORKSPACE/small100_torch_samples.db +model_list="$GRAPH_NET_ROOT/graph_net/config/small100_torch_samples_list.txt" device_rewrited_sample_list=${DECOMPOSE_WORKSPACE}/device_rewrited_sample_list.txt range_decomposed_subgraph_list=${DECOMPOSE_WORKSPACE}/range_decomposed_subgraph_sample_list.txt @@ -156,6 +159,7 @@ function dimension_generalizer(){ } EOF ) + cp -rf $DEVICE_REWRITED_SAMPLE_DIR $DIM_GENERALIZED_SAMPLE_DIR/9 } function generate_op_names() { @@ -321,7 +325,7 @@ EOF function subgraph_dimension_generalizer(){ echo ">>> [9] Generate dimension generalized subgraph samples under ${DIM_GENERALIZED_SAMPLE_DIR}." - for index in {0..8}; do + for index in {0..9}; do echo ">>> Generating dimension generalized subgraph variant index: ${index}" dimension_generalized_sample_list="${DIM_GENERALIZED_SAMPLE_DIR}/${index}/dimension_generalized_sample_list.txt" generate_subgraph_list ${DIM_GENERALIZED_SAMPLE_DIR}/${index} ${dimension_generalized_sample_list} @@ -373,7 +377,7 @@ EOF function remove_duplicate_dimension_generalized_fusible_graphs() { echo ">>> [11] Remove duplicated subgraph samples under ${RENAMED_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}." echo ">>>" - for index in {0..8}; do + for index in {0..9}; do python3 -m graph_net.tools.deduplicated \ --samples-dir ${RENAMED_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} \ --target-dir ${DEDUP_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} @@ -579,11 +583,11 @@ function generate_fusible_subgraphs() { generate_generalized_subgraph_list ${DEDUP_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${deduplicated_fusible_subgraphs_list} # dtype generalization - dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_${suffix}.txt + dtype_generalizer 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_dtype_generalizer_fusible_subgraphs_${suffix}.txt generate_generalized_subgraph_list ${DTYPE_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} ${dtype_generalized_subgraphs_list} # generate kernelbench format unittest - generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt + # generate_unittests 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_${suffix}.txt } function generate_typical_subgraphs() { @@ -603,24 +607,49 @@ function generate_typical_subgraphs() { generate_generalized_subgraph_list ${DTYPE_GENERALIZED_TYPICAL_SUBGRAPH_DIR} ${dtype_generalized_typical_subgraph_list} # generate kernelbench format unittest - generate_unittest_for_typical_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_typical_subgraphs_${suffix}.txt + # generate_unittest_for_typical_subgraphs 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_unittests_typical_subgraphs_${suffix}.txt } -function main() { +function generate_database() { timestamp=`date +%Y%m%d_%H%M` # init database - python ${GRAPH_NET_ROOT}/sqlite/init_db.py --db_path ${DB_PATH} 2>&1 | tee sqlite/logs/init_db_${timestamp}.log - insert_graph_sample ${GRAPH_NET_ROOT} "github_torch_samples" "full_graph" ${model_list} + if [ ! -f ${DB_PATH} ]; then + python ${GRAPH_NET_ROOT}/sqlite/init_db.py --db_path ${DB_PATH} 2>&1 | tee ${DECOMPOSE_WORKSPACE}/log_init_db_${timestamp}.txt + fi + + # full_graph + insert_graph_sample ${GRAPH_NET_ROOT} "hf_torch_samples" "full_graph" ${model_list} + + # fusible_graph, typical_graph + for sample_type in fusible_graph typical_graph; do + insert_graph_sample $OUTPUT_DIR/$sample_type "hf_torch_samples" $sample_type $OUTPUT_DIR/${sample_type}/sample_list.txt + done + + # insert buckets + python ${GRAPH_NET_ROOT}/sqlite/graph_net_sample_bucket_generator.py --db_path ${DB_PATH} + # insert groups + python ${GRAPH_NET_ROOT}/sqlite/graph_net_sample_groups_insert.py --db_path ${DB_PATH} +} + +function main() { + do_common_generalzation_and_decompose + + sample_type="fusible_graph" generate_fusible_subgraphs - insert_graph_sample ${DEDUP_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} "github_torch_samples" "fusible_graph" ${deduplicated_fusible_subgraphs_list} + cp -rf $DTYPE_GENERALIZED_FUSIBLE_SUBGRAPH_DIR $OUTPUT_DIR/$sample_type + cp -rf $dtype_generalized_subgraphs_list $OUTPUT_DIR/$sample_type/sample_list.txt + sample_type="typical_graph" generate_typical_subgraphs - insert_graph_sample ${DEDUP_TYPICAL_SUBGRAPH_DIR} "github_torch_samples" "typical_graph" ${deduplicated_typical_subgraph_list} + cp -rf $DTYPE_GENERALIZED_TYPICAL_SUBGRAPH_DIR $OUTPUT_DIR/$sample_type + cp -rf $dtype_generalized_typical_subgraph_list $OUTPUT_DIR/$sample_type/sample_list.txt + + generate_database } -summary() { +function summary() { num_original_samples=`cat $model_list | grep "^samples/" | wc -l` echo "Number of original graphnet samples: $num_original_samples" @@ -628,38 +657,38 @@ summary() { device_rewrited_successed_precent=$(( num_device_rewrited_samples * 100 / num_original_samples )) echo "- [Common - 1] device rewrite: successed=${num_device_rewrited_samples}, percent=$device_rewrited_successed_precent%" - num_successed_dimension_generalized_samples=`find ${DIM_GENERALIZED_SAMPLE_DIR} -name "model.py" | wc -l` - dimension_generalized_samples_successed_percent=$((num_successed_dimension_generalized_samples * 100 / (num_original_samples * 9))) - echo "- [Common - 2] dimension generalization: successed=${num_successed_dimension_generalized_samples}, percent=${dimension_generalized_samples_successed_percent}%" - for index in {0..8}; do - num_successed_dimension_generalized_samples=`find ${DIM_GENERALIZED_SAMPLE_DIR}/${index} -name "model.py" | wc -l` - dimension_generalized_samples_successed_percent=$(( num_successed_dimension_generalized_samples * 100 / num_original_samples )) - echo " ${index}, successed=${num_successed_dimension_generalized_samples}, percent=${dimension_generalized_samples_successed_percent}%" + num_dim_generalized_samples=`find ${DIM_GENERALIZED_SAMPLE_DIR} -name "model.py" | wc -l` + dim_generalization_successed_percent=$((num_dim_generalized_samples * 100 / (num_original_samples * 9))) + echo "- [Common - 2] dimension generalization: successed=${num_dim_generalized_samples}, percent=${dim_generalization_successed_percent}%" + for index in {0..9}; do + num_dim_generalized_samples_index=`find ${DIM_GENERALIZED_SAMPLE_DIR}/${index} -name "model.py" | wc -l` + dim_generalization_successed_percent=$((num_dim_generalized_samples_index * 100 / num_original_samples)) + echo " ${index}, successed=${num_dim_generalized_samples_index}, percent=${dim_generalization_successed_percent}%" done echo "" - num_successed_op_names=`find ${SAMPLE_OP_NAMES_DIR} -name op_names.txt | wc -l` - op_names_successed_percent=$(( num_successed_op_names * 100 / num_original_samples )) - echo "- [Common - 3] generate op names: successed=${num_successed_op_names}, percent=${op_names_successed_percent}%" + num_op_names=`find ${SAMPLE_OP_NAMES_DIR} -name op_names.txt | wc -l` + op_names_successed_percent=$((num_op_names * 100 / num_original_samples)) + echo "- [Common - 3] generate op names: successed=${num_op_names}, percent=${op_names_successed_percent}%" num_typical_subgraph_ranges=`find ${TYPICAL_SUBGRAPH_RANGE_DIR} -name typical_subgraph_ranges.json | wc -l` - typical_subgraph_ranges_successed_percent=$(( num_typical_subgraph_ranges * 100 / num_original_samples )) + typical_subgraph_ranges_successed_percent=$((num_typical_subgraph_ranges * 100 / num_original_samples)) echo "- [Common - 4] generate typical subgraph ranges: successed=${num_typical_subgraph_ranges}, percent=${typical_subgraph_ranges_successed_percent}%" - num_successed_range_decomposed_subgraphs=`find ${TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Common - 5] range decompose: successed=${num_successed_range_decomposed_subgraphs}" - - num_renamed_subgraphs=`find ${RENAMED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Common - 6] rename: successed=${num_renamed_subgraphs}" + num_typical_subgraphs=`find ${TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Common - 5] range decompose: successed=${num_typical_subgraphs}" + + num_renamed_typical_subgraphs=`find ${RENAMED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Common - 6] rename: successed=${num_renamed_typical_subgraphs}" - num_deduplicated_typical_subgraphs=`find ${DEDUP_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Common - 7] remove duplicated: successed=${num_deduplicated_typical_subgraphs}" + num_dedup_typical_subgraphs=`find ${DEDUP_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Common - 7] remove duplicated: successed=${num_dedup_typical_subgraphs}" echo "" # fusible subgraphs - num_successed_cumsum_kernels_subgraphs=`find ${CUMSUM_NUM_KERNELS_DIR} -name "cumsum_num_kernels.json" | wc -l` - cumsum_kernels_successed_percent=$((num_successed_cumsum_kernels_subgraphs * 100 / num_deduplicated_typical_subgraphs)) - echo "- [Fusible - 1] cumsum kernels: successed=${num_successed_cumsum_kernels_subgraphs}, percent=${cumsum_kernels_successed_percent}%" + num_cumsum_kernels_subgraphs=`find ${CUMSUM_NUM_KERNELS_DIR} -name "cumsum_num_kernels.json" | wc -l` + cumsum_kernels_successed_percent=$((num_cumsum_kernels_subgraphs * 100 / num_dedup_typical_subgraphs)) + echo "- [Fusible - 1] cumsum kernels: successed=${num_cumsum_kernels_subgraphs}, percent=${cumsum_kernels_successed_percent}%" num_fusible_subgraph_ranges=`find ${FUSIBLE_SUBGRAPH_RANGE_DIR} -name "fusible_subgraph_ranges.json" | wc -l` num_grouped_fusible_subgraph_ranges=`find ${GROUPED_FUSIBLE_SUBGRAPH_RANGE_DIR} -name "grouped_fusible_subgraph_ranges.json" | wc -l` @@ -667,17 +696,17 @@ summary() { echo " grouped fusible subgraph ranges: successed=${num_grouped_fusible_subgraph_ranges}" echo "" - num_successed_dimension_generalized_subgraphs=`find ${DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Fusible - 2] subgraph dimension generalization: successed=${num_successed_dimension_generalized_subgraphs}" - for index in {0..8}; do - num_successed_dimension_generalized_subgraphs=`find ${DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` - echo " ${index}, successed=${num_successed_dimension_generalized_subgraphs}" + num_dim_generalized_subgraphs=`find ${DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Fusible - 2] subgraph dimension generalization: successed=${num_dim_generalized_subgraphs}" + for index in {0..9}; do + num_dim_generalized_subgraphs=`find ${DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, successed=${num_dim_generalized_subgraphs}" done echo "" num_renamed_fusible_subgraphs=`find ${RENAMED_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` echo "- [Fusible - 3] rename: successed=${num_renamed_fusible_subgraphs}" - for index in {0..8}; do + for index in {0..9}; do num_renamed_fusible_subgraphs_index=`find ${RENAMED_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` echo " ${index}, successed=${num_renamed_fusible_subgraphs_index}" done @@ -685,7 +714,7 @@ summary() { num_deduplicated_fusible_subgraphs=`find ${DEDUP_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR} -name "model.py" | wc -l` echo "- [Fusible - 4] remove duplicated: successed=${num_deduplicated_fusible_subgraphs}" - for index in {0..8}; do + for index in {0..9}; do num_deduplicated_fusible_subgraphs_index=`find ${DEDUP_DIM_GENERALIZED_FUSIBLE_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` echo " ${index}, successed=${num_deduplicated_fusible_subgraphs_index}" done @@ -700,40 +729,39 @@ summary() { done echo "" - num_successed_unittests=`find ${FUSIBLE_SUBGRAPH_UNITTEST_DIR} -name "*_test.py" | wc -l` - unittest_successed_percent=$((num_successed_unittests * 100 / num_dtype_generalized_subgraphs)) - echo "- [Fusible - 6] generate unittest: successed=${num_successed_unittests}, percent=${unittest_successed_percent}%" - for dtype in float32 float16 bfloat16 - do - num_successed_unittests=`find ${FUSIBLE_SUBGRAPH_UNITTEST_DIR}/${dtype} -name "*_test.py" | wc -l` - echo " ${dtype}, successed=${num_successed_unittests}" - done - echo "" - # typical subgraphs - num_successed_dim_generalized_typical_subgraphs=`find ${DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Typical - 1] subgraph dimension generalization: successed=${num_successed_dim_generalized_typical_subgraphs}" + num_dim_generalized_typical_subgraphs=`find ${DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Typical - 1] subgraph dimension generalization: successed=${num_dim_generalized_typical_subgraphs}" for index in {0..8}; do - num_successed_dim_generalized_typical_index=`find ${DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` - echo " ${index}, successed=${num_successed_dim_generalized_typical_index}" + num_dim_generalized_typical_index=`find ${DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, successed=${num_dim_generalized_typical_index}" done echo "" - num_renamed_typical_subgraphs=`find ${RENAMED_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Typical - 2] rename: successed=${num_renamed_typical_subgraphs}" + num_renamed_dim_generalized_typical_subgraphs=`find ${RENAMED_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Typical - 2] rename: successed=${num_renamed_dim_generalized_typical_subgraphs}" for index in {0..8}; do - num_renamed_typical_subgraphs_index=`find ${RENAMED_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` - echo " ${index}, successed=${num_renamed_typical_subgraphs_index}" + num_renamed_dim_generalized_typical_subgraphs_index=`find ${RENAMED_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` + echo " ${index}, successed=${num_renamed_dim_generalized_typical_subgraphs_index}" done echo "" - num_deduplicated_typical_subgraphs=`find ${DEDUP_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` - echo "- [Typical - 3] remove duplicated: successed=${num_deduplicated_typical_subgraphs}" + num_dedup_typical_subgraphs=`find ${DEDUP_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Typical - 3] remove duplicated: successed=${num_dedup_typical_subgraphs}" for index in {0..9}; do num_deduplicated_typical_subgraphs_index=`find ${DEDUP_DIM_GENERALIZED_TYPICAL_SUBGRAPH_DIR}/${index} -name "model.py" | wc -l` echo " ${index}, successed=${num_deduplicated_typical_subgraphs_index}" done echo "" + + num_dtype_generalized_typical_subgraphs=`find ${DTYPE_GENERALIZED_TYPICAL_SUBGRAPH_DIR} -name "model.py" | wc -l` + echo "- [Typical - 4] dtype generalization: successed=${num_dtype_generalized_typical_subgraphs}" + for dtype in float32 float16 bfloat16 + do + num_dtype_generalized_typical_subgraphs_index=`find ${DTYPE_GENERALIZED_TYPICAL_SUBGRAPH_DIR}/${dtype} -name "model.py" | wc -l` + echo " ${dtype}, successed=${num_dtype_generalized_typical_subgraphs_index}" + done + echo "" } main diff --git a/graph_net/torch/sample_pass/dtype_generalizer.py b/graph_net/torch/sample_pass/dtype_generalizer.py index 7da61ad70..71f3ce3fb 100755 --- a/graph_net/torch/sample_pass/dtype_generalizer.py +++ b/graph_net/torch/sample_pass/dtype_generalizer.py @@ -330,6 +330,7 @@ def resume(self, rel_model_path: str) -> List[str]: return [] # Parse the computation graph + torch.cuda.empty_cache() module, inputs = get_torch_module_and_inputs( model_path, device=self._choose_device(self.config["device"]) ) @@ -428,7 +429,9 @@ def _apply_pass_and_generate( # run ShapeProp to get real runtime dtypes, then prune redundant .to() nodes. try: torch.cuda.empty_cache() - _, meta_inputs = get_torch_module_and_inputs(str(output_dir)) + _, meta_inputs = get_torch_module_and_inputs( + str(output_dir), device=self._choose_device(self.config["device"]) + ) ShapeProp(gm_modified).propagate(*meta_inputs) gm_modified = dtype_pass.remove_redundant_to_calls(gm_modified) except Exception as e: