| | |
| | |
| |
|
| | from evaluator.CodeBLEU.parser import DFG_python, DFG_java, DFG_ruby, DFG_go, DFG_php, DFG_javascript, DFG_csharp |
| | from evaluator.CodeBLEU.parser import (remove_comments_and_docstrings, |
| | tree_to_token_index, |
| | index_to_code_token, |
| | tree_to_variable_index) |
| | from tree_sitter import Language, Parser |
| | import os |
| |
|
# Directory containing this module; used below to build the path to the
# compiled tree-sitter grammar bundle.
root_dir = os.path.dirname(__file__)

# Maps a language name to the DFG_* callable imported for it from
# evaluator.CodeBLEU.parser.
dfg_function = dict(
    python=DFG_python,
    java=DFG_java,
    ruby=DFG_ruby,
    go=DFG_go,
    php=DFG_php,
    javascript=DFG_javascript,
    c_sharp=DFG_csharp,
)
| |
|
| |
|
def calc_syntax_match(references, candidate, lang):
    """Score a single candidate against its references.

    Wraps ``references`` and ``candidate`` in one-element lists and
    delegates to ``corpus_syntax_match``, returning its result unchanged.
    """
    single_sample_references = [references]
    single_sample_candidates = [candidate]
    return corpus_syntax_match(
        single_sample_references, single_sample_candidates, lang
    )
| |
|
| |
|
def _strip_comments(code, lang):
    """Best-effort comment/docstring removal; return ``code`` as-is on failure."""
    try:
        return remove_comments_and_docstrings(code, lang)
    except Exception:
        # Deliberately best-effort: code the stripper cannot handle is
        # scored unstripped rather than aborting the whole corpus.
        return code


def _collect_subtree_sexps(root_node):
    """Return the S-expression of every non-leaf subtree under ``root_node``.

    The root itself is always included. A child is recorded (and descended
    into) only when it has children of its own, so leaf nodes never appear
    as separate entries.
    """
    sexps = []
    pending = [root_node]
    while pending:
        node = pending.pop()
        sexps.append(node.sexp())
        pending.extend(child for child in node.children if child.children)
    return sexps


def _subtree_sexps(parser, code, lang):
    """Strip comments from ``code``, parse it, and list its subtree S-expressions."""
    stripped = _strip_comments(code, lang)
    root_node = parser.parse(bytes(stripped, 'utf8')).root_node
    return _collect_subtree_sexps(root_node)


def corpus_syntax_match(references, candidates, lang):
    """Compute the fraction of reference subtrees that occur in the candidates.

    For every candidate ``candidates[i]`` and every reference in
    ``references[i]``, both sources are parsed with the tree-sitter grammar
    for ``lang``. Each non-leaf subtree of the reference counts as a match
    when a subtree with the same S-expression exists in the candidate.

    Args:
        references: Sequence where ``references[i]`` is an iterable of
            reference source strings for ``candidates[i]``.
        candidates: Sequence of candidate source strings.
        lang: Grammar name looked up in ``parser/my-languages.so``; also
            passed to ``remove_comments_and_docstrings``.

    Returns:
        ``matched / total`` over all reference subtrees as a float, or
        ``0.0`` when there is nothing to compare (no candidates, or no
        references at all) instead of raising ``ZeroDivisionError``.
    """
    # Nothing to score: skip loading the grammar entirely.
    if len(candidates) == 0:
        return 0.0

    language = Language(root_dir + '/parser/my-languages.so', lang)
    parser = Parser()
    parser.set_language(language)

    match_count = 0
    total_count = 0

    for i, candidate in enumerate(candidates):
        # Built lazily on the first reference so a sample without
        # references is never stripped or parsed, and a sample with several
        # references is parsed only once.
        cand_sexps = None
        for reference in references[i]:
            if cand_sexps is None:
                # A set gives O(1) membership tests; only membership matters.
                # NOTE(review): comment stripping previously always used the
                # literal 'java'; it now follows ``lang`` - confirm the helper
                # handles every grammar name used by callers.
                cand_sexps = set(_subtree_sexps(parser, candidate, lang))

            ref_sexps = _subtree_sexps(parser, reference, lang)
            match_count += sum(1 for sexp in ref_sexps if sexp in cand_sexps)
            total_count += len(ref_sexps)

    if total_count == 0:
        return 0.0
    return match_count / total_count
| |
|