"""Evaluation methods for word alignment."""

import numpy as np import seaborn as sns from matplotlib import pyplot as plt import matplotlib.colors as mcolors

def show_alignment(source, target, sure, fuzzy, prediction=None, figsize=(10,10), sizeMaker=250):
    """Plot the reference alignment of one sentence pair as an annotated heatmap.

    Links are given as whitespace-separated ``"i-j"`` tokens, where ``i`` and
    ``j`` are the 1-based positions of a source word and a target word.

    Args:
        source: Source sentence; split on whitespace, one heatmap row per word.
        target: Target sentence; split on whitespace, one heatmap column per word.
        sure: Sure reference links; their cells get value 2 and the label "S".
        fuzzy: Fuzzy reference links; their cells get value 1 and the label "F".
            A link listed in both ``sure`` and ``fuzzy`` is shown as fuzzy.
        prediction: Optional predicted links; each one that falls inside the
            matrix is marked with a red circle.
        figsize: Figure size forwarded to ``plt.subplots``.
        sizeMaker: Marker area (``s``) of the prediction circles.
    """
    source = source.split()
    target = target.split()
    # Sets give constant-time membership tests inside the nested loops below.
    sure = set(sure.split())
    fuzzy = set(fuzzy.split())
    source_len = len(source)
    target_len = len(target)

    alignment = np.zeros((source_len, target_len))
    labels = np.full((source_len, target_len), "", dtype="<U1")
    for idx_source in range(1, source_len + 1):
        for idx_target in range(1, target_len + 1):
            align_check = str(idx_source) + '-' + str(idx_target)
            if align_check in sure:
                alignment[idx_source - 1, idx_target - 1] = 2
                labels[idx_source - 1, idx_target - 1] = "S"
            # Checked second on purpose: fuzzy wins when a link is in both sets.
            if align_check in fuzzy:
                alignment[idx_source - 1, idx_target - 1] = 1
                labels[idx_source - 1, idx_target - 1] = "F"

    fig, ax = plt.subplots(figsize=figsize)
    ax = sns.heatmap(alignment, linewidth=0.3, annot=labels, fmt="",
                     cbar=False, square=True, cmap="Blues")
    ax.set_yticklabels(source, rotation=0)
    ax.set_xticklabels(target, rotation=90)
    ax.set_title("Alignment matrix: Sure (S) and Fuzzy (F) links")

    if prediction is not None:
        prediction = set(prediction.split())
        for idx_source in range(1, source_len + 1):
            for idx_target in range(1, target_len + 1):
                align_check = str(idx_source) + '-' + str(idx_target)
                if align_check in prediction:
                    # Heatmap cell (row r, column c) is centred at (c + 0.5, r + 0.5).
                    ax.scatter(idx_target - 0.5, idx_source - 0.5, s=sizeMaker,
                               facecolors='none', edgecolors='red', linewidths=2.)
        ax.set_title("Alignment matrix: Sure (S) and Fuzzy (F) links. Red circle for Prediction links")

    fig.tight_layout()
    plt.show()

def _reference_link_grid(tokens, source_len, target_len):
    """Collect the in-range links named in *tokens* and count links per word.

    Returns ``(links, source_degree, target_degree)``. ``links`` is a list of
    ``(idx_source, idx_target)`` pairs; the degree lists are indexed by 1-based
    word position (index 0 is unused) and hold the number of links per word.
    """
    token_set = set(tokens)
    links = []
    source_degree = [0] * (source_len + 1)
    target_degree = [0] * (target_len + 1)
    for idx_source in range(1, source_len + 1):
        for idx_target in range(1, target_len + 1):
            # Match on the exact token text, so only canonical "i-j" spellings
            # that fall inside the sentence bounds count as links.
            if str(idx_source) + '-' + str(idx_target) in token_set:
                links.append((idx_source, idx_target))
                source_degree[idx_source] += 1
                target_degree[idx_target] += 1
    return links, source_degree, target_degree


def _reference_link_shapes(links, source_degree, target_degree):
    """Count links per shape and the distinct words involved in each shape."""
    num_links = {"one2one": 0, "one2many": 0, "many2one": 0, "many2many": 0}
    one2many_sources = set()
    many2one_targets = set()
    many2many_sources = set()
    many2many_targets = set()
    for idx_source, idx_target in links:
        fans_out = source_degree[idx_source] > 1   # source has another target
        fans_in = target_degree[idx_target] > 1    # target has another source
        if fans_out and fans_in:
            num_links["many2many"] += 1
            many2many_sources.add(idx_source)
            many2many_targets.add(idx_target)
        elif fans_out:
            num_links["one2many"] += 1
            one2many_sources.add(idx_source)
        elif fans_in:
            num_links["many2one"] += 1
            many2one_targets.add(idx_target)
        else:
            num_links["one2one"] += 1
    # A word already counted as a one-to-many source (or many-to-one target)
    # is not counted a second time among the many-to-many words.
    many2many_sources -= one2many_sources
    many2many_targets -= many2one_targets
    num_words = {"one2many_source": len(one2many_sources),
                 "many2one_target": len(many2one_targets),
                 "many2many_source": len(many2many_sources),
                 "many2many_target": len(many2many_targets)}
    return num_links, num_words


def analyse_reference(sure, fuzzy, source, target):
    """Compute statistics about the reference alignment of one sentence pair.

    Links are whitespace-separated ``"i-j"`` tokens with 1-based word
    positions. The reference is the union of the sure and the fuzzy links;
    tokens that point outside the sentence pair are ignored by every count
    except ``num_sure`` / ``num_fuzzy``, which are plain token counts.

    Args:
        sure: Sure links of the reference.
        fuzzy: Fuzzy links of the reference.
        source: Source sentence (split on whitespace).
        target: Target sentence (split on whitespace).

    Returns:
        dict with sentence sizes (``num_word_*``, ``total_num_link``,
        ``total_null_link``), link counts (``num_sure``, ``num_fuzzy``,
        ``num_align_ref``, ``num_no_ref`` and its ``_no`` / ``_null`` split,
        where ``_null`` are non-links whose source and target words are both
        unaligned), link-shape counts (``num_align_ref_one2one``,
        ``..._one2many_*``, ``..._many2one_*``, ``..._many2many*``) and
        unaligned-word counts and ratios (``num_*2null_ref``,
        ``num_*2notnull_ref``, ``num_*2null_ref_ratio_mean``). The ratio of an
        empty sentence is reported as 0.0.
    """
    sure = sure.split()
    fuzzy = fuzzy.split()
    source_len = len(source.split())
    target_len = len(target.split())

    links, source_degree, target_degree = _reference_link_grid(
        sure + fuzzy, source_len, target_len)
    num_links, num_words = _reference_link_shapes(links, source_degree, target_degree)

    total_num_link = source_len * target_len
    num_align_ref = len(links)
    num_no_ref = total_num_link - num_align_ref

    # Unaligned words are those without any link in the reference.
    num_source2null_ref = source_degree[1:].count(0)
    num_target2null_ref = target_degree[1:].count(0)
    # A cell joining two unaligned words can never be a link itself, so the
    # number of such cells is simply the product of the two counts.
    num_no_ref_null = num_source2null_ref * num_target2null_ref

    # Guard the ratios so that an empty sentence does not divide by zero.
    source_null_ratio = num_source2null_ref / source_len if source_len else 0.
    target_null_ratio = num_target2null_ref / target_len if target_len else 0.

    values = {"num_word_source": source_len,
              "num_word_target": target_len,
              "total_num_link": total_num_link,
              "num_sure": len(sure),
              "num_fuzzy": len(fuzzy),
              "num_align_ref": num_align_ref,
              "num_no_ref": num_no_ref,
              "num_no_ref_no": num_no_ref - num_no_ref_null,
              "num_no_ref_null": num_no_ref_null,
              "num_align_ref_one2one": num_links["one2one"],
              "num_align_ref_one2many_source": num_words["one2many_source"],
              "num_align_ref_one2many_target": num_links["one2many"],
              "num_align_ref_many2one_source": num_links["many2one"],
              "num_align_ref_many2one_target": num_words["many2one_target"],
              "num_align_ref_many2many": num_links["many2many"],
              "num_align_ref_many2many_source": num_words["many2many_source"],
              "num_align_ref_many2many_target": num_words["many2many_target"],
              "total_null_link": source_len + target_len,
              "num_source2null_ref": num_source2null_ref,
              "num_target2null_ref": num_target2null_ref,
              "num_source2notnull_ref": source_len - num_source2null_ref,
              "num_target2notnull_ref": target_len - num_target2null_ref,
              "num_source2null_ref_ratio_mean": np.mean([source_null_ratio]),
              "num_target2null_ref_ratio_mean": np.mean([target_null_ratio])}
    return values

def show_graphs_reference(result, figScale=1.2, figsize=(20,5)):
    """Draw three bar charts summarising reference-alignment statistics.

    The panels show, from left to right: aligned vs. unaligned words per
    language, the number of sure / fuzzy / null links, and the number of links
    per shape (one-to-one ... many-to-many) together with a stacked percentage
    bar on a secondary axis.

    Args:
        result: Dict holding the ``num_*`` / ``total_num_link`` keys read
            below (the keys produced by ``analyse_reference``).
        figScale: Factor applied to the tallest bar to get the y-axis limit.
        figsize: Figure size forwarded to ``plt.subplots``.
    """
    fig, axes = plt.subplots(1, 3, figsize=figsize)
    ax1 = axes[0]
    ax2 = axes[1]
    ax3 = axes[2]
    palettes = list(mcolors.TABLEAU_COLORS.values())

    #-----------------------------------------------------------------
    # Panel 1: aligned / unaligned words.
    x = ["Source I=" + str(result["num_word_source"]),
         "Target J=" + str(result["num_word_target"])]
    y = [result["num_word_source"], result["num_word_target"]]
    y_aligned = [result["num_source2notnull_ref"], result["num_target2notnull_ref"]]
    y_notAligned = [result["num_source2null_ref"], result["num_target2null_ref"]]
    # The full-height bar is drawn first and the aligned part on top of it, so
    # the visible remainder of the first bar is the unaligned share.
    ax1.bar(x=x, height=np.array(y_aligned) + np.array(y_notAligned), color=palettes[0])
    ax1.bar(x=x, height=y_aligned, color=palettes[1])
    ax1.set(xlabel="Aligned/Unaligned words", ylabel='Number of words')
    ax1.set(ylim=(0, np.max(y) * figScale))
    for index, (v1, v2) in enumerate(zip(y_aligned, y_notAligned)):
        ax1.text(index, v1, str(v1), color='black', ha="center", verticalalignment="bottom")
        ax1.text(index, v1 + v2, str(v2), color='black', ha="center", verticalalignment="bottom")
    ax1.legend(("Unaligned", "Aligned"), loc=4)

    #-----------------------------------------------------------------
    # Panel 2: sure / fuzzy / null links.
    x = ["Sure links", "Fuzzy links", "Null links"]
    y = [result["num_sure"], result["num_fuzzy"], result["num_no_ref_null"]]
    sns.barplot(x=x, y=y, ax=ax2, palette=palettes)
    ax2.set(ylim=(0, np.max(y) * figScale))
    ax2.set(xlabel="All possible links I*J: " + str(result["total_num_link"]) +
                   "\n Non-existing links: " + str(result["num_no_ref"]) + ", including Null links",
            ylabel='Number of links')
    for index, value in enumerate(y):
        ax2.text(index, value, str(value), color='black', ha="center", verticalalignment="bottom")

    #-----------------------------------------------------------------
    # Panel 3: links per shape, plus a stacked percentage bar.
    x = ["One2One", "One2Many", "Many2One", "Many2Many", "%"]
    y = [result["num_align_ref_one2one"],
         result["num_align_ref_one2many_target"],
         result["num_align_ref_many2one_source"],
         result["num_align_ref_many2many"]]
    y_text = [str(result["num_align_ref_one2one"]) + "-" + str(result["num_align_ref_one2one"]),
              str(result["num_align_ref_one2many_source"]) + "-" + str(result["num_align_ref_one2many_target"]),
              str(result["num_align_ref_many2one_source"]) + "-" + str(result["num_align_ref_many2one_target"]),
              str(result["num_align_ref_many2many_source"]) + "-" + str(result["num_align_ref_many2many_target"])]

    num_align_ref = result["num_align_ref"]

    def percent(count):
        # A reference without any link has no shares to report; returning 0
        # avoids dividing by zero.
        return 100 * count / num_align_ref if num_align_ref else 0.

    y_percent = np.array([percent(count) for count in y])
    # Cumulative heights: each bar of the stacked "%" column is drawn from 0.
    one2one_percent = percent(result["num_align_ref_one2one"])
    one2many_percent = one2one_percent + percent(result["num_align_ref_one2many_target"])
    many2one_percent = one2many_percent + percent(result["num_align_ref_many2one_source"])
    many2many_percent = many2one_percent + percent(result["num_align_ref_many2many"])

    ax3.set(ylim=(0, np.max(y) * figScale))
    ax3.set(xlabel="Alignment links: Source-Target", ylabel='Number of links')
    ax3.bar(x=x, height=[result["num_align_ref_one2one"], 0, 0, 0, 0], color=palettes[0])
    ax3.bar(x=x, height=[0, result["num_align_ref_one2many_target"], 0, 0, 0], color=palettes[1])
    ax3.bar(x=x, height=[0, 0, result["num_align_ref_many2one_source"], 0, 0], color=palettes[2])
    ax3.bar(x=x, height=[0, 0, 0, result["num_align_ref_many2many"], 0], color=palettes[3])
    for index, (height, value) in enumerate(zip(y, y_text)):
        ax3.text(index, height, value, color='black', ha="center", verticalalignment="bottom")

    ax3X = ax3.twinx()
    ax3X.set(ylabel='Percentage')
    ax3X.set(ylim=(0, 110))
    # Tallest cumulative bar first, so the shorter ones stay visible on top.
    ax3X.bar(x=x, height=[0, 0, 0, 0, many2many_percent], color=palettes[3])
    ax3X.bar(x=x, height=[0, 0, 0, 0, many2one_percent], color=palettes[2])
    ax3X.bar(x=x, height=[0, 0, 0, 0, one2many_percent], color=palettes[1])
    ax3X.bar(x=x, height=[0, 0, 0, 0, one2one_percent], color=palettes[0])
    y_bar = [one2one_percent, one2many_percent, many2one_percent, many2many_percent]
    for index, value in zip(y_bar, y_percent):
        if value != 0:
            ax3X.text(4, index, str(np.round(value, 1)) + "%",
                      color='black', ha="center", verticalalignment="bottom")
    ax3X.legend(("Many2Many", "Many2One", "One2Many", "One2One"), loc=4)

    #-----------------------------------------------------------------
    fig.tight_layout(pad=3.)

def calculate_scores(tp, fp, tn, fn):
    """Return ``(accuracy, precision, recall, f1)`` for a confusion matrix.

    Any score whose denominator is zero is reported as ``0.``.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        tn: Number of true negatives.
        fn: Number of false negatives.
    """
    total = tp + fp + tn + fn
    predicted_positive = tp + fp
    actual_positive = tp + fn

    acc = (tp + tn) / total if total != 0 else 0.
    precision = tp / predicted_positive if predicted_positive != 0. else 0.
    recall = tp / actual_positive if actual_positive != 0. else 0.

    score_sum = precision + recall
    f1 = (2 * precision * recall) / score_sum if score_sum != 0 else 0.
    return acc, precision, recall, f1

def _prediction_link_grid(tokens, source_len, target_len):
    """Collect the in-range links named in *tokens* and count links per word.

    Returns ``(links, source_degree, target_degree)``. ``links`` is a set of
    ``(idx_source, idx_target)`` pairs; the degree lists are indexed by 1-based
    word position (index 0 is unused) and hold the number of links per word.
    """
    token_set = set(tokens)
    links = set()
    source_degree = [0] * (source_len + 1)
    target_degree = [0] * (target_len + 1)
    for idx_source in range(1, source_len + 1):
        for idx_target in range(1, target_len + 1):
            # Match on the exact token text, so only canonical "i-j" spellings
            # that fall inside the sentence bounds count as links.
            if str(idx_source) + '-' + str(idx_target) in token_set:
                links.add((idx_source, idx_target))
                source_degree[idx_source] += 1
                target_degree[idx_target] += 1
    return links, source_degree, target_degree


def _prediction_null_confusion(ref_degree, pred_degree):
    """Return ``(tp, fp, tn, fn)`` for the decision "this word is unaligned"."""
    tp = fp = tn = fn = 0
    for ref_count, pred_count in zip(ref_degree[1:], pred_degree[1:]):
        ref_null = ref_count == 0
        pred_null = pred_count == 0
        if ref_null and pred_null:
            tp += 1
        elif pred_null:
            fp += 1    # predicted unaligned, but aligned in the reference
        elif ref_null:
            fn += 1    # predicted aligned, but unaligned in the reference
        else:
            tn += 1
    return tp, fp, tn, fn


def _prediction_link_shape(idx_source, idx_target, source_degree, target_degree):
    """Name the shape of a link from the link counts of its two words."""
    fans_out = source_degree[idx_source] > 1   # source has another target
    fans_in = target_degree[idx_target] > 1    # target has another source
    if fans_out and fans_in:
        return "many2many"
    if fans_out:
        return "one2many"
    if fans_in:
        return "many2one"
    return "one2one"


def analyse_prediction(prediction, sure, fuzzy, source, target):
    """Compare predicted alignment links with the reference of a sentence pair.

    Links are whitespace-separated ``"i-j"`` tokens with 1-based word
    positions. The reference is the union of the sure and the fuzzy links.
    Every cell of the source x target grid is classified as TP / FP / FN / TN,
    and every word is classified the same way for the decision "this word is
    unaligned".

    Args:
        prediction: Predicted links.
        sure: Sure links of the reference.
        fuzzy: Fuzzy links of the reference.
        source: Source sentence (split on whitespace).
        target: Target sentence (split on whitespace).

    Returns:
        dict with link counts of the prediction (``num_align_pred*``,
        ``num_no_pred*``), the link-level confusion counts and their
        breakdowns (``num_true_align_tp*``, ``num_false_align_no_fp*``,
        ``num_false_no_fn*``, ``num_true_no_tn*``), the link-level scores
        (``aer``, ``acc``, ``precision``, ``recall``, ``f1``), the word-level
        unaligned counts (``num_*2null_pred*``, ``num_*2notnull_pred``,
        ``num_true_null_tp`` ...) and the word-level scores (``null_*``).
        Ratios of an empty sentence are reported as 0.0, and ``aer`` is 0.0
        when there are neither predicted nor sure links.
    """
    sure = sure.split()
    fuzzy = fuzzy.split()
    pred = prediction.split()
    source_len = len(source.split())
    target_len = len(target.split())

    ref_links, ref_source_degree, ref_target_degree = _prediction_link_grid(
        sure + fuzzy, source_len, target_len)
    pred_links, pred_source_degree, pred_target_degree = _prediction_link_grid(
        pred, source_len, target_len)

    #-------------------------------------
    # Word level: which words are left unaligned.
    source_tp, source_fp, source_tn, source_fn = _prediction_null_confusion(
        ref_source_degree, pred_source_degree)
    target_tp, target_fp, target_tn, target_fn = _prediction_null_confusion(
        ref_target_degree, pred_target_degree)
    num_source2null_pred = source_tp + source_fp
    num_target2null_pred = target_tp + target_fp
    # Guard the ratios so that an empty sentence does not divide by zero.
    source_null_ratio = num_source2null_pred / source_len if source_len else 0.
    target_null_ratio = num_target2null_pred / target_len if target_len else 0.

    #-------------------------------------
    # Link level: classify every cell of the grid.
    shapes = ("one2one", "one2many", "many2one", "many2many")
    pred_by_shape = dict.fromkeys(shapes, 0)
    tp_by_shape = dict.fromkeys(shapes, 0)
    fp_by_shape = dict.fromkeys(shapes, 0)
    one2many_sources = set()
    many2one_targets = set()
    many2many_sources = set()
    many2many_targets = set()

    num_true_align_tp = 0
    num_false_align_no_fp = 0
    num_false_align_no_fp_no_in_ref = 0
    num_false_align_no_fp_null_in_ref = 0
    num_false_no_fn = 0
    num_false_no_fn_no_in_pred = 0
    num_false_no_fn_null_in_pred = 0
    num_true_no_tn = 0
    num_true_no_tn_no_in_pred = 0
    num_true_no_tn_null_in_pred = 0

    for idx_source in range(1, source_len + 1):
        for idx_target in range(1, target_len + 1):
            cell = (idx_source, idx_target)
            in_ref = cell in ref_links
            if cell in pred_links:
                shape = _prediction_link_shape(
                    idx_source, idx_target, pred_source_degree, pred_target_degree)
                pred_by_shape[shape] += 1
                if shape == "one2many":
                    one2many_sources.add(idx_source)
                elif shape == "many2one":
                    many2one_targets.add(idx_target)
                elif shape == "many2many":
                    many2many_sources.add(idx_source)
                    many2many_targets.add(idx_target)

                if in_ref:
                    num_true_align_tp += 1
                    tp_by_shape[shape] += 1
                else:
                    num_false_align_no_fp += 1
                    fp_by_shape[shape] += 1
                    # "null in ref": both words are unaligned in the reference.
                    if ref_source_degree[idx_source] == 0 and ref_target_degree[idx_target] == 0:
                        num_false_align_no_fp_null_in_ref += 1
                    else:
                        num_false_align_no_fp_no_in_ref += 1
            else:
                # "null in pred": both words are unaligned in the prediction.
                null_in_pred = (pred_source_degree[idx_source] == 0
                                and pred_target_degree[idx_target] == 0)
                if in_ref:
                    num_false_no_fn += 1
                    if null_in_pred:
                        num_false_no_fn_null_in_pred += 1
                    else:
                        num_false_no_fn_no_in_pred += 1
                else:
                    num_true_no_tn += 1
                    if null_in_pred:
                        num_true_no_tn_null_in_pred += 1
                    else:
                        num_true_no_tn_no_in_pred += 1

    # A word already counted as a one-to-many source (or many-to-one target)
    # is not counted a second time among the many-to-many words.
    many2many_sources -= one2many_sources
    many2many_targets -= many2one_targets

    num_align_pred = len(pred_links)
    # Every non-predicted cell is either a false negative or a true negative.
    num_no_pred = num_false_no_fn + num_true_no_tn
    num_no_pred_null = num_false_no_fn_null_in_pred + num_true_no_tn_null_in_pred
    num_no_pred_no = num_false_no_fn_no_in_pred + num_true_no_tn_no_in_pred

    #-------------------------------------
    # Alignment error rate; counted over the raw prediction tokens.
    sure_set = set(sure)
    fuzzy_set = set(fuzzy)
    sure_correct = float(sum(1 for link in pred if link in sure_set))
    fuzzy_correct = float(sum(1 for link in pred if link in fuzzy_set))
    aer_denominator = num_align_pred + len(sure)
    if aer_denominator:
        aer = 1. - (sure_correct * 2 + fuzzy_correct) / aer_denominator
    else:
        # Nothing predicted and nothing sure to find: report no error rather
        # than dividing by zero.
        aer = 0.

    #-------------------------------------
    num_true_null_tp = source_tp + target_tp
    num_false_not_null_fp = source_fp + target_fp
    num_false_null_fn = source_fn + target_fn
    num_true_not_null_tn = source_tn + target_tn

    acc, precision, recall, f1 = calculate_scores(
        num_true_align_tp, num_false_align_no_fp, num_true_no_tn, num_false_no_fn)
    null_acc, null_precision, null_recall, null_f1 = calculate_scores(
        num_true_null_tp, num_false_not_null_fp, num_true_not_null_tn, num_false_null_fn)

    values = {"num_align_pred": num_align_pred,
              "num_no_pred": num_no_pred,
              "num_no_pred_no": num_no_pred_no,
              "num_no_pred_null": num_no_pred_null,
              "num_true_align_tp": num_true_align_tp,
              "num_false_align_no_fp": num_false_align_no_fp,
              "num_false_align_no_fp_no_in_ref": num_false_align_no_fp_no_in_ref,
              "num_false_align_no_fp_null_in_ref": num_false_align_no_fp_null_in_ref,
              "num_false_no_fn": num_false_no_fn,
              "num_false_no_fn_no_in_pred": num_false_no_fn_no_in_pred,
              "num_false_no_fn_null_in_pred": num_false_no_fn_null_in_pred,
              "num_true_no_tn": num_true_no_tn,
              "num_true_no_tn_no_in_pred": num_true_no_tn_no_in_pred,
              "num_true_no_tn_null_in_pred": num_true_no_tn_null_in_pred,
              "aer": aer,
              "acc": acc,
              "precision": precision,
              "recall": recall,
              "f1": f1,
              "num_align_pred_one2one": pred_by_shape["one2one"],
              "num_align_pred_one2many_source": len(one2many_sources),
              "num_align_pred_one2many_target": pred_by_shape["one2many"],
              "num_align_pred_many2one_source": pred_by_shape["many2one"],
              "num_align_pred_many2one_target": len(many2one_targets),
              "num_align_pred_many2many": pred_by_shape["many2many"],
              "num_align_pred_many2many_source": len(many2many_sources),
              "num_align_pred_many2many_target": len(many2many_targets),
              "num_true_align_tp_one2one_pred": tp_by_shape["one2one"],
              "num_true_align_tp_one2many_pred": tp_by_shape["one2many"],
              "num_true_align_tp_many2one_pred": tp_by_shape["many2one"],
              "num_true_align_tp_many2many_pred": tp_by_shape["many2many"],
              "num_false_align_no_fp_one2one_pred": fp_by_shape["one2one"],
              "num_false_align_no_fp_one2many_pred": fp_by_shape["one2many"],
              "num_false_align_no_fp_many2one_pred": fp_by_shape["many2one"],
              "num_false_align_no_fp_many2many_pred": fp_by_shape["many2many"],
              "num_source2null_pred": num_source2null_pred,
              "num_target2null_pred": num_target2null_pred,
              "num_source2notnull_pred": source_len - num_source2null_pred,
              "num_target2notnull_pred": target_len - num_target2null_pred,
              "num_source2null_pred_tp": source_tp,
              "num_source2null_pred_fp": source_fp,
              "num_source2null_pred_fn": source_fn,
              "num_source2null_pred_tn": source_tn,
              "num_target2null_pred_tp": target_tp,
              "num_target2null_pred_fp": target_fp,
              "num_target2null_pred_fn": target_fn,
              "num_target2null_pred_tn": target_tn,
              "num_source2null_pred_ratio_mean": np.mean([source_null_ratio]),
              "num_target2null_pred_ratio_mean": np.mean([target_null_ratio]),
              "num_true_null_tp": num_true_null_tp,
              "num_false_not_null_fp": num_false_not_null_fp,
              "num_false_null_fn": num_false_null_fn,
              "num_true_not_null_tn": num_true_not_null_tn,
              "null_acc": null_acc,
              "null_precision": null_precision,
              "null_recall": null_recall,
              "null_f1": null_f1}
    return values

# Text style shared by every value label drawn on top of a bar.
_BAR_LABEL_KWARGS = dict(color='black', ha="center", verticalalignment="bottom")


def _percent_of(count, total):
    """Return ``count`` as a percentage of ``total``, or 0.0 when ``total`` is 0."""
    return 100 * count / total if total else 0.0


def _label_stacked_bars(ax, lower, upper):
    """Write the non-zero values of two-segment stacked bars onto ``ax``.

    ``lower[i]`` is printed at the top of the lower segment and ``upper[i]``
    at the top of the whole bar (``lower[i] + upper[i]``).
    """
    for index, (v1, v2) in enumerate(zip(lower, upper)):
        if v1 != 0:
            ax.text(index, v1, str(v1), **_BAR_LABEL_KWARGS)
        if v2 != 0:
            ax.text(index, v1 + v2, str(v2), **_BAR_LABEL_KWARGS)


def _label_score_bars(ax, scores):
    """Print each score (given as a fraction) as a percentage above its bar."""
    for index, value in enumerate(scores):
        ax.text(index, value * 100, str(np.round(value * 100, 1)) + "%",
                **_BAR_LABEL_KWARGS)


def show_graphs_prediction_words(result, figScale=1.2, figsize=(20,5)):
    """Plot word-level (aligned vs. unaligned) statistics of a prediction.

    Creates one figure with three panels:

    1. aligned / unaligned word counts for the source and target side,
    2. TP / FP / FN / TN counts for unaligned words, stacked source + target,
    3. accuracy, precision, recall and F1 for unaligned words, in percent.

    Args:
        result: Mapping with the ``num_source2null_pred*``,
            ``num_target2null_pred*``, ``num_*2notnull_pred`` and ``null_*``
            entries, as in the dict built by ``analyse_prediction``.
        figScale: Head-room factor applied to the tallest bar when setting
            the y-limit of the two count panels.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        None. The figure is drawn on the current matplotlib backend; this
        function does not call ``plt.show()``.
    """
    fig, axes = plt.subplots(1, 3, figsize=figsize)
    ax1 = axes[0]
    ax2 = axes[1]
    ax3 = axes[2]
    palettes = list(mcolors.TABLEAU_COLORS.values())
    #-----------------------------------------------------------------
    # Panel 1: aligned vs. unaligned words per side.
    num_source = result["num_source2null_pred"] + result["num_source2notnull_pred"]
    num_target = result["num_target2null_pred"] + result["num_target2notnull_pred"]
    x = ["Source I="+ str(num_source), "Target J="+ str(num_target)]
    y = [num_source, num_target]
    y_aligned = [result["num_source2notnull_pred"], result["num_target2notnull_pred"]]
    y_notAligned = [result["num_source2null_pred"], result["num_target2null_pred"]]
    # The full-height bar is drawn first and the aligned part painted over
    # it, so the visible top segment is the unaligned share.
    ax1.bar(x=x, height=np.array(y_aligned)+np.array(y_notAligned), color=palettes[0])
    ax1.bar(x=x, height=y_aligned, color=palettes[1])
    ax1.set(xlabel="Aligned/Unaligned words", ylabel='Number of words')
    ax1.set(ylim=(0, np.max(y)*figScale))
    _label_stacked_bars(ax1, y_aligned, y_notAligned)
    ax1.legend(("Unaligned", "Aligned"),loc=4)
    #-----------------------------------------------------------------
    # Panel 2: confusion counts for unaligned words, source below target.
    x = ["TP", "FP", "FN", "TN"]
    y_source = [result["num_source2null_pred_tp"], result["num_source2null_pred_fp"],
                result["num_source2null_pred_fn"], result["num_source2null_pred_tn"]]
    y_target = [result["num_target2null_pred_tp"], result["num_target2null_pred_fp"],
                result["num_target2null_pred_fn"], result["num_target2null_pred_tn"]]
    y_total = np.array(y_source)+np.array(y_target)
    ax2.bar(x=x, height=y_total, color=palettes[2])
    ax2.bar(x=x, height=y_source, color=palettes[3])
    ax2.set(xlabel="Counts for Unaligned words", ylabel='Number of words')
    ax2.set(ylim=(0, np.max(y_total)*figScale))
    ax2.legend(("Target", "Source"),loc=4)
    _label_stacked_bars(ax2, y_source, y_target)
    #-----------------------------------------------------------------
    # Panel 3: scores for unaligned words, shown in percent.
    x = ["ACC", "PRECISION", "RECALL", "F1"]
    y = [result["null_acc"], result["null_precision"], result["null_recall"], result["null_f1"]]
    ax3.set(ylim=(0, 110))
    sns.barplot(x=x, y=np.array(y)*100, ax=ax3, palette=palettes)
    ax3.set(xlabel="Scores for Unaligned words (%)")
    _label_score_bars(ax3, y)


def show_graphs_prediction_links(result, figScale=1.2, figsize=(20,5)):
    """Plot link-level statistics of a prediction.

    Creates one figure with three panels:

    1. number of predicted alignment links and of null links,
    2. TP / FP / FN / TN counts over links,
    3. AER, accuracy, precision, recall and F1 over links, in percent.

    Args:
        result: Mapping with the ``num_align_pred``, ``num_no_pred``,
            ``num_no_pred_null``, ``num_true_align_tp``,
            ``num_false_align_no_fp``, ``num_false_no_fn``, ``num_true_no_tn``,
            ``aer``, ``acc``, ``precision``, ``recall`` and ``f1`` entries, as
            in the dict built by ``analyse_prediction``.
        figScale: Not used by this function; present so the signature
            matches the sibling ``show_graphs_prediction_*`` functions.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        None. The figure is drawn on the current matplotlib backend; this
        function does not call ``plt.show()``.
    """
    fig, axes = plt.subplots(1, 3, figsize=figsize)
    ax4 = axes[0]
    ax5 = axes[1]
    ax6 = axes[2]
    palettes = list(mcolors.TABLEAU_COLORS.values())
    #-----------------------------------------------------------------
    # Panel 1: predicted links vs. null links.
    x = ["Alignment links", "Null links"]
    y = [result["num_align_pred"], result["num_no_pred_null"]]
    sns.barplot(x=x, y=y, ax=ax4, palette=palettes)
    ax4.set(xlabel="All possible links I*J: " + str(result["num_align_pred"] + result["num_no_pred"]) +
                   "\n Non-existing links: " + str(result["num_no_pred"]) + ", including Null links",
            ylabel='Number of links')
    for index, value in enumerate(y):
        # Labels sit at the base of the bars (y=0), not on top of them.
        ax4.text(index, 0, str(value), **_BAR_LABEL_KWARGS)
    #-----------------------------------------------------------------
    # Panel 2: confusion counts over links.
    x = ["TP", "FP", "FN", "TN"]
    y = [result["num_true_align_tp"], result["num_false_align_no_fp"],
         result["num_false_no_fn"], result["num_true_no_tn"]]
    # NOTE(review): the upper limit is max/50, so the tallest bar is clipped.
    # This looks intentional (keeps the small counts visible) - confirm.
    ax5.set(ylim=(0, np.max(y)/50))
    sns.barplot(x=x, y=y, ax=ax5, palette=palettes)
    ax5.set(xlabel="Counts for Links")
    for index, value in enumerate(y):
        # Fixed label height (y=1) so every value stays inside the clipped axis.
        ax5.text(index, 1, str(value), **_BAR_LABEL_KWARGS)
    #-----------------------------------------------------------------
    # Panel 3: scores over links, shown in percent.
    x = ["AER", "ACC", "PRECISION", "RECALL", "F1"]
    y = [result["aer"], result["acc"], result["precision"], result["recall"], result["f1"]]
    ax6.set(ylim=(0, 110))
    sns.barplot(x=x, y=np.array(y)*100, ax=ax6, palette=palettes)
    ax6.set(xlabel="Scores for Links (%)")
    _label_score_bars(ax6, y)


def show_graphs_prediction_fertility(result, figScale=1.2, figsize=(20,5)):
    """Plot predicted links grouped by fertility type.

    Creates one figure with two used panels (the third axis is switched off):

    1. number of One2One / One2Many / Many2One / Many2Many links, plus a
       fifth "%" column on a secondary axis showing their cumulative share
       of all predicted links,
    2. for each fertility type, correct (true positive) vs. incorrect links.

    When ``result["num_align_pred"]`` is 0 all shares are reported as 0 %
    instead of raising ``ZeroDivisionError``.

    Args:
        result: Mapping with the ``num_align_pred*`` and
            ``num_true_align_tp_*_pred`` entries, as in the dict built by
            ``analyse_prediction``.
        figScale: Head-room factor applied to the tallest bar when setting
            the y-limit of the count axes.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        None. The figure is drawn on the current matplotlib backend; this
        function does not call ``plt.show()``.
    """
    fig, axes = plt.subplots(1, 3, figsize=figsize)
    ax7 = axes[0]
    ax8 = axes[1]
    axes[2].axis('off')
    palettes = list(mcolors.TABLEAU_COLORS.values())
    #-----------------------------------------------------------------
    # Panel 1: link counts per fertility type and their cumulative share.
    total = result["num_align_pred"]
    x = ["One2One", "One2Many", "Many2One", "Many2Many", "%"]
    y = [result["num_align_pred_one2one"],
         result["num_align_pred_one2many_target"],
         result["num_align_pred_many2one_source"],
         result["num_align_pred_many2many"]]
    # Bar captions in "source-target" form.
    y_text = [str(result["num_align_pred_one2one"])+"-"+str(result["num_align_pred_one2one"]),
              str(result["num_align_pred_one2many_source"])+"-"+str(result["num_align_pred_one2many_target"]),
              str(result["num_align_pred_many2one_source"])+"-"+str(result["num_align_pred_many2one_target"]),
              str(result["num_align_pred_many2many_source"])+"-"+str(result["num_align_pred_many2many_target"])]
    # Guard against an empty prediction (no links at all).
    y_percent = np.array(y)*100/total if total else np.zeros(len(y))
    # Cumulative percentages: each value is the top edge of its segment in
    # the stacked "%" column.
    one2one_percent = _percent_of(result["num_align_pred_one2one"], total)
    one2many_percent = one2one_percent + _percent_of(result["num_align_pred_one2many_target"], total)
    many2one_percent = one2many_percent + _percent_of(result["num_align_pred_many2one_source"], total)
    many2many_percent = many2one_percent + _percent_of(result["num_align_pred_many2many"], total)
    ax7.set(xlabel="Alignment links: Source-Target", ylabel='Number of links')
    ax7.set(ylim=(0, np.max(y)*figScale))
    # One bar() call per category so that each gets its own colour.
    ax7.bar(x=x, height=[result["num_align_pred_one2one"], 0, 0, 0, 0], color=palettes[0])
    ax7.bar(x=x, height=[0, result["num_align_pred_one2many_target"], 0, 0, 0], color=palettes[1])
    ax7.bar(x=x, height=[0, 0, result["num_align_pred_many2one_source"], 0, 0], color=palettes[2])
    ax7.bar(x=x, height=[0, 0, 0, result["num_align_pred_many2many"], 0], color=palettes[3])
    for index, (height, value) in enumerate(zip(y, y_text)):
        ax7.text(index, height, value, **_BAR_LABEL_KWARGS)
    ax7X = ax7.twinx()
    ax7X.set(ylim=(0, 110))
    ax7X.set(ylabel='Percentage')
    # Tallest cumulative bar first; each shorter one is painted over it,
    # which produces the stacked look in the "%" column.
    ax7X.bar(x=x, height=[0, 0, 0, 0, many2many_percent], color=palettes[3])
    ax7X.bar(x=x, height=[0, 0, 0, 0, many2one_percent], color=palettes[2])
    ax7X.bar(x=x, height=[0, 0, 0, 0, one2many_percent], color=palettes[1])
    ax7X.bar(x=x, height=[0, 0, 0, 0, one2one_percent], color=palettes[0])
    y_bar = [one2one_percent, one2many_percent, many2one_percent, many2many_percent]
    for segment_top, share in zip(y_bar, y_percent):
        if share != 0:
            # x=4 is the position of the "%" column.
            ax7X.text(4, segment_top, str(np.round(share, 1)) +"%", **_BAR_LABEL_KWARGS)
    ax7X.legend(("Many2Many", "Many2One", "One2Many", "One2One"),loc=4)
    #-----------------------------------------------------------------
    # Panel 2: correct vs. incorrect links per fertility type.
    x = ["One2One", "One2Many", "Many2One", "Many2Many"]
    y_true = [result["num_true_align_tp_one2one_pred"],
              result["num_true_align_tp_one2many_pred"],
              result["num_true_align_tp_many2one_pred"],
              result["num_true_align_tp_many2many_pred"]]
    # Incorrect = predicted links of that type minus the true positives.
    y_false = [count - correct for count, correct in zip(y, y_true)]
    ax8.set(xlabel="Counts for Alignment links", ylabel='Number of links')
    ax8.set(ylim=(0, np.max(y)*figScale))
    ax8.bar(x=x, height=np.array(y_true) + np.array(y_false), color=palettes[0])
    ax8.bar(x=x, height=y_true, color=palettes[1])
    _label_stacked_bars(ax8, y_true, y_false)
    ax8.legend(("Incorrect", "Correct"),loc=4)
    fig.tight_layout(pad=3.)

# ---------------------------------------------------------------------------
# Worked example: one sentence pair with reference and predicted alignments.
# Every link is written "i-j", where i is the 1-based position of a source
# word and j the 1-based position of a target word.
# ---------------------------------------------------------------------------

# Reference links annotated as sure.
sure = (
    "1-2 2-3 4-4 6-6 7-7 10-9 12-10 13-14 14-16 15-15 "
    "17-19 18-20 19-21 20-22 21-23 22-27 23-28 24-29 25-29 "
    "27-30 28-31 28-32 31-34 32-35 33-36"
)
# Reference links annotated as fuzzy.
fuzzy = "30-33 4-5 8-8 11-9 16-15 34-37 35-38 36-39 37-40 38-43"
# Links produced by the system under evaluation.
pred = (
    "1-2 2-3 4-4 4-5 6-6 7-7 8-8 10-9 11-9 12-10 "
    "13-15 14-16 15-12 16-15 17-14 18-21 19-21 20-22 21-23 "
    "22-27 23-28 24-29 25-29 25-30 28-30 28-32"
)
# Whitespace-tokenised target sentence.
target = (
    "i don't believe there are many people who while reading the results "
    "of an opinion poll which was conducted by Metro Media Transilvania "
    "polling centre , and which refers to the prime minister , "
    "are inclined to believe a thing it says ."
)
# Whitespace-tokenised source sentence.
source = (
    "nu cred ca sint prea multi cei care , atunci cind citesc un sondaj "
    "de opinie realizat de Metro Media Transilvania si care se refera "
    "si la primul - ministru , sint dispusi sa creada o boaba ."
)

# Alignment matrix: reference links as cells, predicted links as red circles.
show_alignment(source=source, target=target, sure=sure, fuzzy=fuzzy, prediction=pred)

# Collect the statistics once, then feed them to the plotting helpers.
resultRef = analyse_reference(sure=sure, fuzzy=fuzzy, source=source, target=target)
resultPred = analyse_prediction(pred, sure, fuzzy, source, target)

# Reference-side statistics.
show_graphs_reference(resultRef)

# Prediction-side statistics: words, links, then fertility.
show_graphs_prediction_words(resultPred)

show_graphs_prediction_links(resultPred)

show_graphs_prediction_fertility(resultPred)