From e9c2874f753595e4a8581c4f0d480200496588ef Mon Sep 17 00:00:00 2001 From: Ryan Alcantara <38192529+alcantarar@users.noreply.github.com> Date: Fri, 26 Apr 2019 19:00:10 -0600 Subject: [PATCH 1/2] prevent print_topics from truncating features If printing features of varying lengths, especially when ngram > 1, print_topics output was difficult to interpret. Now adjusts topic width to fit longest feature in that topic. --- mglearn/tools.py | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/mglearn/tools.py b/mglearn/tools.py index b67686e..6b4cb4a 100644 --- a/mglearn/tools.py +++ b/mglearn/tools.py @@ -98,19 +98,47 @@ def print_topics(topics, feature_names, sorting, topics_per_chunk=6, these_topics = topics[i: i + topics_per_chunk] # maybe we have less than topics_per_chunk left len_this_chunk = len(these_topics) - # print topic headers - print(("topic {:<8}" * len_this_chunk).format(*these_topics)) - print(("-------- {0:<5}" * len_this_chunk).format("")) + # get max length of feature names + row = [] + feat_len = [] + + #generate list of sorted features and their lengths + for i in range(n_words): + row.append(feature_names[sorting[these_topics, i]]) + topic_words = np.array(row).T + #get max feature length for each topic + max_feat_len = [] + for t in topic_words: + max_feat_len.append(len(max(t, key = len))) + #generate space between strings equal to 1+len(longest string in topic) + result = [None]*len(these_topics)*2 + result[::2] = these_topics + nums = np.array([(x - 5) for x in max_feat_len]) + nums[nums < 0] = 0 #prevents spaces of negative length + result[1::2] = [str(x) for x in nums] + print(("topic {:<{}} " * len_this_chunk).format(*result)) + + #generate space between strings equal to 1+len(longest string in topic) + result = [None]*len(these_topics)*2 + result[::2] = ['']*len(these_topics) + nums = np.array([(x - 8) for x in max_feat_len]) + nums[nums < 0] = 0 #prevents spaces of negative length + result[1::2] = [str(x) for x in nums] + print(("-------- {:<{}} " * len_this_chunk).format(*result)) + # print top n_words frequent words for i in range(n_words): + #generate space between strings + result = [None]*len(these_topics)*2 + result[::2] = feature_names[sorting[these_topics, i]] + result[1::2] = [str(x+2) for x in max_feat_len] try: - print(("{:<14}" * len_this_chunk).format( - *feature_names[sorting[these_topics, i]])) + print(("{:<{}}" * len_this_chunk).format(*result)) except: pass print("\n") - + def get_tree(tree, **kwargs): try: # python3 From 57046630ac4cb9143ded41f562606d4039cfa711 Mon Sep 17 00:00:00 2001 From: Ryan Alcantara <38192529+alcantarar@users.noreply.github.com> Date: Fri, 26 Apr 2019 19:08:09 -0600 Subject: [PATCH 2/2] cleanup unused variables --- mglearn/tools.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mglearn/tools.py b/mglearn/tools.py index 6b4cb4a..3d952be 100644 --- a/mglearn/tools.py +++ b/mglearn/tools.py @@ -98,11 +98,8 @@ def print_topics(topics, feature_names, sorting, topics_per_chunk=6, these_topics = topics[i: i + topics_per_chunk] # maybe we have less than topics_per_chunk left len_this_chunk = len(these_topics) - # get max length of feature names - row = [] - feat_len = [] - #generate list of sorted features and their lengths + row = [] for i in range(n_words): row.append(feature_names[sorting[these_topics, i]]) topic_words = np.array(row).T