From e9c2874f753595e4a8581c4f0d480200496588ef Mon Sep 17 00:00:00 2001
From: Ryan Alcantara <38192529+alcantarar@users.noreply.github.com>
Date: Fri, 26 Apr 2019 19:00:10 -0600
Subject: [PATCH 1/2] prevent print_topics from truncating features

When printing features of varying lengths, especially when ngram > 1, the print_topics output was difficult to interpret. The width of each topic column is now adjusted to fit the longest feature in that topic.
---
 mglearn/tools.py | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/mglearn/tools.py b/mglearn/tools.py
index b67686e..6b4cb4a 100644
--- a/mglearn/tools.py
+++ b/mglearn/tools.py
@@ -98,19 +98,47 @@ def print_topics(topics, feature_names, sorting, topics_per_chunk=6,
         these_topics = topics[i: i + topics_per_chunk]
         # maybe we have less than topics_per_chunk left
         len_this_chunk = len(these_topics)
-        # print topic headers
-        print(("topic {:<8}" * len_this_chunk).format(*these_topics))
-        print(("-------- {0:<5}" * len_this_chunk).format(""))
+        # get max length of feature names
+        row = []
+        feat_len = []
+        
+        #generate list of sorted features and their lengths
+        for i in range(n_words):
+            row.append(feature_names[sorting[these_topics, i]])
+        topic_words = np.array(row).T
+        #get max feature length for each topic
+        max_feat_len = []
+        for t in topic_words:
+            max_feat_len.append(len(max(t, key = len)))
+        #generate space between strings equal to 1+len(longest string in topic)
+        result = [None]*len(these_topics)*2
+        result[::2] = these_topics
+        nums = np.array([(x - 5) for x in max_feat_len])
+        nums[nums < 0] = 0 #prevents spaces of negative length
+        result[1::2] = [str(x) for x in nums]
+        print(("topic {:<{}} " * len_this_chunk).format(*result))
+        
+        #generate space between strings equal to 1+len(longest string in topic)
+        result = [None]*len(these_topics)*2
+        result[::2] = ['']*len(these_topics)
+        nums = np.array([(x - 8) for x in max_feat_len])
+        nums[nums < 0] = 0 #prevents spaces of negative length
+        result[1::2] = [str(x) for x in nums]
+        print(("-------- {:<{}} " * len_this_chunk).format(*result))
+        
         # print top n_words frequent words
         for i in range(n_words):
+            #generate space between strings 
+            result = [None]*len(these_topics)*2
+            result[::2] = feature_names[sorting[these_topics, i]]
+            result[1::2] = [str(x+2) for x in max_feat_len]
             try:
-                print(("{:<14}" * len_this_chunk).format(
-                    *feature_names[sorting[these_topics, i]]))
+                print(("{:<{}}" * len_this_chunk).format(*result))
             except:
                 pass
         print("\n")
 
-
+        
 def get_tree(tree, **kwargs):
     try:
         # python3

From 57046630ac4cb9143ded41f562606d4039cfa711 Mon Sep 17 00:00:00 2001
From: Ryan Alcantara <38192529+alcantarar@users.noreply.github.com>
Date: Fri, 26 Apr 2019 19:08:09 -0600
Subject: [PATCH 2/2] clean up unused variables

---
 mglearn/tools.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mglearn/tools.py b/mglearn/tools.py
index 6b4cb4a..3d952be 100644
--- a/mglearn/tools.py
+++ b/mglearn/tools.py
@@ -98,11 +98,8 @@ def print_topics(topics, feature_names, sorting, topics_per_chunk=6,
         these_topics = topics[i: i + topics_per_chunk]
         # maybe we have less than topics_per_chunk left
         len_this_chunk = len(these_topics)
-        # get max length of feature names
-        row = []
-        feat_len = []
-        
         #generate list of sorted features and their lengths
+        row = []
         for i in range(n_words):
             row.append(feature_names[sorting[these_topics, i]])
         topic_words = np.array(row).T