MeaningCloud
diff --git a/‎README.md‎
Lines changed: 12 additions & 6 deletions b/‎README.md‎
Lines changed: 12 additions & 6 deletions
diff --git a/‎example/Client.py‎
Lines changed: 8 additions & 7 deletions b/‎example/Client.py‎
Lines changed: 8 additions & 7 deletions
diff --git a/‎example/mc_showcase.py‎
Lines changed: 18 additions & 35 deletions b/‎example/mc_showcase.py‎
Lines changed: 18 additions & 35 deletions
diff --git a/‎meaningcloud/ClusteringRequest.py‎
Lines changed: 46 additions & 0 deletions b/‎meaningcloud/ClusteringRequest.py‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎meaningcloud/ClusteringResponse.py‎
Lines changed: 86 additions & 0 deletions b/‎meaningcloud/ClusteringResponse.py‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎meaningcloud/DeepCategorizationRequest.py‎
Lines changed: 63 additions & 0 deletions b/‎meaningcloud/DeepCategorizationRequest.py‎
Lines changed: 63 additions & 0 deletions
@@ -39,23 +39,29 @@ And we are always available at support@meaningcloud.com
 This SDK currently contains the following:
 
 - **Request**: manages requests to any of MeaningCloud's APIs. It can also be used to directly generate requests without using specific classes.
-    - **LanguageRequest**: models a request to MeaningCloud Language Identification API.
-    - **TopicsRequest**: models a request to MeaningCloud TopicsExtraction API.
     - **ClassRequest**: models a request to MeaningCloud Text Classification API.
-    - **SentimentRequest**: models a request to MeaningCloud Sentiment Analysis API.
+    - **ClusteringRequest**: models a request to MeaningCloud Text Clustering API.
+    - **DeepCategorizationRequest**: models a request to MeaningCloud Deep Categorization API.
+    - **LanguageRequest**: models a request to MeaningCloud Language Identification API.
     - **ParserRequest**: models a request to MeaningCloud Lemmatization, PoS and Parsing API.
+    - **SentimentRequest**: models a request to MeaningCloud Sentiment Analysis API.
+    - **SummarizationRequest**: models a request to MeaningCloud Summarization API.
+    - **TopicsRequest**: models a request to MeaningCloud Topics Extraction API.
 - **Response**: models a generic response from the MeaningCloud API.
-    - **TopicsResponse**: models a response from the Topic Extraction API, providing auxiliary functions to work with the response, extracting the different types of topics and some of the most used fields in them.
     - **ClassResponse**: models a response from the Text Classification API, providing auxiliary functions to work with the response and extract the different fields in each category.
-    - **SentimentResponse**: models a response from the Sentiment Analysis API, providing auxiliary functions to work with the response and extract the sentiment detected at different levels and for different elements.
+    - **ClusteringResponse**: models a response from the Text Clustering API, providing auxiliary functions to work with the response and extract the different fields in each cluster.
+    - **DeepCategorizationResponse**: models a response from the Deep Categorization API, providing auxiliary functions to work with the response and extract the different fields in each category.
     - **LanguageResponse**: models a response from the Language Identification API, providing auxiliary functions to work with the response and extract the languages detected, along with their codes and names.
     - **ParserResponse**: models a response from the Lemmatization, PoS and Parsing API, providing auxiliary functions to work with the response and extract the lemmatization and PoS tagging of the text provided.
+    - **SentimentResponse**: models a response from the Sentiment Analysis API, providing auxiliary functions to work with the response and extract the sentiment detected at different levels and for different elements.
+    - **SummarizationResponse**: models a response from the Summarization API, providing auxiliary functions to work with the response and obtain the summary extracted.
+    - **TopicsResponse**: models a response from the Topics Extraction API, providing auxiliary functions to work with the response, extracting the different types of topics and some of the most used fields in them.
 
 ### Usage
 
 In the _example_ folder, there are two examples:
 - **Client.py**, which contains a simple example on how to use the SDK
-- **mc_showcase**, which implements a pipeline where plain text files are read from a folder, and two CSV files result as output: one with several types of analyses done over each text, and the results from running Text Clustering over the complete collection.
+- **mc_showcase**, which implements a pipeline where plain text files are read from a folder, and two CSV files result as output: one with several types of analyses done over each text, and the results from running [Text Clustering](https://www.meaningcloud.com/developer/text-clustering) over the complete collection.
     The analyses done are:
 
   * [Language Identification](https://www.meaningcloud.com/developer/language-identification): detects the language and returns code or name
 
@@ -10,7 +10,7 @@
 model = 'IAB_en'
 
 # @param license_key - Your license key (found in the subscription section in https://www.meaningcloud.com/developer/)
-license_key = '<your_license_key>'
+license_key = '<<<<< your license key >>>>>'
 
 # @param text - Text to use for different API calls
 text = 'London is a very nice city but I also love Madrid.'
@@ -33,7 +33,7 @@
                       topics_response.getTypeLastNode(topics_response.getOntoType(entity)) + "\n")
 
         else:
-            print("\nOh no! There was the following error: " + topics_response.getStatusMsg() + "\n")
+            print("\tNo entities detected!\n")
     else:
         if topics_response.getResponse() is None:
             print("\nOh no! The request sent did not return a Json\n")
@@ -60,11 +60,12 @@
     # If there are no errors in the request, we will use the language detected to make a request to Sentiment and Topics
     if lang_response.isSuccessful():
         print("\nThe request to 'Language Identification' finished successfully!\n")
-
-        results = lang_response.getResults()
-        if 'language_list' in results.keys() and results['language_list']:
-            language = results['language_list'][0]['language']
-            print("\tLanguage detected: " + results['language_list'][0]['name'] + ' (' + language + ")\n")
+        languages = lang_response.getLanguages()
+        if languages:
+            language = lang_response.getLanguageCode(languages[0])
+            print("\tLanguage detected: " + lang_response.getLanguageName(languages[0]) + ' (' + language + ")\n")
+        else:
+            print("\tNo language detected!\n")
 
     # We are going to make a request to the Lemmatization, PoS and Parsing API
     parser_response = meaningcloud.ParserResponse(
 
@@ -47,13 +47,13 @@ def getSentimentAnalysis(text):
 # Calls Language Detection and returns the code or name for the text
 def detectLanguage(text, get_name=False):
     language = ''
-    # We are going to make a request to the Sentiment Analysis API
+    # We are going to make a request to the Language Identification API
     print("\tDetecting language...")
     lang_response = meaningcloud.LanguageResponse(meaningcloud.LanguageRequest(license_key, txt=text).sendReq())
     if lang_response.isSuccessful():
-        langs = lang_response.getResults()['language_list']
+        langs = lang_response.getLanguages()
         if langs:
-            language = langs[0]['language'] if not get_name else langs[0]['name']
+            language = lang_response.getLanguageCode(langs[0]) if not get_name else lang_response.getLanguageName(langs[0])
     else:
         print("\tOops! Request to detect language was not succesful: (" + lang_response.getStatusCode() + ') ' + lang_response.getStatusMsg())
     return language
@@ -114,14 +114,10 @@ def getDeepCategorization(text, model, num_cats):
     # We are going to make a request to the Deep Categorization API
     formatted_categories = ''
     print("\tGetting " + model[0:len(model) - 3].replace('_', ' ') + " analysis...")
-    deepcat = meaningcloud.Request(url="https://api.meaningcloud.com/deepcategorization-1.0", key=license_key)
-    deepcat.addParam('model', model)
-    deepcat.setContentTxt(text)
-    deepcat_response = meaningcloud.Response(deepcat.sendRequest())
+    deepcat_response = meaningcloud.DeepCategorizationResponse(meaningcloud.DeepCategorizationRequest(license_key, model=model, txt=text).sendReq())
     if deepcat_response.isSuccessful():
-        cat_results = deepcat_response.getResults()
-        categories = cat_results['category_list'] if (('category_list' in cat_results.keys()) and (cat_results['category_list'] is not None)) else {}
-        formatted_categories = (', '.join(cat['label'] + ' (' + cat['relevance'] +')' for cat in categories[:num_cats])) if categories else '(none)'
+        categories = deepcat_response.getCategories()
+        formatted_categories = (', '.join(deepcat_response.getCategoryLabel(cat) + ' (' + deepcat_response.getCategoryRelevance(cat) +')' for cat in categories[:num_cats])) if categories else '(none)'
     else:
         print("\tOops! Request to Deep Categorization was not succesful: (" + deepcat_response.getStatusCode() + ') ' + deepcat_response.getStatusMsg())
 
@@ -135,7 +131,7 @@ def getTextClassification(text, model, num_cats):
     class_response = meaningcloud.ClassResponse(meaningcloud.ClassRequest(license_key, txt=text, model=model, otherparams={'txtf': 'markup'}).sendReq())
     if class_response.isSuccessful():
         categories = class_response.getCategories()
-        formatted_categories = (', '.join(class_response.getCategoryLabel(cat) + ' (' + class_response.getCategoryRelevance(cat) +')' for cat in categories[:num_cats])) if categories else '(none)'        
+        formatted_categories = (', '.join(class_response.getCategoryLabel(cat) + ' (' + class_response.getCategoryRelevance(cat) +')' for cat in categories[:num_cats])) if categories else '(none)'
     else:
         print("\tOops! The request to Text Classification was not succesful: (" + class_response.getStatusCode() + ') ' + class_response.getStatusMsg())
 
@@ -144,15 +140,12 @@ def getTextClassification(text, model, num_cats):
 
 # Calls Summarization and obtains an extractive summary with the number of sentences specified
 def getSummarization(text, sentences):
-    # We are going to make a request to the Deep Categorization API
+    # We are going to make a request to the Summarization API
     summary = ''
     print("\tGetting automatic summarization...")
-    summarization = meaningcloud.Request(url="https://api.meaningcloud.com/summarization-1.0", key=license_key)
-    summarization.addParam('sentences', sentences)
-    summarization.setContentTxt(text)
-    summarization_response = meaningcloud.Response(summarization.sendRequest())
+    summarization_response = meaningcloud.SummarizationResponse(meaningcloud.SummarizationRequest(license_key, sentences=sentences, txt=text).sendReq())
     if summarization_response.isSuccessful():
-        summary = summarization_response.getResults()['summary']
+        summary = summarization_response.getSummary()
     else:
         print("\tOops! Request to Summarization was not succesful: (" + summarization_response.getStatusCode() + ') ' + summarization_response.getStatusMsg())
 
@@ -164,22 +157,12 @@ def getClustering(text_collection):
 
     # We are going to make a request to the Clustering API
     print("Getting clustering analysis...")
-    clustering = meaningcloud.Request(url="https://api.meaningcloud.com/clustering-1.1", key=license_key)
-    clustering.addParam('lang','en')
-    clustering.addParam('mode','tm')
-    texts = "\r\n".join(val.replace("\r", ' ').replace("\n", " ").replace("\f", " ") for val in text_collection.values())
-    ids = "\r\n".join(text_collection.keys())
-    clustering.setContentTxt(texts)
-    clustering.addParam('id', ids)
-
-    clustering_response = meaningcloud.Response(clustering.sendRequest())
-
+    clustering_response = meaningcloud.ClusteringResponse(meaningcloud.ClusteringRequest(license_key, lang='en', texts=text_collection).sendReq())
     if clustering_response.isSuccessful():
-        results = clustering_response.getResults()
-        clusters = results['cluster_list'] if (('cluster_list' in results.keys()) and (results['cluster_list'] is not None)) else {}
-        titles = [cl['title'] for cl in clusters]
-        sizes = [cl['size'] for cl in clusters]
-        scores = [float(cl['score']) for cl in clusters]
+        clusters = clustering_response.getClusters()
+        titles = [clustering_response.getClusterTitle(cl) for cl in clusters]
+        sizes = [clustering_response.getClusterSize(cl) for cl in clusters]
+        scores = [clustering_response.getClusterScore(cl) for cl in clusters]
         docs = [', '.join(cl['document_list'].keys()) for cl in clusters]
         return titles, sizes, scores, docs
     else:
@@ -244,7 +227,7 @@ def analyzeText(text, fibo=False):
     # read files
     input_files = {}
     for file_name in os.listdir('./' + input_folder):
-        f = open(input_folder + '/' + file_name)
+        f = open(input_folder + '/' + file_name, 'r', encoding='utf-8', errors='ignore')
         if f.mode == 'r':
             input_files[file_name] = f.read()
 
@@ -259,12 +242,12 @@ def analyzeText(text, fibo=False):
     df[label_list] = df['Text'].apply(analyzeText, fibo=get_fibo)
     df.to_csv('./' + output_file + '.csv', index_label='File_name')
     print("Results printed to '"+ output_file + ".csv'!")
-    #print(df)
+    # print(df)
 
 
     # Cluster all files
     resulting_clusters = getClustering(input_files)
     df_clusters = pd.DataFrame( {'Cluster_Name': resulting_clusters[0], 'Size': resulting_clusters[1], 'Score': resulting_clusters[2], 'Documents': resulting_clusters[3]})
     df_clusters.to_csv('./' + output_file + '_clusters.csv', index_label='Cluster_ID')
     print("Clustering results printed to '"+ output_file + "_clusters.csv'!")
-    #print(df_clusters)
+    # print(df_clusters)
@@ -0,0 +1,46 @@
+import meaningcloud.Request
+
+
+class ClusteringRequest(meaningcloud.Request):
+    """Request to the MeaningCloud Text Clustering API."""
+
+    URL = 'https://api.meaningcloud.com/clustering-1.1'
+    otherparams = None
+    extraheaders = None
+    type_ = ""
+
+    def __init__(self, key, lang, texts, mode='tm', otherparams=None, extraheaders=None):
+        """
+        ClusteringRequest constructor
+
+        :param key:
+            license key
+        :param lang:
+            language of the texts
+        :param texts:
+            Collection of texts to cluster. Dictionary expected where the keys are the IDs of the text/doc
+            and the values are the texts themselves
+        :param mode:
+            Clustering algorithm
+        :param otherparams:
+            Dictionary where other params can be added to be used in the API call
+        :param extraheaders:
+            Extra headers to be used in the request; passed as-is to sendRequest
+        """
+
+        self._params = {}
+        meaningcloud.Request.__init__(self, self.URL, key)
+        self.otherparams = otherparams
+        self.extraheaders = extraheaders
+        self._url = self.URL
+
+        self.addParam('key', key)
+        self.addParam('lang', lang)
+        self.addParam('mode', mode)
+        # Texts and IDs are joined one per line, in the same dictionary order.
+        # Line breaks inside a text are replaced by spaces so each text stays on one line.
+        self.addParam('txt', "\r\n".join(val.replace("\r", ' ').replace("\n", " ").replace("\f", " ") for val in texts.values()))
+        # str() keeps non-string IDs (e.g. integers) from breaking the join
+        self.addParam('id', "\r\n".join(str(doc_id) for doc_id in texts))
+
+        if otherparams:
+            # 'name' instead of 'key' so the license key parameter is not shadowed
+            for name in otherparams:
+                self.addParam(name, otherparams[name])
+
+    def sendReq(self):
+        """
+        Sends the request, forwarding the extra headers given in the constructor
+
+        :return:
+            Whatever sendRequest returns
+        """
+
+        return self.sendRequest(self.extraheaders)
@@ -0,0 +1,86 @@
+import meaningcloud.Response
+
+
+class ClusteringResponse(meaningcloud.Response):
+    """Response from the MeaningCloud Text Clustering API, with helpers to read each cluster."""
+
+    def __init__(self, response):
+        """
+        ClusteringResponse constructor
+
+        :param response:
+            String returned by the request
+        :raises Exception:
+            If the response is empty
+        """
+
+        if not response:
+            raise Exception("The request sent did not return a response")
+        meaningcloud.Response.__init__(self, response)
+
+    def getClusters(self):
+        """
+        Get clusters found for the texts sent
+
+        :return:
+            Value of 'cluster_list' in the response, or an empty dictionary if it is missing or None
+        """
+
+        clusters = self._response.get('cluster_list')
+        return clusters if clusters is not None else {}
+
+    # Generic auxiliary functions
+
+    @staticmethod
+    def _getClusterField(cluster, field, default):
+        """
+        Get a field from a cluster, falling back to a default value
+
+        :param cluster:
+            Cluster (dictionary) to read from; may be empty or None
+        :param field:
+            Name of the field to read
+        :param default:
+            Value returned when the cluster is empty or the field is missing or None
+        :return:
+            Field value or the default
+        """
+
+        if not cluster:
+            return default
+        value = cluster.get(field)
+        return default if value is None else value
+
+    def getClusterTitle(self, cluster):
+        """
+        Get the title of a cluster
+
+        :param cluster:
+            Cluster you want the title from
+        :return:
+            Cluster title, or an empty string if not available
+        """
+
+        return self._getClusterField(cluster, 'title', "")
+
+    def getClusterSize(self, cluster):
+        """
+        Get the size of a cluster
+
+        :param cluster:
+            Cluster you want the size from
+        :return:
+            Cluster size, or an empty string if not available
+        """
+
+        return self._getClusterField(cluster, 'size', "")
+
+    def getClusterScore(self, cluster):
+        """
+        Get the score of a cluster
+
+        :param cluster:
+            Cluster you want the score from
+        :return:
+            Cluster score, or an empty string if not available
+        """
+
+        return self._getClusterField(cluster, 'score', "")
+
+    def getClusterDocuments(self, cluster):
+        """
+        Get the list of documents in a cluster
+
+        :param cluster:
+            Cluster you want the documents from
+        :return:
+            Value of 'document_list' in the cluster, or an empty dictionary if not available
+        """
+
+        # 'document_list' belongs to each cluster, not to the top-level response
+        return self._getClusterField(cluster, 'document_list', {})
@@ -0,0 +1,63 @@
+import meaningcloud.Request
+
+
+class DeepCategorizationRequest(meaningcloud.Request):
+    """Request to the MeaningCloud Deep Categorization API."""
+
+    URL = 'https://api.meaningcloud.com/deepcategorization-1.0'
+    otherparams = None
+    extraheaders = None
+    type_ = ""
+
+    def __init__(self, key, model, txt=None, url=None, doc=None, polarity='n', otherparams=None, extraheaders=None):
+        """
+        DeepCategorizationRequest constructor
+
+        :param key:
+            license key
+        :param model:
+            Name of the model to use in the classification
+        :param txt:
+            Text to use in the API calls
+        :param url:
+            Url to use in the API calls
+        :param doc:
+            File to use in the API calls
+        :param polarity:
+            Determines if categories will contain an associated polarity value.
+        :param otherparams:
+            Dictionary where other params can be added to be used in the API call
+        :param extraheaders:
+            Extra headers to be used in the request; passed as-is to sendRequest
+        """
+
+        self._params = {}
+        meaningcloud.Request.__init__(self, self.URL, key)
+        self.otherparams = otherparams
+        self.extraheaders = extraheaders
+        self._url = self.URL
+
+        self.addParam('key', key)
+        self.addParam('model', model)
+        self.addParam('polarity', polarity)
+
+        # Content source precedence: txt, then doc, then url.
+        # When none of them is given, setContentTxt is still called with the empty txt.
+        if txt:
+            self.setContentTxt(txt)
+        elif doc:
+            self.setContentFile(doc)
+        elif url:
+            self.setContentUrl(url)
+        else:
+            self.setContentTxt(txt)
+
+        if otherparams:
+            # 'name' instead of 'key' so the license key parameter is not shadowed
+            for name in otherparams:
+                self.addParam(name, otherparams[name])
+
+    def sendReq(self):
+        """
+        Sends the request, forwarding the extra headers given in the constructor
+
+        :return:
+            Whatever sendRequest returns
+        """
+
+        return self.sendRequest(self.extraheaders)