diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 00000000..26d33521
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/amazon-textract-serverless-large-scale-document-processing.iml b/.idea/amazon-textract-serverless-large-scale-document-processing.iml
new file mode 100644
index 00000000..d0876a78
--- /dev/null
+++ b/.idea/amazon-textract-serverless-large-scale-document-processing.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 00000000..105ce2da
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..2aca4c44
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..94a25f7f
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/datastore.py b/src/datastore.py
index d4ff570c..be311f0a 100644
--- a/src/datastore.py
+++ b/src/datastore.py
@@ -48,10 +48,11 @@ def updateDocumentStatus(self, documentId, documentStatus):
try:
table.update_item(
Key = { 'documentId': documentId },
- UpdateExpression = 'SET documentStatus= :documentstatusValue',
+ UpdateExpression = 'SET documentStatus= :documentstatusValue, documentCompletedOn = :documentCompletedOnValue',
ConditionExpression = 'attribute_exists(documentId)',
ExpressionAttributeValues = {
- ':documentstatusValue': documentStatus
+ ':documentstatusValue': documentStatus,
+ ':documentCompletedOnValue': str(datetime.datetime.utcnow())
}
)
except ClientError as e:
diff --git a/src/jobresultsproc.py b/src/jobresultsproc.py
index cdc8c260..17bed4fd 100644
--- a/src/jobresultsproc.py
+++ b/src/jobresultsproc.py
@@ -55,6 +55,32 @@ def processRequest(request):
objectName = request['objectName']
outputTable = request["outputTable"]
documentsTable = request["documentsTable"]
+ qUrl = request["dlqQueueUrl"]
+
+ if jobStatus == 'FAILED':
+
+ print("DocumentId: {}".format(jobTag))
+
+ ds = datastore.DocumentStore(documentsTable, outputTable)
+ ds.updateDocumentStatus(jobTag, jobStatus)
+
+ output = "Processed -> Document: {}, Object: {}/{} processed.".format(jobTag, bucketName, objectName)
+ print(output)
+
+ features = ["Text", "Forms", "Tables"]
+ jsonMessage = {'documentId': jobTag,
+ "features": features,
+ 'bucketName': bucketName,
+ 'objectName': objectName}
+
+ client = AwsHelper().getClient('sqs')
+
+ message = json.dumps(jsonMessage)
+ client.send_message(QueueUrl=qUrl, MessageBody=message)
+
+ print("Submitted message to DLQ queue: {}".format(message))
+
+ return
pages = getJobResults(jobAPI, jobId)
@@ -109,6 +135,7 @@ def lambda_handler(event, context):
request["outputTable"] = os.environ['OUTPUT_TABLE']
request["documentsTable"] = os.environ['DOCUMENTS_TABLE']
+ request["dlqQueueUrl"] = os.environ['DLQ_QUEUE_URL']
return processRequest(request)
diff --git a/textract-pipeline/lib/textract-pipeline-stack.ts b/textract-pipeline/lib/textract-pipeline-stack.ts
index c730463d..b0760aba 100644
--- a/textract-pipeline/lib/textract-pipeline-stack.ts
+++ b/textract-pipeline/lib/textract-pipeline-stack.ts
@@ -286,6 +286,7 @@ export class TextractPipelineStack extends cdk.Stack {
environment: {
OUTPUT_TABLE: outputTable.tableName,
DOCUMENTS_TABLE: documentsTable.tableName,
+ DLQ_QUEUE_URL: dlq.queueUrl,
AWS_DATA_PATH : "models"
}
});