-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSVD-LSI
More file actions
37 lines (33 loc) · 1.12 KB
/
SVD-LSI
File metadata and controls
37 lines (33 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
import re
# Create the matrix of term counts across the texts to analyze
def txtMat(textLST, q, freqLST=None):
    """Build a term-document count matrix and a matching query vector.

    Texts and the query are tokenized with ``split()`` (whitespace only),
    and tokens are compared case-sensitively.

    Args:
        textLST: Iterable of document strings.
        q: Query string.
        freqLST: Optional vocabulary (sequence of terms), one matrix row per
            term. When ``None`` the vocabulary is every distinct token found
            in ``textLST``, sorted case-insensitively.

    Returns:
        A ``(matrix, query)`` tuple. ``matrix`` is an ``ndarray`` with one
        row per vocabulary term and one column per text, holding how often
        the term occurs in that text. ``query`` is a 1-D ``ndarray`` of the
        same term counts for ``q``.
    """
    from collections import Counter

    # Split every text exactly once; the counts are looked up per term below.
    tokenized = [text.split() for text in textLST]

    # `is None` rather than `== None`: the latter compares element-wise (and
    # then fails in the `if`) when the vocabulary is a NumPy array.
    if freqLST is None:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # stable sort below breaks case-insensitive ties deterministically.
        first_seen = dict.fromkeys(tok for tokens in tokenized for tok in tokens)
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether a caller-supplied vocabulary was also re-sorted. Here it is
        # used in the order given - confirm against callers.
        freqLST = sorted(first_seen, key=lambda term: term.upper())

    numfreq = []
    for tokens in tokenized:
        counts = Counter(tokens)
        numfreq.append([counts[term] for term in freqLST])

    query_counts = Counter(q.split())
    query_vec = np.array([query_counts[term] for term in freqLST])
    return (np.array(numfreq).T, query_vec)
# Rank approximation
def rankapprox(rank, mat):
    """Return the rank-truncated SVD factors of a matrix.

    Args:
        rank: Number of leading singular values/vectors to keep. A value
            larger than ``min(mat[0].shape)`` keeps every available one.
        mat: Indexable whose first element is the 2-D matrix to factor, for
            example the ``(matrix, query)`` tuple returned by ``txtMat``.
            Only ``mat[0]`` is read.

    Returns:
        A list ``[Um, Sm, Vm]``: ``Um`` is an ``np.matrix`` whose columns are
        the first ``rank`` left singular vectors, ``Sm`` is a square diagonal
        ``ndarray`` of the first ``rank`` singular values, and ``Vm`` is an
        ``np.matrix`` with the first ``rank`` rows of ``V^T``.
    """
    # The reduced SVD is sufficient because only leading columns/rows are
    # kept. It avoids building the full square U, and it keeps the three
    # factors dimensionally consistent when `rank` exceeds min(rows, cols).
    U, S, Vt = np.linalg.svd(mat[0], full_matrices=False)
    # np.matrix is kept on purpose: `analysis` multiplies these factors with
    # `*` and indexes single columns expecting 2-D results.
    Um = np.matrix(U[:, :rank])
    Sm = np.diag(S[:rank])
    Vm = np.matrix(Vt[:rank, :])
    return [Um, Sm, Vm]
# Rank-based analysis
def analysis(q, r, ts=None):
    """Score documents against a query by cosine similarity in reduced space.

    The query is folded into the reduced space as ``q^T * Um * inv(Sm)`` and
    compared with the leading columns of ``Vm``.

    Args:
        q: Query term-count vector with one entry per row of ``r[0]``.
        r: Sequence ``[Um, Sm, Vm]`` as returned by ``rankapprox``. The
            factors may be ``np.matrix`` or plain ``ndarray`` objects.
        ts: Index of the last document (column of ``r[2]``) to score, so
            ``ts + 1`` documents are scored. ``None`` scores every document;
            a negative value scores none.

    Returns:
        List of cosine similarities, one per scored document, in column
        order. A zero-length query or document vector yields ``nan``.

    Raises:
        IndexError: If ``ts`` is past the last column of ``r[2]``.
        numpy.linalg.LinAlgError: If ``r[1]`` is singular.
    """
    # Work on plain arrays so `@` and 1-D slicing behave the same whether the
    # factors arrive as np.matrix or ndarray.
    term_basis = np.asarray(r[0])
    sigma = np.asarray(r[1])
    doc_coords = np.asarray(r[2])

    coords = np.asarray(q).ravel() @ (term_basis @ np.linalg.inv(sigma))

    # NOTE(review): this print looks like leftover debugging output; it is
    # kept so the function's observable behavior does not change.
    print(r[2])

    doc_total = doc_coords.shape[1]
    count = doc_total if ts is None else max(ts + 1, 0)
    if count > doc_total:
        raise IndexError(
            "ts=%r is out of bounds for %d document(s)" % (ts, doc_total)
        )

    selected = doc_coords[:, :count]
    # The query norm is loop-invariant, so it is computed once; column norms
    # are taken in a single vectorized call.
    denom = np.linalg.norm(coords) * np.linalg.norm(selected, axis=0)
    return list((coords @ selected) / denom)