-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollator.py
More file actions
173 lines (145 loc) · 4.62 KB
/
collator.py
File metadata and controls
173 lines (145 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
"""
Simple tool to help organize my personal music library.
Copyright (C) 2017, Dave Brookshire <dave@brookshire.org>
"""
import hashlib
import os
import re
import time
import eyed3
# Module-wide switch: when True, build_library() prints progress messages and the
# script entry point lists the unknown files it found.
debug = False

# Matches names ending in a supported audio extension (case-insensitive). The
# extension is captured in the "extension" named group. A raw string is used so
# that "\." reaches the regex engine untouched instead of being treated as an
# (invalid) Python string escape.
audio_file_re = re.compile(r".+\.(?P<extension>mp3|m4a)$", re.IGNORECASE)
class CollatorFile:
    """
    Abstraction of an individual file in the library collection.

    Uses fields in the actual path/name of the file and certain ID3 tags to attempt to
    normalize the formatting and organization of the library.
    """
    # Class-level defaults. __init__ always overwrites fname, hash and audiofile; ext is only
    # overwritten when the file name ends in a recognized audio extension.
    fname = None
    hash = None
    audiofile = None
    ext = None

    # Positions of the artist and album directories in the '/'-split path.
    # NOTE(review): these presumably assume a layout such as
    # /home/ec2-user/dev.music/<artist>/<album>/<file> -- confirm before reusing elsewhere.
    _ARTIST_INDEX = 4
    _ALBUM_INDEX = 5

    def __init__(self, fname):
        """
        Hash the file, load it through eyed3 and record its audio extension (if any).

        :param fname: Path of the file to wrap.
        """
        self.fname = fname
        self.basename = os.path.basename(self.fname)
        self.hash = self.generate_hash()
        self.audiofile = eyed3.load(self.fname)

        mo = audio_file_re.match(self.basename)
        if mo:
            self.ext = mo.group("extension")

    def __str__(self):
        lines = [
            "Title: {0}, Artist: {1}, Album: {2}".format(os.path.basename(self.fname),
                                                         self.path_artist,
                                                         self.path_album),
            "Title Tag: {0} ({1})".format(self.title_tag, self.ext),
            "Album Tag: {0}".format(self.album_tag),
            "Artist Tag: {0}".format(self.artist_tag),
        ]
        return "\n".join(lines)

    def _tag_value(self, name):
        """
        Look up one attribute of the loaded file's tag object.

        :param name: Attribute name on the tag object (e.g. "title").
        :return: The attribute value, or None when no audio file was loaded, the file has no
                 tag, or the tag lacks that attribute.
        """
        tag = getattr(self.audiofile, "tag", None)
        return getattr(tag, name, None)

    @property
    def title_tag(self):
        """
        Lookup the ID3 Title tag value.
        :return: The title, or None when it is unavailable.
        """
        return self._tag_value("title")

    @property
    def album_tag(self):
        """
        Lookup the ID3 Album tag value.
        :return: The album, or None when it is unavailable.
        """
        return self._tag_value("album")

    @property
    def artist_tag(self):
        """
        Lookup the ID3 Artist tag value.
        :return: The artist, or None when it is unavailable.
        """
        return self._tag_value("artist")

    def generate_hash(self):
        """
        Generate SHA1 hash value for the file.  We'll use this to uniquely identify files and help
        determine duplicates.
        :return: Hex digest string of the file's contents.
        """
        BLOCKSIZE = 65536
        hasher = hashlib.sha1()
        with open(self.fname, 'rb') as afile:
            # Read in fixed-size chunks so large files are never held in memory at once.
            for buf in iter(lambda: afile.read(BLOCKSIZE), b""):
                hasher.update(buf)
        return hasher.hexdigest()

    def is_audio_file(self):
        """
        Determine if the file is an audio file or not.  Right now, this is based solely on the
        extension of the file name.
        :return: True when a recognized audio extension was found, otherwise False.
        """
        return self.ext is not None

    def path_elements(self):
        """
        Split the file path into its '/'-separated components.
        :return: List of path components.
        """
        return self.fname.split('/')

    def _path_element(self, index):
        """
        Return the path component at *index*, or None when the path is too shallow.
        """
        elements = self.path_elements()
        return elements[index] if index < len(elements) else None

    @property
    def path_artist(self):
        """
        Artist name taken from the directory structure.
        :return: The path component at the artist position, or None if the path is too short.
        """
        return self._path_element(self._ARTIST_INDEX)

    @property
    def path_album(self):
        """
        Album name taken from the directory structure.
        :return: The path component at the album position, or None if the path is too short.
        """
        return self._path_element(self._ALBUM_INDEX)
def build_library(path):
    """
    Recursively build a library of audio files given a directory path.

    Progress messages are printed when the module-level ``debug`` flag is set.

    :param path: Directory to scan; sub-directories are descended into recursively.
    :return: Tuple ``(library, unknowns)``: a list of CollatorFile objects for the audio files
             found, and a list of path strings for every other entry.
    """
    library = []
    unknowns = []

    if debug:
        print("Finding audio files in {0}".format(path))

    for entry in os.listdir(path):
        # Normalize separators so CollatorFile can split the path on '/'.
        fpath = os.path.join(path, entry).replace("\\", "/")

        if os.path.isdir(fpath):
            sub_library, sub_unknowns = build_library(fpath)
            library.extend(sub_library)
            unknowns.extend(sub_unknowns)
            continue

        # Constructing a CollatorFile hashes the whole file and hands it to eyed3, so reject
        # anything that is not a regular file with an audio extension before paying that cost.
        # This is the same pattern, applied to the same basename, that CollatorFile uses.
        if not os.path.isfile(fpath) or audio_file_re.match(os.path.basename(fpath)) is None:
            unknowns.append(fpath)
            continue

        ncf = CollatorFile(fpath)
        if ncf.is_audio_file():
            library.append(ncf)
        else:
            unknowns.append(fpath)

    if debug:
        print("Found {0} audio files, and {1} unknown files".format(len(library),
                                                                    len(unknowns)))

    return library, unknowns
if __name__ == '__main__':
    import sys

    # Library root: the first command-line argument when given, otherwise the original
    # hard-coded location.
    root = sys.argv[1] if len(sys.argv) > 1 else "/home/ec2-user/dev.music"

    # Time the full scan (hashing and tag loading included).
    start = time.time()
    library, unknowns = build_library(root)
    stop = time.time()

    print("Processed library in {0} seconds".format(stop - start))
    print("Library consists of {0} discovered and identified audio files".format(len(library)))
    print("Found {0} unknown or unidentifiable files".format(len(unknowns)))

    for t in library:
        print(t)

    # The unknown-file listing is only shown in debug mode.
    if unknowns and debug:
        print("\n\nUnknown Files")
        print("-------------")
        for u in unknowns:
            print("  {0}".format(u))