-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse.py
More file actions
104 lines (89 loc) · 2.68 KB
/
parse.py
File metadata and controls
104 lines (89 loc) · 2.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from html.parser import HTMLParser
import os, django
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "snapps.settings")
django.setup()
from sangbok.models import *
# Song collects the fields of one song; MyHTMLParser (further down) subclasses HTMLParser and overrides its handler methods.
class Song:
    """Mutable scratch record for the song currently being read.

    MyHTMLParser fills these attributes while it walks the input and calls
    print_song() each time a song is complete. The same instance is reused
    for every song.
    """

    # Category heading text -> primary key of the Category row to attach.
    _CATEGORY_IDS = {
        'Fosterländska Sånger': 3,
        'Akademiska sånger': 4,
        'Teknologvisor': 5,
        'Snapsvisor': 6,
        'Vinvisor': 7,
        'Andra dryckesvisor': 8,
        'Punschvisor': 9,
        'Sama suomeksi': 10,
        'Diverse sånger': 11,
    }

    title = ''         # text of the <b> element
    predata = ''       # <i> text that appeared before the lyrics
    text = ''          # the lyrics (untagged text, plus inline <i> notes)
    postdata = ''      # <i> text that appeared after the lyrics
    category = ''      # text of the most recent <s> element
    tempdata = ''      # <i> text collected but not yet assigned to a field
    tempafterb = True  # True until the first pending <i> text after a title
                       # has been moved into predata

    def print_song(self):
        """Store the collected fields as a new Snapsvisa row.

        Despite the name, nothing is printed: a Snapsvisa is created, filled
        from this record and saved. The category is attached only when
        self.category is one of the known headings; otherwise the row is
        saved without one being assigned here.
        """
        record = Snapsvisa()
        record.name = self.title
        record.pre = self.predata
        record.lyrics = self.text
        record.post = self.postdata

        category_id = self._CATEGORY_IDS.get(self.category)
        if category_id is not None:
            record.category = Category.objects.get(id=category_id)

        record.save()
class MyHTMLParser(HTMLParser):
    """Split the song-book markup into songs and hand each one to ``song``.

    The markup is interpreted as follows:

    * ``<b>`` - song title; it also marks the end of the previous song.
    * ``<i>`` - a note. Its text is buffered in ``song.tempdata`` and later
      becomes ``predata`` (first note before any lyrics), part of the lyrics
      (a note between lyric chunks) or ``postdata`` (a note still pending
      when the next title starts).
    * ``<s>`` - category heading; it stays in effect until the next ``<s>``.
    * text outside any tag - lyrics.

    ``song`` must provide the attributes ``title``, ``predata``, ``text``,
    ``postdata``, ``category``, ``tempdata``, ``tempafterb`` and a
    ``print_song()`` method, which is called once per completed song.
    """

    song = None
    cur_tag = None  # tag whose content is being read; None outside any tag

    def __init__(self, song):
        super().__init__()
        self.song = song

    def handle_starttag(self, tag, attrs):
        # A new title while one is already held means the previous song is
        # complete. Use the parser's own song object here, not a module-level
        # global, so the class works wherever it is instantiated.
        if tag == 'b' and self.song.title != '':
            # Any note still pending belongs after the lyrics just finished.
            self.song.postdata = self.song.tempdata
            self.song.tempdata = ''
            self.song.print_song()
            self.song.tempafterb = True
        self.cur_tag = tag

    def handle_endtag(self, tag):
        # Nesting is not tracked: closing any tag returns to "outside a tag".
        self.cur_tag = None

    def handle_data(self, data):
        if self.cur_tag == 'i':
            self.song.tempdata += data
        elif self.cur_tag is None:
            stripdata = data.strip()
            if stripdata != '':
                if self.song.tempdata != '':
                    if self.song.tempafterb:
                        # First note after the title precedes the lyrics.
                        self.song.predata = self.song.tempdata
                        self.song.tempdata = ''
                        self.song.tempafterb = False
                    else:
                        # A later note is embedded in the lyrics on a new line.
                        self.song.text += ('\n' + self.song.tempdata)
                        self.song.tempdata = ''
                self.song.text += stripdata
        elif self.cur_tag == 'b':
            # New title: start the per-song fields afresh. The category is
            # deliberately kept, it carries over from the last <s>.
            self.song.title = data
            self.song.text = ''
            self.song.predata = ''
            self.song.postdata = ''
        elif self.cur_tag == 's':
            self.song.category = data
# Driver: parse sangbok.txt from the current working directory and store each
# song it contains through Song.print_song().
song = Song()
parser = MyHTMLParser(song)
# NOTE(review): no encoding is passed, so the platform default is used;
# confirm that it matches the encoding of sangbok.txt.
with open('sangbok.txt') as f:  # closed automatically, even if read() fails
    txt = f.read()
parser.feed(txt)
# NOTE(review): a song is only saved when the NEXT <b> title is encountered,
# so the last song in the file is not saved unless the input ends with an
# extra <b> element - confirm this is intended.