tools-github-bwb/git_utils.rb at master · statengeneraal/tools-github-bwb · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
require 'open-uri'
require 'uri'
require 'zip'
require_relative 'markdown_utils'

# Key for the list of law entries (Dutch: "regeling info lijst").
# NOTE(review): has the same value as LAW_LIST below; neither is referenced in this file.
BWB_RESULT = 'regelingInfoLijst'

# String keys of a single law entry hash. The values are Dutch field names; the
# English glosses are literal translations and should be confirmed against the data source.
# Law identifier; GitUtils.create_path uses it as the last path segment.
BWB_ID = 'bwbId'
# Kind of regulation; GitUtils.create_path uses it as the first path segment.
KIND = 'regelingSoort'
# Official title; the starting candidate for the shortest title in GitUtils.create_path.
OFFICIAL_TITLE = 'officieleTitel'
# Date of last modification; GitUtils#get_xml uses it to pick the cached/downloaded version.
DATE_LAST_MODIFIED = 'datumLaatsteWijziging'
# Expiration date ("vervalDatum"); only referenced in commented-out code in this file.
EXPIRATION_DATE = 'vervalDatum'
# Date of entry into force ("inwerkingtredingsDatum"); not referenced in this file.
ENTRY_DATE = 'inwerkingtredingsDatum'
# Title text inside a cite-title element (read as cite_title[TITLE] in GitUtils.create_path).
TITLE = 'titel'
# Status; not referenced in this file.
STATUS = 'status'
# List of non-official titles; GitUtils.create_path treats its elements as plain strings.
NON_OFFICIAL_TITLE_LIST = 'nietOfficieleTitels'
# List of abbreviations ("afkortingen"); not referenced in this file.
ABBREVIATION_LIST = 'afkortingen'
# List of cite titles; GitUtils.create_path reads element[TITLE] from each element.
CITE_TITLE_LIST = 'citeertitels'
# "Generated on" timestamp key; not referenced in this file.
GENERATED_ON = 'gegenereerdOp'
# Key for the list of laws; same value as BWB_RESULT above.
LAW_LIST = 'regelingInfoLijst'
# Key for a path value; not referenced in this file — presumably stores the result of create_path, TODO confirm.
PATH='path'

# Module containing methods for converting BWB XML to Markdown, committing repos, parsing the BWB list, etc.
# TODO tear some stuff apart
module GitUtils
  # Folder the Markdown git helpers (commit/pull/push/gc) chdir into before running git.
  MARKDOWN_FOLDER = 'md'
  # Path of the .git directory inside the Markdown folder; not referenced in this module.
  MARKDOWN_GIT_FOLDER = "#{MARKDOWN_FOLDER}/.git"
  # Folder XML files are written to and the XML git helpers chdir into.
  XML_FOLDER = 'xml'
  # File name of a JSON BWB list; not referenced in this module — presumably used by callers, TODO confirm.
  BWB_JSON='bwb_list.json'
  # Path of an index.json inside the Markdown folder; not referenced in this module.
  INDEX_PATH = "#{MARKDOWN_FOLDER}/index.json"

  # Return the XML text for one version of a law, preferring the on-disk cache.
  #
  # Lookup order:
  # 1. cache/<bwb_id>.<date>.xml
  # 2. cache/<bwb_id>%2F<date>.xml (older cache file naming)
  # 3. download from wetten.cloudant.com, then store the result under name 1
  #
  # @param bwb_id [String] identifier of the law
  # @param entry [Hash] law entry; only entry[DATE_LAST_MODIFIED] is read
  # @return [String] the XML text, or '' when nothing could be read
  def get_xml(bwb_id, entry)
    version = entry[DATE_LAST_MODIFIED]
    cache_path = "cache/#{bwb_id}.#{version}.xml"
    old_cache_path = "cache/#{bwb_id}%2F#{version}.xml"

    str_xml =
      if File.exist?(cache_path)
        # File.read closes the handle; File.open(path).read left it open until GC
        File.read(cache_path)
      elsif File.exist?(old_cache_path)
        File.read(old_cache_path)
      else
        # Kernel#open no longer accepts URLs on Ruby 3+; open-uri provides URI.open
        url = "http://wetten.cloudant.com/bwb/#{bwb_id}:#{version}/data.xml"
        downloaded = URI.open(url) { |io| io.read }.force_encoding('utf-8')

        # Write to cache; mkdir_p is a no-op when the folder already exists
        FileUtils.mkdir_p('cache')
        File.open(cache_path, 'w+') { |f| f.puts downloaded }
        downloaded
      end

    str_xml || ''
  end

  # Write the XML of a law to <XML_FOLDER>/<bwb_id>.xml, creating the folder if needed.
  #
  # @param bwb_id [String] identifier of the law; used as the file's base name
  # @param str_xml [String] XML text to write (puts appends a newline if it is missing)
  # @return [nil]
  def write_xml_to_file(bwb_id, str_xml)
    # mkdir_p is idempotent, so no existence check is needed (File.exists? was removed in Ruby 3.2)
    FileUtils.mkdir_p(XML_FOLDER)
    # File.open instead of Kernel#open: the latter would run a command if the path started with '|'
    File.open("#{XML_FOLDER}/#{bwb_id}.xml", 'w+') do |f|
      f.puts str_xml
    end
  end

  # Build the repository-relative folder path for a law:
  # "<kind>/<word>/<word>/.../<bwb id>", where the words come from the shortest
  # known title (official title, cite titles, non-official titles; the earliest
  # candidate wins ties). Without any title the path is "<kind>/<bwb id>".
  #
  # Each word is made filesystem-friendly: Windows device names and all-period
  # words are wrapped in underscores, leading periods and characters that are
  # invalid in file names are dropped, and everything is downcased. Words are
  # appended until the word part would reach 50 characters, after which a
  # final "etc" folder is appended instead.
  #
  # @param law [Hash] law entry, read through the KIND, BWB_ID, OFFICIAL_TITLE,
  #   CITE_TITLE_LIST (array of hashes with a TITLE key) and
  #   NON_OFFICIAL_TITLE_LIST (array of strings) keys
  # @return [String] the path, without duplicate or trailing slashes
  def self.create_path(law)
    candidates = [law[OFFICIAL_TITLE]]
    (law[CITE_TITLE_LIST] || []).each do |cite_title|
      candidates << (cite_title && cite_title[TITLE])
    end
    candidates.concat(law[NON_OFFICIAL_TITLE_LIST] || [])

    # Missing titles are skipped instead of raising NoMethodError on nil.length.
    # The strict < keeps the earliest candidate when lengths are equal.
    shortest_title = candidates.compact.reduce do |best, title|
      title.length < best.length ? title : best
    end

    if shortest_title
      words = shortest_title.split(/ /).map do |raw_word|
        word =
          case raw_word.downcase
          when 'con', 'prn', 'aux', 'nul', /com[0-9]/, /lpt[0-9]/, /^\.+$/
            # Escape Windows-unfriendly folders, e.g., driver file or only periods
            # See http://support.microsoft.com/kb/74496/en-us
            "_#{raw_word}_"
          else
            raw_word
          end
        word = word.gsub(/^\.+/, '') # Replace leading periods with ''
        word = word.gsub(/["\/\^\?<>:\*\|]/, '') # Replace any non-valid char with '', see http://support.grouplogic.com/?p=1607
        word = word.gsub(/[,°]/, '') # Replace ugly chars with ''
        word = word.downcase # For Windows /CONCERNANT/ and /ConCerNanT/ are the same folders, so just downcase it

        # Stripping punctuation can itself produce a device name ("CON," -> "con"),
        # which the check above missed because it saw the raw word.
        word = "_#{word}_" if word =~ /\A(?:con|prn|aux|nul|com[0-9]|lpt[0-9])\z/
        word
      end

      escaped = ''.dup
      words.each do |word|
        if escaped.length + word.length < 50 # Don't exceed 50 chars (note that Windows has a 255 char limit for paths)
          escaped << "/#{word}"
        else
          escaped << '/etc'
          break
        end
      end
      escaped = "/#{escaped}" unless escaped.start_with?('/')

      path = "#{law[KIND]}#{escaped}/#{law[BWB_ID]}"
    else
      path = "#{law[KIND]}/#{law[BWB_ID]}"
    end

    path = path.gsub(/\/\/+/, '/') # Remove duplicate /'s (e.g. from words that were stripped to nothing)
    path.gsub(/\/+$/, '') # Remove trailing /'s
  end

  # Download the zipped BWB id list, extract BWBIdList.xml, write it to
  # <XML_FOLDER>/bwbIdList/BWBIdList.xml and commit it to the XML repository
  # with today's date.
  #
  # Throws :could_not_read_xml when the archive entry could not be read.
  def save_bwb_list_xml
    puts 'Downloading XML'
    # Kernel#open no longer accepts URLs on Ruby 3+; open-uri provides URI.open
    zipped_file = URI.open('http://wetten.overheid.nl/BWBIdService/BWBIdList.xml.zip')

    xml_source = nil
    Zip::File.open(zipped_file) do |zip|
      xml_source = zip.read('BWBIdList.xml').force_encoding('UTF-8')
    end
    throw :could_not_read_xml if xml_source.nil?

    # Write xml to file. Create the bwbIdList subfolder too, not just XML_FOLDER,
    # otherwise the write fails on a fresh checkout.
    xml_path = "#{XML_FOLDER}/bwbIdList/BWBIdList.xml"
    FileUtils.mkdir_p(File.dirname(xml_path))
    File.open(xml_path, 'w+') do |f|
      f.puts xml_source
    end

    # The path is relative to XML_FOLDER because commit_xml_repo chdirs into it
    commit_xml_repo(Time.now.strftime('%Y-%m-%d'), ['bwbIdList/BWBIdList.xml'], 'BWBIdList ')
  end

  # Replace, in place, the text of capture groups in the first match of a regex.
  #
  # @param regexp [Regexp] pattern to match against string
  # @param string [String] string to modify in place
  # @param updates [Hash] capture group (name or index) => replacement text
  # @return [Array, nil] the replaced group keys, right-most group first, or
  #   nil when the regex does not match
  def substitute!(regexp, string, updates)
    match = regexp.match(string)
    return unless match

    # Optional groups that did not participate have [nil, nil] offsets, which
    # cannot be sorted or used as a range; leave them alone.
    participating = updates.keys.select { |k| match.begin(k) }

    # Work from right to left so earlier offsets stay valid after each replacement
    keys_in_order = participating.sort_by { |k| match.offset(k) }.reverse
    keys_in_order.each do |k|
      first, last = match.offset(k)
      string[first...last] = updates[k]
    end
  end

  # Relative link from one document folder to the README.md of another.
  #
  # @param from [String] path of the linking document's folder
  # @param to [String] path of the target document's folder
  # @return [String] one '../' per path segment of from, then to, then '/README.md'
  def get_path_to(from, to)
    levels_up = from.count('/') + 1 # number of segments in from
    climb = '../' * levels_up
    climb + to + '/README.md'
  end

  # Git commit the markdown repository in the /md folder.
  #
  # Stages either the given paths or everything (git add --all), then commits
  # with "<message><date>" as the commit message and an author date derived
  # from date. Note that `git commit -a` also stages every modified tracked file.
  #
  # @param date [String] date string containing YYYY-MM-DD; appended to the
  #   message and passed through get_author_date
  # @param ar_add [Array<String>, nil] paths (relative to the markdown folder)
  #   to stage; nil or empty stages everything
  # @param message [String] commit message prefix
  def commit_markdown_repo(date, ar_add=nil, message='')
    # Resolve these before changing directory, so a bad date cannot leave the
    # process stranded inside the repository folder.
    author_date = get_author_date(date)
    paths = (ar_add || []).map(&:to_s)

    Dir.chdir(MARKDOWN_FOLDER)
    # Argument-list form of system: nothing passes through a shell, so quotes,
    # '$' or backticks in paths and messages are taken literally.
    if paths.empty?
      system('git', 'add', '--all')
    else
      system('git', 'add', '--', *paths)
    end
    system('git', 'commit', '-am', "#{message}#{date}", '--quiet', '--date', author_date)
    Dir.chdir('..')
  end

  # Turn the first YYYY-MM-DD found in a string into an ISO 8601 author date
  # at 09:00 with a fixed +02:00 offset. Dates before the Unix epoch are
  # replaced by the epoch itself (formatted in the process's local time zone).
  #
  # @param str_date [String] text containing a YYYY-MM-DD date
  # @return [String] ISO 8601 timestamp without fractional seconds
  def get_author_date(str_date)
    found = /([0-9]{4})-([0-9]{2})-([0-9]{2})/.match(str_date)
    year, month, day = found ? found.captures : []

    # Nine o'clock on the given day, at a +02:00 offset
    nine_o_clock = Time.new(year, month, day, 9, 0, 0, '+02:00')
    epoch = Time.at(0)

    chosen = nine_o_clock < epoch ? epoch : nine_o_clock
    chosen.iso8601(0)
  end

  # # Split the list into laws that are in effect and laws that have been retracted
  # def split_effective index
  #   today = Date.today.strftime("%Y-%m-%d")
  #
  #   effectives = {}
  #   retracted = {}
  #   index.each do |bwb_id, regeling_info|
  #     expiration = regeling_info[EXPIRATION_DATE].strip
  #     if expiration and expiration.length > 0 and expiration < today
  #       retracted[bwb_id] = regeling_info
  #     else
  #       effectives[bwb_id] = regeling_info
  #     end
  #   end
  #
  #   return effectives, retracted
  # end

  # Run `git gc` inside the XML folder, then step back up one directory.
  # Assumes the current working directory is the parent of XML_FOLDER.
  def git_gc_xml
    puts "Garbage collecting /xml/.git ..."
    Dir.chdir(XML_FOLDER)
    system("git gc")
    # Return to the parent folder; the result of `git gc` is not checked
    Dir.chdir('..')
  end
  # Run `git gc` inside the Markdown folder, then step back up one directory.
  # Assumes the current working directory is the parent of MARKDOWN_FOLDER.
  def git_gc_md
    puts "Garbage collecting /md/.git ..."
    Dir.chdir(MARKDOWN_FOLDER)
    system("git gc")
    # Return to the parent folder; the result of `git gc` is not checked
    Dir.chdir('..')
  end

  # Git commit the XML repository in the /xml folder.
  #
  # Stages either the given paths or the whole folder (git add .), then commits
  # with "<message><author_date>" as the commit message and an author date
  # derived from author_date. Note that `git commit -a` also stages every
  # modified tracked file.
  #
  # @param author_date [String] date string containing YYYY-MM-DD; appended to
  #   the message and passed through get_author_date
  # @param array_add [Array<String>, nil] paths (relative to the XML folder)
  #   to stage; nil or empty stages the whole folder
  # @param message [String] commit message prefix
  def commit_xml_repo(author_date, array_add=nil, message='')
    # Resolve these before changing directory, so a bad date cannot leave the
    # process stranded inside the repository folder.
    git_date = get_author_date(author_date)
    paths = (array_add || []).map(&:to_s)

    Dir.chdir(XML_FOLDER)
    # Argument-list form of system: nothing passes through a shell, so quotes,
    # '$' or backticks in paths and messages are taken literally.
    if paths.empty?
      system('git', 'add', '.')
    else
      system('git', 'add', '--', *paths)
    end
    system('git', 'commit', '-am', "#{message}#{author_date}", '--quiet', '--date', git_date)

    Dir.chdir('..')
  end

  # Pull branch master of the repository named by ENV['HTTPS_REPO'] into the
  # Markdown folder, then step back up one directory.
  def pull_markdown_repo
    Dir.chdir(MARKDOWN_FOLDER)
    # Argument-list form: the repository URL (which may carry credentials with
    # characters such as '&' or '$') is never interpreted by a shell.
    # An unset variable becomes '', which git rejects without raising here.
    system('git', 'pull', ENV['HTTPS_REPO'].to_s, 'master')
    Dir.chdir('..')
  end

  # Push branch master of the Markdown folder to the repository named by
  # ENV['HTTPS_REPO'], then step back up one directory.
  def push_markdown_repo
    Dir.chdir(MARKDOWN_FOLDER)
    # Argument-list form: the repository URL (which may carry credentials with
    # characters such as '&' or '$') is never interpreted by a shell.
    # An unset variable becomes '', which git rejects without raising here.
    system('git', 'push', ENV['HTTPS_REPO'].to_s, 'master')
    Dir.chdir('..')
  end
end