69 lines
1.7 KiB
Lua
69 lines
1.7 KiB
Lua
local htmlparser = require("htmlparser")
|
|
local lfs = require("lfs")
|
|
local cURL = require("cURL")
|
|
local iconv = require("iconv")
|
|
local json = require("cjson")
|
|
-- Charset converter WINDOWS-1251 -> UTF-8, used by get() on downloaded pages.
-- Declared local so that `cd` and `err` do not leak into the global table.
local cd, err = iconv.new('UTF-8', 'WINDOWS-1251')
-- Fail right here with a readable message instead of an "attempt to index
-- a nil value" later, on the first conversion.
assert(cd, 'cannot create WINDOWS-1251 -> UTF-8 converter: ' .. tostring(err))

-- settings

-- Parsing limit. NOTE(review): left global on purpose - this script never
-- reads it, so it is presumably picked up by the htmlparser module as its
-- loop limit; confirm against the htmlparser documentation.
htmlparser_looplimit = 10000

-- source url
local url = 'https://classinform.ru/classifikator-vidov-razreshennogo-ispolzovaniia-zemelnykh-uchastkov.html'

-- Scraped entries keyed by classifier code; filled by the driver code below.
local results = {}

-- CSV output, opened once and shared by parse() and the driver code.
local csvfile = assert(io.open('cvrzu.csv', "w"))
|
|
|
|
--- Download a page and return its body converted with the `cd` converter.
-- @param url address to fetch
-- @return the converted body as a string, or nil plus an error value when
--         the transfer or the charset conversion fails
local function get(url)
  -- Collect chunks in a table and join once; repeated `..` in the write
  -- callback copies the whole accumulated body on every chunk.
  local chunks = {}
  local headers = {
    'Content-type: text/html',
    'User-Agent: Mozilla/5.0'
  }
  local c = cURL.easy{
    url = url,
    httpheader = headers,
    writefunction = function(st)
      chunks[#chunks + 1] = st
    end
  }
  -- Run perform() under pcall so the handle is closed even if it raises
  -- instead of returning an error value.
  local called, ok, err = pcall(function() return c:perform() end)
  c:close()
  if not called then return nil, ok end -- `ok` holds the raised error here
  if not ok then return nil, err end

  -- iconv returns the converted text plus an error code (nil on success).
  -- Report a failed conversion as a failure rather than handing back a
  -- possibly truncated body together with a stray second value.
  local converted, conv_err = cd:iconv(table.concat(chunks))
  if conv_err then
    return nil, 'charset conversion failed: ' .. tostring(conv_err)
  end
  return converted
end
|
|
|
|
--- Scrape one classifier page into a table keyed by code.
-- Looks at every ".full_width" element and takes its first two links as
-- (code, name) when the first link's text contains a "<digits>.<digits>" code.
-- @param url   page to download and parse
-- @param first when truthy, rows are NOT written to the CSV file (the
--              caller writes them itself); otherwise each entry is
--              appended to `csvfile` as ";code;name"
-- @return table mapping code -> { name = ..., url = ... }
-- Raises an error when the page cannot be downloaded.
local function parse(url, first)
  local result = {}

  -- Take get()'s results apart explicitly: calling htmlparser.parse(get(url))
  -- would forward get's second return value as an extra parser argument and
  -- would try to parse nil after a failed download.
  local html, err = get(url)
  if not html then
    error(('cannot fetch %s: %s'):format(tostring(url), tostring(err)))
  end

  local root = htmlparser.parse(html)
  for _, div in ipairs(root:select(".full_width")) do
    local items = div:select("a")
    if items and items[2] then
      local label = items[1]:getcontent()
      if string.find(label, '%d+%.%d+') then
        -- Parentheses keep only the string, dropping gsub's match count.
        local code = (label:gsub('- ', ''))
        local name = items[2]:getcontent()
        if not first then csvfile:write(';'..code..';'..name..'\n') end

        -- A link without href yields url = nil instead of a concat error.
        -- NOTE(review): prefixing 'https:' assumes protocol-relative
        -- hrefs ("//host/path") - confirm against the site markup.
        local href = items[1].attributes["href"]
        result[code] = {name = name, url = href and ('https:'..href)}
      end
    end
  end
  return result
end
|
|
|
|
--- Write `data` (converted with tostring) to `filename`, replacing the file.
-- @param filename path of the file to create or overwrite
-- @param data     value to write
-- Raises an error with the system message when the file cannot be opened
-- or written.
local function save(filename, data)
  -- assert() turns io.open's (nil, message) into a readable error, matching
  -- how the CSV file is opened elsewhere in this script.
  local file = assert(io.open(filename, 'w'))
  local ok, err = file:write(tostring(data))
  file:close() -- close before reporting a write failure
  assert(ok, err)
end
|
|
|
|
-- Driver: scrape the top-level page, then each section page it links to,
-- writing the CSV as we go and dumping the whole tree as JSON at the end.
results = parse(url, true)

do
  -- pairs() visits keys in unspecified order, which made the CSV row order
  -- differ between runs. Sort codes by their numeric components instead,
  -- so that e.g. "2.0" comes before "10.0".
  local keys, codes = {}, {}
  for code in pairs(results) do
    local parts = {}
    for num in code:gmatch('%d+') do parts[#parts + 1] = tonumber(num) end
    keys[code] = parts
    codes[#codes + 1] = code
  end

  table.sort(codes, function(a, b)
    local ka, kb = keys[a], keys[b]
    for i = 1, math.min(#ka, #kb) do
      if ka[i] ~= kb[i] then return ka[i] < kb[i] end
    end
    if #ka ~= #kb then return #ka < #kb end
    return a < b -- plain string order as the final tie-break
  end)

  for _, code in ipairs(codes) do
    local data = results[code]
    -- Top-level row: code in the first column, second column left empty.
    csvfile:write(code..';;'..data.name..'\n')
    -- parse() appends this section's sub-entries to the CSV itself.
    data.sub = parse(data.url)
  end
end

csvfile:close()

save('cvrzu.json', json.encode(results))
|