sbis/parser.lua
2025-12-07 20:36:07 +04:00

163 lines
3.8 KiB
Lua
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

local csv = require('csv')
local lfs = require('lfs')
local json = require('cjson');
-- Root directory that is scanned for input CSV files.
local dir = './input'

-- Column names of the output CSV, in the order the columns are written.
local headers = {
    'name',
    'industry',
    'type',
    'website',
    'teams',
    'emailAddress',
    'billingAddressCity',
    'billingAddress',
    'phoneNumberOffice',
    'phoneNumberOther',
    'phoneNumberOther1',
    'phoneNumberOther2',
    'phoneNumberOther3',
    'phoneNumberOther4',
}
-- Removes leading and trailing whitespace from `s`.
-- A falsy argument (nil or false) is handed back unchanged.
-- Returns exactly one value: the stripped string.
function trim(s)
    if s then
        -- Keep only the first result of gsub; the substitution count is dropped.
        local stripped = string.gsub(s, "^%s*(.-)%s*$", "%1")
        return stripped
    end
    return s
end
-- Reads the whole file at `path`, normalizes its quotes and writes the
-- normalized text back to the same path (the file is modified in place).
--
-- Normalization: every double quote becomes a single quote, then every pair
-- of adjacent single quotes is collapsed into one.
--
-- @param path  path of the file to read and rewrite
-- @return      the normalized file content
-- Raises an error if the file cannot be opened, read or written.
local function read_file(path)
    local input, open_err = io.open(path)
    if not input then
        error("Не могу открыть file: " .. path .. " (" .. tostring(open_err) .. ")")
    end
    local content = input:read("*a")
    -- Close before any error is raised so the handle is never leaked.
    input:close()
    if not content then
        error("Не могу прочитать " .. path)
    end

    -- gsub also returns a match count; assigning to one variable drops it.
    content = string.gsub(content, '"', "'")
    content = string.gsub(content, "''", "'")

    local output, write_err = io.open(path, 'w+')
    if not output then
        error("Не могу записать " .. path .. " (" .. tostring(write_err) .. ")")
    end
    local written = output:write(content)
    output:close()
    if not written then
        error("Не могу записать " .. path)
    end
    return content
end
-- Picks one e-mail address out of a comma-separated list.
--
-- The first piece containing "info@" wins. Otherwise the first piece of the
-- list is used, but only if it contains "@". The result is trimmed.
--
-- @param mails  comma-separated addresses; nil is accepted (e.g. a CSV row
--               that is too short to have this column)
-- @return       the chosen address, or nil when there is none
local function get_mail(mails)
    if not mails then
        return nil
    end
    local first
    for piece in string.gmatch(mails, "[^,]+") do
        -- Plain (non-pattern) search: the needle is a literal substring.
        if string.find(piece, "info@", 1, true) then
            return trim(piece)
        end
        first = first or piece
    end
    if first and string.find(first, "@", 1, true) then
        return trim(first)
    end
    return nil
end
-- Splits a comma-separated phone list into the output phone columns.
--
-- Every piece is reduced to its digits. The 1st piece goes to
-- `phoneNumberOffice`, the 2nd to `phoneNumberOther`, and the n-th (n >= 3)
-- to `phoneNumberOther<n-2>`. Slots that receive no piece stay ''.
--
-- @param ph  comma-separated phone numbers; nil is treated as an empty list
-- @return    table keyed by phone column name
local function get_phones(ph)
    local slot_keys = {
        'phoneNumberOffice',
        'phoneNumberOther',
        'phoneNumberOther1',
        'phoneNumberOther2',
        'phoneNumberOther3',
        'phoneNumberOther4',
    }
    local phones = {}
    for _, key in ipairs(slot_keys) do
        phones[key] = ''
    end
    if not ph then
        return phones
    end

    local i = 0
    for piece in string.gmatch(ph, "[^,]+") do
        i = i + 1
        -- Keep digits only; the local takes just the first result of gsub.
        local digits = string.gsub(piece, '%D', '')
        -- Pieces beyond the predefined slots keep the same naming scheme.
        local key = slot_keys[i] or ('phoneNumberOther' .. (i - 2))
        phones[key] = digits
    end
    return phones
end
-- Splits an address into the full address and the city name.
--
-- The city is the text that follows the literal abbreviation "г." up to the
-- next comma or the end of the string. The dot is escaped so that it is not
-- a pattern wildcard: otherwise a word ending in "г" followed by ", "
-- (e.g. "Санкт-Петербург, ул. ...") is mistaken for the abbreviation and the
-- street is returned as the city. Whitespace after "г." is optional.
--
-- @param adr  address string; nil is treated as ''
-- @return     table with `billingAddress` (the input) and
--             `billingAddressCity` ('' when no city was found)
local function get_address(adr)
    adr = adr or ''
    local city = string.match(adr, 'г%.%s*([^,]+)')
    return {
        billingAddress = adr,
        billingAddressCity = city or '',
    }
end
-- Builds the website URL from a comma-separated list of sites.
--
-- Only the first piece of the list is used. It is trimmed and prefixed with
-- "https://" unless it already starts with a URL scheme ("http://",
-- "https://", ...), so an existing scheme is never doubled.
--
-- @param st  comma-separated site list; nil is treated as no site
-- @return    the URL; '' when there is no site; the trimmed input when it
--            contains nothing but commas
local function get_site(st)
    if not st then
        return ''
    end
    local first = string.match(st, "[^,]+")
    if not first then
        return trim(st)
    end
    local url = trim(first)
    if url == '' then
        -- A blank first piece must not turn into a bare "https://".
        return ''
    end
    if string.find(url, "^%a[%w+%.%-]*://") then
        return url
    end
    return 'https://' .. url
end
-- Extracts the line of business: the first comma-separated piece of `st`,
-- trimmed. When `st` has no such piece, the trimmed `st` itself is returned.
local function get_industry(st)
    -- Only the first match is needed, so pull a single value from the iterator.
    local next_piece = string.gmatch(st, "[^,]+")
    local head = next_piece()
    if head == nil then
        return trim(st)
    end
    return trim(head)
end
-- Walks `dir` recursively and appends one row to `out` for every row of
-- every "*.csv" file that yields an e-mail address.
--
-- @param dir  directory to scan
-- @param out  open, writable file handle that receives the rows
local function parse_dir(dir, out)
    for entry in lfs.dir(dir) do
        local path = dir .. "/" .. entry
        local mode = lfs.attributes(path, "mode")
        -- "%." is a literal dot; "\." is an invalid escape in Lua 5.2+ and
        -- degrades to an any-character "." in Lua 5.1.
        if mode == "file" and string.find(entry, ".+%.csv$") then
            print('Обрабатывается файл: ' .. path)
            -- Called for its side effect: rewrites the file with normalized
            -- quotes before the CSV reader opens it.
            read_file(path)
            -- NOTE(review): the reader returned by csv.open is not closed
            -- here; whether it needs an explicit close depends on the csv
            -- library in use - confirm.
            local f = csv.open(path, {separator = ';'})
            for fields in f:lines() do
                -- Rows shorter than expected have nil columns; skip rows
                -- without an e-mail column instead of crashing.
                local mail = fields[10] and get_mail(fields[10])
                if mail then
                    local address = get_address(fields[4] or '')
                    local phones = get_phones(fields[9] or '')
                    local row = {
                        fields[1] or '',
                        get_industry(fields[8] or ''),
                        'SBIS',
                        get_site(fields[17] or ''),
                        'SBIS',
                        mail,
                        address['billingAddressCity'],
                        address['billingAddress'],
                        phones['phoneNumberOffice'],
                        phones['phoneNumberOther'],
                        phones['phoneNumberOther1'],
                        phones['phoneNumberOther2'],
                        phones['phoneNumberOther3'],
                        phones['phoneNumberOther4'],
                    }
                    out:write(table.concat(row, ';'), '\n')
                end
            end
        elseif mode == "directory" and entry ~= "." and entry ~= ".." then
            parse_dir(path, out)
        end
    end
end

-- Processes every "*.csv" file under `dir` (including subdirectories) and
-- appends the extracted rows to 'output.csv' in the current directory.
--
-- The output file is opened once and the same handle is used for the whole
-- directory tree. Re-opening and closing a shared global handle in each
-- recursive call would leave the caller with a closed file after the first
-- subdirectory.
--
-- @param dir  root directory to scan
-- Raises an error if 'output.csv' cannot be opened or processing fails.
local function parse(dir)
    local out = assert(io.open('output.csv', 'a+'))
    local ok, err = pcall(parse_dir, dir, out)
    -- Always close the output, then re-raise the original error unchanged.
    out:close()
    if not ok then
        error(err, 0)
    end
end
-- Start from a fresh 'output.csv' that contains only the header line, then
-- append the rows extracted from every CSV file found under `dir`.
do
    -- Local handle (no accidental global); fail with the system error
    -- message if the output file cannot be created.
    local out = assert(io.open('output.csv', 'w+'))
    out:write(table.concat(headers, ';') .. '\n')
    out:close()
end
parse(dir)