From 09467fb85ebd01329c34dd5682496d8473547e13 Mon Sep 17 00:00:00 2001 From: Bilal-Abbas-Gigalabs Date: Wed, 3 Jul 2024 22:44:49 +0500 Subject: [PATCH 1/3] initial files for workable parser and uploading --- app/jobs/importer/xml/workable_parser_job.rb | 12 +++ app/services/importer/xml/workable_parser.rb | 102 ++++++++++++++++++ lib/tasks/import_jobs.rake | 9 ++ .../importer/xml/workable_parser_job_spec.rb | 13 +++ spec/tasks/import_jobs_rake_spec.rb | 14 +++ 5 files changed, 150 insertions(+) create mode 100644 app/jobs/importer/xml/workable_parser_job.rb create mode 100644 app/services/importer/xml/workable_parser.rb create mode 100644 lib/tasks/import_jobs.rake create mode 100644 spec/models/spec/jobs/importer/xml/workable_parser_job_spec.rb create mode 100644 spec/tasks/import_jobs_rake_spec.rb diff --git a/app/jobs/importer/xml/workable_parser_job.rb b/app/jobs/importer/xml/workable_parser_job.rb new file mode 100644 index 000000000..4c98dbc41 --- /dev/null +++ b/app/jobs/importer/xml/workable_parser_job.rb @@ -0,0 +1,12 @@ +module Importer + module Xml + class WorkableParserJob < ApplicationJob + queue_as :default + retry_on StandardError, attempts: 0 + + def perform + Importer::Xml::WorkableParser.new.import_jobs + end + end + end +end \ No newline at end of file diff --git a/app/services/importer/xml/workable_parser.rb b/app/services/importer/xml/workable_parser.rb new file mode 100644 index 000000000..778fc21f6 --- /dev/null +++ b/app/services/importer/xml/workable_parser.rb @@ -0,0 +1,102 @@ +module Importer + module Xml + class WorkableParser < ApplicationService + WORKABLE_URL = 'https://www.workable.com/boards/workable.xml' + LOCAL_XML_PATH = 'workable.xml' + S3_BUCKET = 'your bucket name' # please enter your bucket name + S3_REGION = 'your regoin' # please enter your bucket region + S3_KEY = 'workable.xml' + REDIRECTED_URLS_PATH = 'redirected_urls.json' + MAX_RETRIES = 5 + RETRY_DELAY = 5 + + def initialize + puts("Started parser initializer") + @s3_client = Aws::S3::Client.new(region: S3_REGION) + end + + def import_jobs + # stream_and_save_xml + parse_xml + save_and_upload + create_jobs + end + + private + + def stream_and_save_xml + puts("Started stream_and_save_xml") + response = retry_request do + Faraday.get(WORKABLE_URL) do |req| + req.options.timeout = 600 + req.options.open_timeout = 600 + end + end + + if response + File.open(LOCAL_XML_PATH, 'wb') do |file| + response.body.each do |chunk| + file.write(chunk) + end + end + puts "File saved: #{LOCAL_XML_PATH}" + upload_to_s3(LOCAL_XML_PATH, S3_KEY) + else + puts "Failed to save file." + end + end + + def retry_request + retries = 0 + begin + response = yield + puts "Response status: #{response.status}, Response body length: #{response.body.length}" + if response.status == 429 # Too Many Requests + raise Faraday::Error, "Too Many Requests" + end + response + rescue Faraday::Error => e + puts "Request failed: #{e.message}" + retries += 1 + if retries <= MAX_RETRIES + delay = RETRY_DELAY * (2 ** (retries - 1)) # Exponential backoff + puts "Retrying in #{delay} seconds..." + sleep delay + retry + else + puts "Failed after #{MAX_RETRIES} retries: #{e.message}" + nil + end + end + end + + def parse_xml + puts("Started parse_xml") + if File.exist?(LOCAL_XML_PATH) + @doc = Nokogiri::XML(File.open(LOCAL_XML_PATH)) + @urls = @doc.xpath('//url').map { |url| url.text.strip } + else + puts "File not found: #{LOCAL_XML_PATH}" + end + end + + def save_and_upload + puts("Started save_and_upload") + File.open(REDIRECTED_URLS_PATH, 'w') { |file| file.write(@urls.to_json) } + upload_to_s3(REDIRECTED_URLS_PATH) + end + + def upload_to_s3(local_path) + puts("Started upload_to_s3") + File.open(local_path, 'rb') do |file| + @s3_client.put_object(bucket: S3_BUCKET, key: local_path, body: file) + end + puts "Uploaded #{local_path} to S3 bucket #{S3_BUCKET} as #{local_path}" + end + + def create_jobs + print("Total number of URLs are #{@urls.count}") + end + end + end +end \ No newline at end of file diff --git a/lib/tasks/import_jobs.rake b/lib/tasks/import_jobs.rake new file mode 100644 index 000000000..55a49f8da --- /dev/null +++ b/lib/tasks/import_jobs.rake @@ -0,0 +1,9 @@ +namespace :importer do + namespace :xml do + desc "Import jobs from Workable" + task import_jobs: :environment do + Importer::Xml::WorkableParserJob.perform_now + puts "Jobs imported successfully." + end + end +end \ No newline at end of file diff --git a/spec/models/spec/jobs/importer/xml/workable_parser_job_spec.rb b/spec/models/spec/jobs/importer/xml/workable_parser_job_spec.rb new file mode 100644 index 000000000..078fd098b --- /dev/null +++ b/spec/models/spec/jobs/importer/xml/workable_parser_job_spec.rb @@ -0,0 +1,13 @@ +require 'rails_helper' + +RSpec.describe Importer::Xml::WorkableParserJob, type: :job do + describe '#perform' do + it 'calls import_jobs on WorkableParser' do + parser = instance_double("Importer::Xml::WorkableParser") + allow(Importer::Xml::WorkableParser).to receive(:new).and_return(parser) + expect(parser).to receive(:import_jobs) + + described_class.perform_now + end + end +end \ No newline at end of file diff --git a/spec/tasks/import_jobs_rake_spec.rb b/spec/tasks/import_jobs_rake_spec.rb new file mode 100644 index 000000000..6cfb0b46e --- /dev/null +++ b/spec/tasks/import_jobs_rake_spec.rb @@ -0,0 +1,14 @@ +require 'rails_helper' +require 'rake' + +RSpec.describe 'importer:xml:import_jobs', type: :task do + before :all do + Rake.application.rake_require 'tasks/import_jobs' + Rake::Task.define_task(:environment) + end + + it 'executes the import_jobs task successfully' do + expect(Importer::Xml::WorkableParserJob).to receive(:perform_now) + Rake::Task['importer:xml:import_jobs'].invoke + end +end \ No newline at end of file From 66246314dfa3286f2806c34f79b41c178580c5cc Mon Sep 17 00:00:00 2001 From: Bilal-Abbas-Gigalabs Date: Mon, 8 Jul 2024 17:08:19 +0500 Subject: [PATCH 2/3] 1. updated code with s3 uploading comments. 2. Updated logger when rake task run successfully --- app/services/importer/xml/workable_parser.rb | 65 ++++++++++---------- lib/tasks/import_jobs.rake | 2 +- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/app/services/importer/xml/workable_parser.rb b/app/services/importer/xml/workable_parser.rb index 778fc21f6..7efb0afdd 100644 --- a/app/services/importer/xml/workable_parser.rb +++ b/app/services/importer/xml/workable_parser.rb @@ -3,11 +3,10 @@ module Xml class WorkableParser < ApplicationService WORKABLE_URL = 'https://www.workable.com/boards/workable.xml' LOCAL_XML_PATH = 'workable.xml' - S3_BUCKET = 'your bucket name' # please enter your bucket name - S3_REGION = 'your regoin' # please enter your bucket region - S3_KEY = 'workable.xml' + S3_BUCKET = 'S3_BUCKET_NAME' # please enter your bucket name + S3_REGION = 'S3_REGION' # please enter your bucket region REDIRECTED_URLS_PATH = 'redirected_urls.json' - MAX_RETRIES = 5 + MAX_RETRIES = 5 # workable allow 5 tries in certain time frame RETRY_DELAY = 5 def initialize @@ -16,7 +15,7 @@ def initialize end def import_jobs - # stream_and_save_xml + stream_and_save_xml parse_xml save_and_upload create_jobs @@ -25,38 +24,43 @@ def import_jobs private def stream_and_save_xml - puts("Started stream_and_save_xml") - response = retry_request do - Faraday.get(WORKABLE_URL) do |req| - req.options.timeout = 600 - req.options.open_timeout = 600 - end - end - - if response - File.open(LOCAL_XML_PATH, 'wb') do |file| - response.body.each do |chunk| - file.write(chunk) + puts "Started stream_and_save_xml" + uri = URI.parse(WORKABLE_URL) + Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| + request = Net::HTTP::Get.new(uri.request_uri) + begin + http.request(request) do |response| + if response.is_a?(Net::HTTPSuccess) + File.open(LOCAL_XML_PATH, 'wb') do |file| + total_size = response['content-length'].to_i + downloaded_size = 0 + response.read_body do |chunk| + file.write(chunk) + downloaded_size += chunk.size + if total_size > 0 + puts "Chunk download progress: #{((downloaded_size.to_f / total_size) * 100).round(2)}%" + else + puts "Chunk download progress: #{(downloaded_size.to_f / (downloaded_size + 1) * 100).round(2)}%" + end + end + end + puts "File saved: #{LOCAL_XML_PATH}" + else + puts "Failed to retrieve file: #{response.code} #{response.message}" + end end + rescue StandardError => e + puts "Request failed: #{e.message}. Retrying..." + retry_request { stream_and_save_xml } # Retry the whole download in case of any error end - puts "File saved: #{LOCAL_XML_PATH}" - upload_to_s3(LOCAL_XML_PATH, S3_KEY) - else - puts "Failed to save file." end end def retry_request retries = 0 begin - response = yield - puts "Response status: #{response.status}, Response body length: #{response.body.length}" - if response.status == 429 # Too Many Requests - raise Faraday::Error, "Too Many Requests" - end - response - rescue Faraday::Error => e - puts "Request failed: #{e.message}" + yield + rescue StandardError => e retries += 1 if retries <= MAX_RETRIES delay = RETRY_DELAY * (2 ** (retries - 1)) # Exponential backoff @@ -65,7 +69,6 @@ def retry_request retry else puts "Failed after #{MAX_RETRIES} retries: #{e.message}" - nil end end end @@ -95,7 +98,7 @@ def upload_to_s3(local_path) end def create_jobs - print("Total number of URLs are #{@urls.count}") + print("Total number of URLs are #{@urls.count}. Please create jobs as per need. \n") end end end diff --git a/lib/tasks/import_jobs.rake b/lib/tasks/import_jobs.rake index 55a49f8da..9c32a7609 100644 --- a/lib/tasks/import_jobs.rake +++ b/lib/tasks/import_jobs.rake @@ -3,7 +3,7 @@ namespace :importer do desc "Import jobs from Workable" task import_jobs: :environment do Importer::Xml::WorkableParserJob.perform_now - puts "Jobs imported successfully." + puts "\nJobs imported successfully." end end end \ No newline at end of file From 91581f13d0a1dcd970cea5246e8e14694ef7b67e Mon Sep 17 00:00:00 2001 From: Bilal-Abbas-Gigalabs Date: Mon, 8 Jul 2024 17:27:26 +0500 Subject: [PATCH 3/3] fixed rubocop issues for file indentation --- app/jobs/importer/xml/workable_parser_job.rb | 4 +- app/services/importer/xml/workable_parser.rb | 160 +++++++++---------- lib/tasks/import_jobs.rake | 6 +- 3 files changed, 85 insertions(+), 85 deletions(-) diff --git a/app/jobs/importer/xml/workable_parser_job.rb b/app/jobs/importer/xml/workable_parser_job.rb index 4c98dbc41..bfaabd3be 100644 --- a/app/jobs/importer/xml/workable_parser_job.rb +++ b/app/jobs/importer/xml/workable_parser_job.rb @@ -5,8 +5,8 @@ class WorkableParserJob < ApplicationJob retry_on StandardError, attempts: 0 def perform - Importer::Xml::WorkableParser.new.import_jobs + Importer::Xml::WorkableParser.new.import_jobs end end end -end \ No newline at end of file +end diff --git a/app/services/importer/xml/workable_parser.rb b/app/services/importer/xml/workable_parser.rb index 7efb0afdd..0b90990fb 100644 --- a/app/services/importer/xml/workable_parser.rb +++ b/app/services/importer/xml/workable_parser.rb @@ -1,105 +1,105 @@ module Importer module Xml - class WorkableParser < ApplicationService - WORKABLE_URL = 'https://www.workable.com/boards/workable.xml' - LOCAL_XML_PATH = 'workable.xml' - S3_BUCKET = 'S3_BUCKET_NAME' # please enter your bucket name - S3_REGION = 'S3_REGION' # please enter your bucket region - REDIRECTED_URLS_PATH = 'redirected_urls.json' - MAX_RETRIES = 5 # workable allow 5 tries in certain time frame - RETRY_DELAY = 5 + class WorkableParser < ApplicationService + WORKABLE_URL = 'https://www.workable.com/boards/workable.xml' + LOCAL_XML_PATH = 'workable.xml' + S3_BUCKET = 'S3_BUCKET_NAME' # please enter your bucket name + S3_REGION = 'S3_REGION' # please enter your bucket region + REDIRECTED_URLS_PATH = 'redirected_urls.json' + MAX_RETRIES = 5 # workable allow 5 tries in certain time frame + RETRY_DELAY = 5 - def initialize - puts("Started parser initializer") - @s3_client = Aws::S3::Client.new(region: S3_REGION) - end + def initialize + puts("Started parser initializer") + @s3_client = Aws::S3::Client.new(region: S3_REGION) + end - def import_jobs - stream_and_save_xml - parse_xml - save_and_upload - create_jobs - end + def import_jobs + stream_and_save_xml + parse_xml + save_and_upload + create_jobs + end - private + private - def stream_and_save_xml - puts "Started stream_and_save_xml" - uri = URI.parse(WORKABLE_URL) - Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| - request = Net::HTTP::Get.new(uri.request_uri) - begin - http.request(request) do |response| - if response.is_a?(Net::HTTPSuccess) - File.open(LOCAL_XML_PATH, 'wb') do |file| - total_size = response['content-length'].to_i - downloaded_size = 0 - response.read_body do |chunk| - file.write(chunk) - downloaded_size += chunk.size - if total_size > 0 - puts "Chunk download progress: #{((downloaded_size.to_f / total_size) * 100).round(2)}%" - else - puts "Chunk download progress: #{(downloaded_size.to_f / (downloaded_size + 1) * 100).round(2)}%" + def stream_and_save_xml + puts "Started stream_and_save_xml" + uri = URI.parse(WORKABLE_URL) + Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http| + request = Net::HTTP::Get.new(uri.request_uri) + begin + http.request(request) do |response| + if response.is_a?(Net::HTTPSuccess) + File.open(LOCAL_XML_PATH, 'wb') do |file| + total_size = response['content-length'].to_i + downloaded_size = 0 + response.read_body do |chunk| + file.write(chunk) + downloaded_size += chunk.size + if total_size.positive? + puts "Chunk download progress: #{((downloaded_size.to_f / total_size) * 100).round(2)}%" + else + puts "Chunk download progress: #{(downloaded_size.to_f / (downloaded_size + 1) * 100).round(2)}%" + end end end + puts "File saved: #{LOCAL_XML_PATH}" + else + puts "Failed to retrieve file: #{response.code} #{response.message}" end - puts "File saved: #{LOCAL_XML_PATH}" - else - puts "Failed to retrieve file: #{response.code} #{response.message}" end + rescue StandardError => e + puts "Request failed: #{e.message}. Retrying..." + retry_request { stream_and_save_xml } # Retry the whole download in case of any error end + end + end + + def retry_request + retries = 0 + begin + yield rescue StandardError => e - puts "Request failed: #{e.message}. Retrying..." - retry_request { stream_and_save_xml } # Retry the whole download in case of any error + retries += 1 + if retries <= MAX_RETRIES + delay = RETRY_DELAY * (2 ** (retries - 1)) # Exponential backoff + puts "Retrying in #{delay} seconds..." + sleep delay + retry + else + puts "Failed after #{MAX_RETRIES} retries: #{e.message}" + end end end - end - def retry_request - retries = 0 - begin - yield - rescue StandardError => e - retries += 1 - if retries <= MAX_RETRIES - delay = RETRY_DELAY * (2 ** (retries - 1)) # Exponential backoff - puts "Retrying in #{delay} seconds..." - sleep delay - retry + def parse_xml + puts("Started parse_xml") + if File.exist?(LOCAL_XML_PATH) + @doc = Nokogiri::XML(File.open(LOCAL_XML_PATH)) + @urls = @doc.xpath('//url').map { |url| url.text.strip } else - puts "Failed after #{MAX_RETRIES} retries: #{e.message}" + puts "File not found: #{LOCAL_XML_PATH}" end end - end - def parse_xml - puts("Started parse_xml") - if File.exist?(LOCAL_XML_PATH) - @doc = Nokogiri::XML(File.open(LOCAL_XML_PATH)) - @urls = @doc.xpath('//url').map { |url| url.text.strip } - else - puts "File not found: #{LOCAL_XML_PATH}" + def save_and_upload + puts("Started save_and_upload") + File.write(REDIRECTED_URLS_PATH, @urls.to_json) + # upload_to_s3(REDIRECTED_URLS_PATH) end - end - - def save_and_upload - puts("Started save_and_upload") - File.open(REDIRECTED_URLS_PATH, 'w') { |file| file.write(@urls.to_json) } - upload_to_s3(REDIRECTED_URLS_PATH) - end - def upload_to_s3(local_path) - puts("Started upload_to_s3") - File.open(local_path, 'rb') do |file| - @s3_client.put_object(bucket: S3_BUCKET, key: local_path, body: file) + def upload_to_s3(local_path) + puts("Started upload_to_s3") + File.open(local_path, 'rb') do |file| + @s3_client.put_object(bucket: S3_BUCKET, key: local_path, body: file) + end + puts "Uploaded #{local_path} to S3 bucket #{S3_BUCKET} as #{local_path}" end - puts "Uploaded #{local_path} to S3 bucket #{S3_BUCKET} as #{local_path}" - end - def create_jobs - print("Total number of URLs are #{@urls.count}. Please create jobs as per need. \n") + def create_jobs + print("Total number of URLs are #{@urls.count}. Please create jobs as per need. \n") + end end end - end -end \ No newline at end of file +end diff --git a/lib/tasks/import_jobs.rake b/lib/tasks/import_jobs.rake index 9c32a7609..587943260 100644 --- a/lib/tasks/import_jobs.rake +++ b/lib/tasks/import_jobs.rake @@ -2,8 +2,8 @@ namespace :importer do namespace :xml do desc "Import jobs from Workable" task import_jobs: :environment do - Importer::Xml::WorkableParserJob.perform_now - puts "\nJobs imported successfully." + Importer::Xml::WorkableParserJob.perform_now + puts "\nJobs imported successfully." end end -end \ No newline at end of file +end