From d3cb3a58810e41bbe11af47f45389c0259b4bba9 Mon Sep 17 00:00:00 2001 From: "weakmap@gmail.com" Date: Sun, 29 Sep 2024 16:58:01 +0200 Subject: [PATCH] regex for einsAImmobilien price normalization | filter listings that does not have all required keys --- lib/FredyRuntime.js | 5 ++++- lib/notification/adapter/apprise.js | 2 +- lib/provider/einsAImmobilien.js | 25 +++++++++++++++++++++---- package.json | 2 +- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/lib/FredyRuntime.js b/lib/FredyRuntime.js index 210731c..4735333 100755 --- a/lib/FredyRuntime.js +++ b/lib/FredyRuntime.js @@ -87,7 +87,10 @@ class FredyRuntime { return listings.map(this._providerConfig.normalize); } _filter(listings) { - return listings.filter(this._providerConfig.filter); + //only return those where all the fields have been found + const keys = Object.keys(this._providerConfig.crawlFields); + const filteredListings = listings.filter((item) => keys.every((key) => key in item)); + return filteredListings.filter(this._providerConfig.filter); } _findNew(listings) { const newListings = listings.filter((o) => getKnownListings(this._jobKey, this._providerId)[o.id] == null); diff --git a/lib/notification/adapter/apprise.js b/lib/notification/adapter/apprise.js index 77d019a..a91f2f3 100644 --- a/lib/notification/adapter/apprise.js +++ b/lib/notification/adapter/apprise.js @@ -8,7 +8,7 @@ export const send = ({ serviceName, newListings, notificationConfig, jobKey }) = const jobName = job == null ? 
jobKey : job.name; const promises = newListings.map((newListing) => { const title = `${jobName} at ${serviceName}: ${newListing.title}`; - const message = `Address: ${newListing.address}\nSize: ${newListing.size}\nPrice: ${newListing.price}\Link: ${newListing.link}`; + const message = `Address: ${newListing.address}\nSize: ${newListing.size}\nPrice: ${newListing.price}\nLink: ${newListing.link}`; return fetch(server, { method: 'POST', headers: { 'Content-Type': 'application/json' }, diff --git a/lib/provider/einsAImmobilien.js b/lib/provider/einsAImmobilien.js index 7cd9a1b..07a8b06 100755 --- a/lib/provider/einsAImmobilien.js +++ b/lib/provider/einsAImmobilien.js @@ -1,4 +1,4 @@ -import utils, {buildHash} from '../utils.js'; +import utils, { buildHash } from '../utils.js'; let appliedBlackList = []; function normalize(o) { @@ -7,10 +7,28 @@ function normalize(o) { size += ` / / ${o.rooms.trim()}`; } const link = `https://www.1a-immobilienmarkt.de/expose/${o.id}.html`; - const id = buildHash(o.id, o.price); - return Object.assign(o, { id, size, link }); + const price = normalizePrice(o.price); + const id = buildHash(o.id, price); + return Object.assign(o, { id, price, size, link }); } +/** + * einsAImmobilien sometimes uses a weird pricing label such as `775.700,00 EUR Kaufpreis ab 2.475 € mtl`. + * Make sure to extract only the actual price out of the string. 
+ * @param price + * @returns {*} + */ +function normalizePrice(price) { + if (price == null) { + return null; + } + const regex = /(\d{1,3}(?:\.\d{3})*,\d{2})\s?(EUR|€)/g; + const result = price.match(regex); + if (result == null || result.length === 0) { + return price; + } + return result[0]; +} function applyBlacklist(o) { const titleNotBlacklisted = !utils.isOneOf(o.title, appliedBlackList); const descNotBlacklisted = !utils.isOneOf(o.description, appliedBlackList); @@ -27,7 +45,6 @@ const config = { size: '.tabelle .inner_object_data .data_boxes div:nth-child(1)', rooms: '.tabelle .inner_object_data .data_boxes div:nth-child(2)', title: '.tabelle .inner_object_data .tabelle_inhalt_titel_black | removeNewline | trim', - description: '.tabelle .inner_object_data .objekt_beschreibung | removeNewline | trim', }, normalize: normalize, filter: applyBlacklist, diff --git a/package.json b/package.json index 9c1ff5b..3d9a5fa 100755 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fredy", - "version": "10.0.0", + "version": "10.1.0", "description": "[F]ind [R]eal [E]states [d]amn eas[y].", "scripts": { "start": "node index.js",