Skip to content

Commit e1cd958

Browse files
authored
Merge pull request #12 from wikirate/update-entity-codenames-index-mapping
Update entity codenames index mapping
2 parents 702b294 + 107c854 commit e1cd958

File tree

4 files changed

+29
-63
lines changed

4 files changed

+29
-63
lines changed

config/mappings/companies.json

+4-20
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,12 @@
55
"mappings": {
66
"properties": {
77
"headquarters": {
8-
"type": "keyword"
9-
},
10-
"sec_cik": {
118
"type": "text",
12-
"analyzer": "keyword"
9+
"index": "not_analyzed"
1310
},
14-
"oar_id": {
15-
"type": "text",
16-
"analyzer": "keyword"
17-
},
18-
"open_corporates": {
19-
"type": "text",
20-
"analyzer": "keyword"
21-
},
22-
"uk_cn": {
23-
"type": "text",
24-
"analyzer": "keyword"
25-
},
26-
"aus_cn": {
27-
"type":"text",
28-
"analyzer": "keyword"
11+
"corporate_identifiers": {
12+
"type": "keyword"
2913
}
3014
}
3115
}
32-
}
16+
}

config/queries/companies.sql

+17-32
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,19 @@
1-
SELECT
2-
co.updated_at,
3-
IF (
4-
co.created_at = co.updated_at,
5-
"create",
6-
IF (co.trash = 1, "delete", "update")
7-
) AS action_type,
1+
SELECT co.updated_at,
2+
IF(co.created_at = co.updated_at, "create", IF(co.trash=1, "delete", "update")) as action_type,
83
co.id,
94
co.name,
10-
hq.db_content AS headquarters,
11-
sec.db_content AS sec_cik,
12-
oar.db_content AS oar_id,
13-
oc.db_content AS open_corporates,
14-
uk_cn.db_content AS uk_cn,
15-
abn.db_content AS aus_cn
16-
FROM
17-
cards AS co
18-
LEFT JOIN cards AS hq ON co.id = hq.left_id
19-
AND hq.right_id = :headquarters
20-
LEFT JOIN cards AS sec ON co.id = sec.left_id
21-
AND sec.right_id = :sec_cik
22-
LEFT JOIN cards AS oar ON co.id = oar.left_id
23-
AND oar.right_id = :oar_id
24-
LEFT JOIN cards AS oc ON co.id = oc.left_id
25-
AND oc.right_id = :open_corporates
26-
LEFT JOIN cards AS uk_cn ON co.id = uk_cn.left_id
27-
AND uk_cn.right_id = :uk_cn
28-
LEFT JOIN cards AS abn ON co.id = abn.left_id
29-
AND abn.right_id = :abn
30-
WHERE
31-
co.type_id = :wikirate_company
32-
AND co.updated_at > :sql_last_value
33-
ORDER BY
34-
co.updated_at;
5+
hq.db_content as headquarters,
6+
GROUP_CONCAT(cids.db_content SEPARATOR ',') as corporate_identifiers
7+
FROM cards as co
8+
LEFT JOIN cards as hq ON co.id = hq.left_id AND hq.right_id=:headquarters
9+
LEFT JOIN cards as cids ON co.id=cids.left_id AND cids.right_id IN (SELECT id from cards where type_id=:company_identifier)
10+
WHERE co.type_id=:wikirate_company
11+
AND co.updated_at >= :sql_last_value
12+
GROUP BY
13+
co.updated_at,
14+
co.created_at,
15+
co.trash,
16+
co.id,
17+
co.name,
18+
hq.db_content
19+
ORDER BY co.updated_at;

entrypoint.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ set -euo pipefail
44

55
export DATABASE_PORT=${DATABASE_PORT:-3306}
66

7-
cards="'headquarters','sec_cik','oar_id','open_corporates_id','wikirate_company',\
8-
'metric','source','phrase','wikirate_title','wikirate_topic','project',\
9-
'research_group','company_group','dataset','uk_cn', 'abn'"
7+
cards="'headquarters','company',\
8+
'metric','source','phrase','wikirate_title','topic','project',\
9+
'research_group','company_group','dataset','company_identifier'"
1010

1111
query="SELECT CONCAT('PARAM_', UPPER(IFNULL(codename, name)), '=', id) AS result \
1212
FROM cards WHERE codename IN ($cards) OR name IN ($cards)"

pipeline/wikirate.conf

+5-8
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ input {
88
last_run_metadata_path => "/usr/share/logstash/data/plugins/inputs/jdbc/logstash_jdbc_last_run_general"
99
statement_filepath => "/usr/share/logstash/config/queries/general.sql"
1010
parameters => {
11-
"wikirate_company" => "${PARAM_WIKIRATE_COMPANY:}"
12-
"wikirate_topic" => "${PARAM_WIKIRATE_TOPIC:}"
11+
"wikirate_company" => "${PARAM_COMPANY:}"
12+
"wikirate_topic" => "${PARAM_TOPIC:}"
1313
"project" => "${PARAM_PROJECT:}"
1414
"research_group" => "${PARAM_RESEARCH_GROUP:}"
1515
"company_group" => "${PARAM_COMPANY_GROUP:}"
@@ -69,13 +69,10 @@ input {
6969
last_run_metadata_path => "/usr/share/logstash/data/plugins/inputs/jdbc/logstash_jdbc_last_run_companies"
7070
statement_filepath => "/usr/share/logstash/config/queries/companies.sql"
7171
parameters => {
72+
"wikirate_company" => "${PARAM_COMPANY:}"
7273
"headquarters" => "${PARAM_HEADQUARTERS:}"
73-
"sec_cik" => "${PARAM_SEC_CIK:}"
74-
"oar_id" => "${PARAM_OAR_ID:}"
75-
"open_corporates" => "${PARAM_OPEN_CORPORATES_ID:}"
76-
"uk_cn" => "${PARAM_UK_CN:}"
77-
"abn" => "${PARAM_ABN:}"
78-
"wikirate_company" => "${PARAM_WIKIRATE_COMPANY:}"
74+
"company_identifier" => "${PARAM_COMPANY_IDENTIFIER:}"
75+
7976
}
8077
sql_log_level => "debug"
8178
use_column_value => true

0 commit comments

Comments
 (0)