Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

filesize is not provided by curl #1871

Merged
merged 3 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion docs/source/admin/fs/rest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ You will get back your document as it has been stored by elasticsearch:
}
}

If you started FSCrawler in debug mode with ``--debug`` or if you pass
If you started FSCrawler in debug mode or if you pass
``debug=true`` query parameter, then the response will be much more
complete:

Expand Down Expand Up @@ -279,6 +279,15 @@ The field ``external`` doesn't necessarily be a flat structure. This is a more a
}
}

You can use this technique to add, for example, the filesize of the file you are uploading:

.. code:: sh

echo "This is my text" > test.txt
curl -F "file=@test.txt" \
-F "tags={\"file\":{\"filesize\":$(ls -l test.txt | awk '{print $5}')}}" \
"http://127.0.0.1:8080/fscrawler/_document"

.. attention:: Only standard :ref:`FSCrawler fields <generated_fields>` can be set outside ``external`` field name.

Remove a document
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public void testUploadDocumentWithIdUsingPut() throws Exception {
// We wait until we have our document
ESSearchResponse response = countTestHelper(new ESSearchRequest().withIndex(getCrawlerName()), 1L, null);
assertThat(response.getHits().get(0).getId(), is("1234"));
assertThat(JsonPath.read(response.getHits().get(0).getSource(), "$.file.filesize"), notNullValue());
assertThat(JsonPath.read(response.getHits().get(0).getSource(), "$.file.filesize"), greaterThan(0));
}

@Test
Expand Down Expand Up @@ -224,7 +224,14 @@ public void testAllDocumentsWithRestExternalIndex() throws Exception {
.timeValueMinutes(2));
for (ESSearchHit hit : response.getHits()) {
assertThat(JsonPath.read(hit.getSource(), "$.file.extension"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), notNullValue());
int filesize = JsonPath.read(hit.getSource(), "$.file.filesize");
if (filesize <= 0) {
// On some machines (i.e. GitHub Actions), the size is not provided
logger.warn("File [{}] has a size of [{}]",
JsonPath.read(hit.getSource(), "$.file.filename"), filesize);
} else {
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), greaterThan(0));
}
}
}

Expand All @@ -249,7 +256,7 @@ public void testDocumentWithExternalTags() throws Exception {
checkDocument("add_external.txt", hit -> {
assertThat(JsonPath.read(hit.getSource(), "$.content"), containsString("This file content will be extracted"));
assertThat(JsonPath.read(hit.getSource(), "$.file.extension"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), greaterThan(0));
expectThrows(PathNotFoundException.class, () -> JsonPath.read(hit.getSource(), "$.meta"));
assertThat(JsonPath.read(hit.getSource(), "$.external.tenantId"), is(23));
assertThat(JsonPath.read(hit.getSource(), "$.external.company"), is("shoe company"));
Expand All @@ -265,7 +272,7 @@ public void testDocumentWithExternalTags() throws Exception {
checkDocument("replace_content_and_external.txt", hit -> {
assertThat(JsonPath.read(hit.getSource(), "$.content"), is("OVERWRITTEN CONTENT"));
assertThat(JsonPath.read(hit.getSource(), "$.file.extension"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), greaterThan(0));
expectThrows(PathNotFoundException.class, () -> JsonPath.read(hit.getSource(), "$.meta"));
assertThat(JsonPath.read(hit.getSource(), "$.external.tenantId"), is(23));
assertThat(JsonPath.read(hit.getSource(), "$.external.company"), is("shoe company"));
Expand All @@ -281,7 +288,7 @@ public void testDocumentWithExternalTags() throws Exception {
checkDocument("replace_content_only.txt", hit -> {
assertThat(JsonPath.read(hit.getSource(), "$.content"), is("OVERWRITTEN CONTENT"));
assertThat(JsonPath.read(hit.getSource(), "$.file.extension"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), notNullValue());
assertThat(JsonPath.read(hit.getSource(), "$.file.filesize"), greaterThan(0));
expectThrows(PathNotFoundException.class, () -> JsonPath.read(hit.getSource(), "$.meta"));
expectThrows(PathNotFoundException.class, () -> JsonPath.read(hit.getSource(), "$.external"));
});
Expand Down