Add initial typo3-solr dockerfile

Related: #166
webdevops · Feb 12, 2017 · 69c485b · 69c485b
1 parent 6d086b8
commit 69c485b
Show file tree

Hide file tree

Showing 242 changed files with 19,212 additions and 1 deletion.
diff --git a/conf/console.yml b/conf/console.yml
@@ -35,6 +35,10 @@ dockerTest:
   dockerfile:
     '/varnish':
       - 'ENV VARNISH_BACKEND_HOST webdevops.io'
+    '/typo3-solr':
+      - 'USER root'
+      - 'RUN apt-get update && apt-get install -y -f --no-install-recommends lsb-release lsb-base net-tools && apt-get clean -y'
+      - 'USER solr'
 
   configuration:
     default:
@@ -113,3 +117,8 @@ dockerTest:
       '/samson-deployment':
         OS_FAMILY: 'debian'
         OS_VERSION: '8'
+      '/typo3-solr':
+        OS_FAMILY: 'debian'
+        OS_VERSION: '8'
+      '/typo3-solr:6.0':
+        SOLR_VERSION: '6.3.0'
diff --git a/docker/typo3-solr/6.0/Dockerfile b/docker/typo3-solr/6.0/Dockerfile
@@ -0,0 +1,28 @@
+#+++++++++++++++++++++++++++++++++++++++
+# Dockerfile for webdevops/typo3-solr:6.0
+#    -- automatically generated  --
+#+++++++++++++++++++++++++++++++++++++++
+
+FROM solr:6.3.0
+
+# Image metadata. MAINTAINER is deprecated, so the contact address is
+# recorded as a "maintainer" label next to the other labels.
+LABEL maintainer="info@webdevops.io" \
+      vendor=WebDevOps.io \
+      io.webdevops.layout=8 \
+      io.webdevops.version=1.3.5
+
+# Terminal type and UTF-8 locale, written in the key=value form of ENV.
+ENV TERM="xterm" \
+    LANG="C.UTF-8" \
+    LC_ALL="C.UTF-8"
+
+# Remove the existing (non-hidden) content of /opt/solr/server/solr before
+# the bundled ./solr/ tree is copied into it.
+RUN rm -fR /opt/solr/server/solr/*
+
+COPY ./solr/ /opt/solr/server/solr
+
+# Files added by COPY are owned by root, so switch to root to create the
+# data directory and hand the whole tree over to the solr user.
+USER root
+
+RUN mkdir -p /opt/solr/server/solr/data \
+    && chown -R solr:solr /opt/solr/server/solr/
+
+# Switch back to the solr user so the image does not end as root.
+USER solr
+
+# Declared after the directory has been created and chown'ed above.
+VOLUME ["/opt/solr/server/solr/data"]
diff --git a/docker/typo3-solr/6.0/Dockerfile.jinja2 b/docker/typo3-solr/6.0/Dockerfile.jinja2
@@ -0,0 +1,18 @@
+{# Template for the typo3-solr 6.0 Dockerfile. The three macro calls below are defined elsewhere; #}
+{# presumably they render the FROM line, the maintainer/label block and the TERM/LANG/LC_ALL ENV lines (TODO confirm against the macro definitions). #}
+{{ docker.fromOfficial("solr", "6.3.0") }}
+
+{{ docker.version() }}
+
+{{ environment.general(charset="C.UTF-8") }}
+
+{# Remove the existing (non-hidden) content of /opt/solr/server/solr before copying ./solr/ into it. #}
+RUN rm -fR /opt/solr/server/solr/*
+
+COPY ./solr/ /opt/solr/server/solr
+
+{# Root is needed to create the data directory and to chown the copied tree to solr:solr. #}
+USER root
+
+RUN mkdir -p /opt/solr/server/solr/data \
+    && chown -R solr:solr /opt/solr/server/solr/
+
+{# Switch back to the solr user so the image does not end as root. #}
+USER solr
+
+{# Declared after the directory has been created and chown'ed above. #}
+VOLUME ["/opt/solr/server/solr/data"]
diff --git a/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/admin-extra.html b/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/admin-extra.html
@@ -0,0 +1,14 @@
+<!-- The content of this page will be statically included into the top
+of the admin page.  Uncomment this as an example to see where the content
+will show up.
+
+<hr>
+<i>This line will appear before the first table</i>
+<tr>
+<td colspan="2">
+This row will be appended to the end of the first table
+</td>
+</tr>
+<hr>
+
+-->
diff --git a/...olr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/_schema_analysis_stopwords_arabic.json b/...olr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/_schema_analysis_stopwords_arabic.json
@@ -0,0 +1,127 @@
+{
+  "initArgs": {
+    "ignoreCase": true
+  },
+  "initializedOn": "2014-04-29T23:08:57.000Z",
+  "managedList": [
+    "من",
+    "ومن",
+    "منها",
+    "منه",
+    "في",
+    "وفي",
+    "فيها",
+    "فيه",
+    "و",
+    "ف",
+    "ثم",
+    "او",
+    "أو",
+    "ب",
+    "بها",
+    "به",
+    "ا",
+    "أ",
+    "اى",
+    "اي",
+    "أي",
+    "أى",
+    "لا",
+    "ولا",
+    "الا",
+    "ألا",
+    "إلا",
+    "لكن",
+    "ما",
+    "وما",
+    "كما",
+    "فما",
+    "عن",
+    "مع",
+    "اذا",
+    "إذا",
+    "ان",
+    "أن",
+    "إن",
+    "انها",
+    "أنها",
+    "إنها",
+    "انه",
+    "أنه",
+    "إنه",
+    "بان",
+    "بأن",
+    "فان",
+    "فأن",
+    "وان",
+    "وأن",
+    "وإن",
+    "التى",
+    "التي",
+    "الذى",
+    "الذي",
+    "الذين",
+    "الى",
+    "الي",
+    "إلى",
+    "إلي",
+    "على",
+    "عليها",
+    "عليه",
+    "اما",
+    "أما",
+    "إما",
+    "ايضا",
+    "أيضا",
+    "كل",
+    "وكل",
+    "لم",
+    "ولم",
+    "لن",
+    "ولن",
+    "هى",
+    "هي",
+    "هو",
+    "وهى",
+    "وهي",
+    "وهو",
+    "فهى",
+    "فهي",
+    "فهو",
+    "انت",
+    "أنت",
+    "لك",
+    "لها",
+    "له",
+    "هذه",
+    "هذا",
+    "تلك",
+    "ذلك",
+    "هناك",
+    "كانت",
+    "كان",
+    "يكون",
+    "تكون",
+    "وكانت",
+    "وكان",
+    "غير",
+    "بعض",
+    "قد",
+    "نحو",
+    "بين",
+    "بينما",
+    "منذ",
+    "ضمن",
+    "حيث",
+    "الان",
+    "الآن",
+    "خلال",
+    "بعد",
+    "قبل",
+    "حتى",
+    "عند",
+    "عندما",
+    "لدى",
+    "جميع"
+  ]
+}
diff --git a/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/protwords.txt b/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/protwords.txt
@@ -0,0 +1 @@
+TYPO3
diff --git a/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/schema.xml b/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/schema.xml
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+	The schema name property is constructed as follows
+
+	tx_solr		- The extension key
+	x-y-z		- The extension version this schema is meant to work with
+	YYYYMMDD	- The date the schema file was changed the last time
+
+	When changing the schema the name property must be updated. There is a
+	status report - tx_solr_report_SchemaStatus - checking against this
+	name property, that status check must be updated as well.
+-->
+<schema name="tx_solr-6-0-0--20161209" version="1.6" >
+	<!-- attribute "name" is the name of this schema and is only used for display purposes.
+		Applications should change this to reflect the nature of the search collection.
+		version="1.6" is Solr's version number for the schema syntax and semantics.  It should
+		not normally be changed by applications.
+		1.0: multiValued attribute did not exist, all fields are multiValued by nature
+		1.1: multiValued attribute introduced, false by default
+		1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
+		1.3: removed optional field compress feature
+		1.4: default auto-phrase (QueryParser feature) to off
+		1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
+		1.6: useDocValuesAsStored defaults to true.
+	-->
+
+	<uniqueKey>id</uniqueKey>
+
+	<!-- xinclude types -->
+	<xi:include href="../general_schema_types.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+	<!-- xinclude fields -->
+	<xi:include href="../general_schema_fields.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+
+	<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
+		words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
+		so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
+		Synonyms and stopwords are customized by external files, and stemming is enabled.
+		Duplicate tokens at the same position (which may result from Stemmed Synonyms or
+		WordDelim parts) are removed.
+	-->
+	<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+		<!-- Index-time analysis. Differs from the query-time chain below only in
+			catenateWords and catenateNumbers, which are "1" here and "0" there. -->
+		<analyzer type="index">
+			<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+
+			<filter class="solr.WordDelimiterFilterFactory"
+				generateWordParts="1"
+				generateNumberParts="1"
+				catenateWords="1"
+				catenateNumbers="1"
+				catenateAll="0"
+				splitOnCaseChange="1"
+				preserveOriginal="1"
+				protected="arabic/protwords.txt"
+			/>
+			<filter class="solr.LowerCaseFilterFactory"/>
+
+			<!-- Synonyms and stop words come from the managed resources named "arabic" -->
+			<filter class="solr.ManagedSynonymFilterFactory" managed="arabic"/>
+			<filter class="solr.ManagedStopFilterFactory" managed="arabic"/>
+
+			<!-- Normalize first, then mark the words listed in arabic/protwords.txt as keywords, then stem -->
+			<filter class="solr.ArabicNormalizationFilterFactory"/>
+			<filter class="solr.KeywordMarkerFilterFactory" protected="arabic/protwords.txt"/>
+			<filter class="solr.ArabicStemFilterFactory"/>
+			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+		</analyzer>
+		<!-- Query-time analysis: same filters in the same order as the index chain,
+			with catenateWords and catenateNumbers set to "0". -->
+		<analyzer type="query">
+			<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+
+			<filter class="solr.WordDelimiterFilterFactory"
+				generateWordParts="1"
+				generateNumberParts="1"
+				catenateWords="0"
+				catenateNumbers="0"
+				catenateAll="0"
+				splitOnCaseChange="1"
+				preserveOriginal="1"
+				protected="arabic/protwords.txt"
+			/>
+			<filter class="solr.LowerCaseFilterFactory"/>
+
+			<filter class="solr.ManagedSynonymFilterFactory" managed="arabic"/>
+			<filter class="solr.ManagedStopFilterFactory" managed="arabic"/>
+
+			<filter class="solr.ArabicNormalizationFilterFactory"/>
+			<filter class="solr.KeywordMarkerFilterFactory" protected="arabic/protwords.txt"/>
+			<filter class="solr.ArabicStemFilterFactory"/>
+			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+		</analyzer>
+	</fieldType>
+
+
+	<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
+		but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
+	<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
+		<!-- One analyzer without a type attribute, so the same chain is declared for index and query.
+			The word delimiter only catenates (generateWordParts and generateNumberParts are "0")
+			and no splitOnCaseChange attribute is set. -->
+		<analyzer>
+			<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+
+			<filter class="solr.WordDelimiterFilterFactory"
+				generateWordParts="0"
+				generateNumberParts="0"
+				catenateWords="1"
+				catenateNumbers="1"
+				catenateAll="0"
+				preserveOriginal="1"
+				protected="arabic/protwords.txt"
+			/>
+			<filter class="solr.LowerCaseFilterFactory"/>
+
+			<filter class="solr.ManagedSynonymFilterFactory" managed="arabic"/>
+			<filter class="solr.ManagedStopFilterFactory" managed="arabic"/>
+
+			<!-- NOTE(review): here ArabicStemFilterFactory runs before ArabicNormalizationFilterFactory,
+				the reverse of the "text" type in this file. Confirm this order is intended. -->
+			<filter class="solr.KeywordMarkerFilterFactory" protected="arabic/protwords.txt"/>
+			<filter class="solr.ArabicStemFilterFactory"/>
+			<filter class="solr.ArabicNormalizationFilterFactory"/>
+			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+		</analyzer>
+	</fieldType>
+
+	<!-- Exact matching of words like textWhiteSpaceTokenized,
+		but with the synonym and stop filters enabled
+	 -->
+	<fieldType name="textExact" class="solr.TextField" positionIncrementGap="100" >
+		<!-- One analyzer without a type attribute, so the same chain is declared for index and query.
+			Whitespace tokenization, lower-casing, managed "arabic" synonyms and stop words,
+			then duplicate removal. No normalization or stemming filter is configured. -->
+		<analyzer>
+			<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+
+			<filter class="solr.LowerCaseFilterFactory"/>
+			<filter class="solr.ManagedSynonymFilterFactory" managed="arabic"/>
+			<filter class="solr.ManagedStopFilterFactory" managed="arabic"/>
+			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+		</analyzer>
+	</fieldType>
+
+	<!-- Setup simple analysis for spell checking -->
+	<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">
+		<!-- Index-time chain. Unlike the query chain below it has no ManagedSynonymFilterFactory.
+			NOTE(review): RemoveDuplicatesTokenFilterFactory runs before normalization and stemming
+			here, while the query chain runs it last. Confirm this order is intended. -->
+		<analyzer type="index">
+			<tokenizer class="solr.StandardTokenizerFactory"/>
+
+			<filter class="solr.LowerCaseFilterFactory"/>
+			<filter class="solr.ManagedStopFilterFactory" managed="arabic"/>
+
+			<filter class="solr.StandardFilterFactory" />
+			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+			<filter class="solr.ArabicNormalizationFilterFactory"/>
+			<filter class="solr.ArabicStemFilterFactory"/>
+		</analyzer>
+		<!-- Query-time chain: adds the managed "arabic" synonyms and removes duplicates as the last step. -->
+		<analyzer type="query">
+			<tokenizer class="solr.StandardTokenizerFactory" />
+
+			<filter class="solr.LowerCaseFilterFactory"/>
+
+			<filter class="solr.ManagedSynonymFilterFactory" managed="arabic"/>
+			<filter class="solr.ManagedStopFilterFactory" managed="arabic"/>
+
+			<filter class="solr.StandardFilterFactory" />
+			<filter class="solr.ArabicNormalizationFilterFactory"/>
+			<filter class="solr.ArabicStemFilterFactory"/>
+			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+		</analyzer>
+	</fieldType>
+
+</schema>
diff --git a/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/synonyms.txt b/docker/typo3-solr/6.0/solr/configsets/ext_solr_6_0_0/conf/arabic/synonyms.txt
@@ -0,0 +1,17 @@
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+