From c0c0643b840f2e6fbc32caeb999db52160c7a6e5 Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Fri, 22 Aug 2014 19:53:57 -0400 Subject: [PATCH] Add in the And-bible OSIS parser Took me a while to do the research I needed, sorry it's been so long. Largely useless currently, but gives me a starting place to adapt. --- app/build.gradle | 1 + .../service/book/VerseLookupService.java | 5 + .../service/format/Constants.java | 28 ++ .../minimalbible/service/format/Note.java | 31 ++ .../service/format/OsisSaxHandler.java | 91 +++++ .../format/OsisToCanonicalTextSaxHandler.java | 134 +++++++ .../format/osistohtml/BookmarkMarker.java | 66 ++++ .../format/osistohtml/FigureHandler.java | 39 ++ .../service/format/osistohtml/HiHandler.java | 66 ++++ .../format/osistohtml/HtmlTextWriter.java | 91 +++++ .../service/format/osistohtml/LGHandler.java | 56 +++ .../service/format/osistohtml/LHandler.java | 84 +++++ .../format/osistohtml/MyNoteMarker.java | 61 ++++ .../format/osistohtml/NoteHandler.java | 141 ++++++++ .../osistohtml/OsisToHtmlParameters.java | 225 ++++++++++++ .../osistohtml/OsisToHtmlSaxHandler.java | 341 ++++++++++++++++++ .../service/format/osistohtml/QHandler.java | 86 +++++ .../format/osistohtml/ReferenceHandler.java | 157 ++++++++ .../format/osistohtml/TagHandlerHelper.java | 82 +++++ .../format/osistohtml/TitleHandler.java | 79 ++++ .../format/osistohtml/VerseHandler.java | 81 +++++ .../HebrewCharacterPreprocessor.java | 99 +++++ .../preprocessor/TextPreprocessor.java | 14 + .../osistohtml/strongs/StrongsHandler.java | 170 +++++++++ .../strongs/StrongsLinkCreator.java | 43 +++ .../osistohtml/strongs/StrongsUtil.java | 60 +++ .../format/osistohtml/tei/OrthHandler.java | 34 ++ .../format/osistohtml/tei/PronHandler.java | 34 ++ .../format/osistohtml/tei/RefHandler.java | 24 ++ .../format/osistohtml/tei/TEIUtil.java | 18 + 30 files changed, 2441 insertions(+) create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/Constants.java create mode 
100644 app/src/main/java/org/bspeice/minimalbible/service/format/Note.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/OsisSaxHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/OsisToCanonicalTextSaxHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/BookmarkMarker.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/FigureHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HiHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HtmlTextWriter.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LGHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/MyNoteMarker.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/NoteHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlParameters.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlSaxHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/QHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/ReferenceHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TagHandlerHelper.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TitleHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/VerseHandler.java create mode 100644 
app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/preprocessor/HebrewCharacterPreprocessor.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/preprocessor/TextPreprocessor.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsLinkCreator.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsUtil.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/OrthHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/PronHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/RefHandler.java create mode 100644 app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/TEIUtil.java diff --git a/app/build.gradle b/app/build.gradle index 24347a8..86e839a 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -67,6 +67,7 @@ dependencies { compile 'com.readystatesoftware.systembartint:systembartint:+' compile 'com.netflix.rxjava:rxjava-android:+' compile 'com.android.support:appcompat-v7:20.+' + compile 'org.apache.commons:commons-lang3:+' androidTestCompile 'com.jayway.awaitility:awaitility:+' androidTestCompile 'org.mockito:mockito-core:+' diff --git a/app/src/main/java/org/bspeice/minimalbible/service/book/VerseLookupService.java b/app/src/main/java/org/bspeice/minimalbible/service/book/VerseLookupService.java index ddfb5e1..02e6dd7 100644 --- a/app/src/main/java/org/bspeice/minimalbible/service/book/VerseLookupService.java +++ b/app/src/main/java/org/bspeice/minimalbible/service/book/VerseLookupService.java @@ -64,6 +64,7 @@ public class VerseLookupService implements Action1 { /** * Perform the ugly work of getting the actual data for a verse 
+ * * @param v * @return */ @@ -71,11 +72,15 @@ public class VerseLookupService implements Action1 { BookData bookData = new BookData(book, v); try { SAXEventProvider provider = bookData.getSAXEventProvider(); +// provider.provideSAXEvents(new OsisParser()); return provider.toString(); } catch (BookException e) { e.printStackTrace(); return "Unable to locate " + v.toString() + "!"; +// } catch (SAXException e) { +// e.printStackTrace(); } +// return null; } /** diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/Constants.java b/app/src/main/java/org/bspeice/minimalbible/service/format/Constants.java new file mode 100644 index 0000000..03b0afa --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/Constants.java @@ -0,0 +1,28 @@ +package org.bspeice.minimalbible.service.format; + +/** + * see http://www.crosswire.org/wiki/Frontends:URI_Standard + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class Constants { + // Strings for URL protocols/URI schemes + public static final String SWORD_PROTOCOL = "sword"; //$NON-NLS-1$ + public static final String BIBLE_PROTOCOL = "bible"; //$NON-NLS-1$ + public static final String DICTIONARY_PROTOCOL = "dict"; //$NON-NLS-1$ + public static final String GREEK_DEF_PROTOCOL = "gdef"; //$NON-NLS-1$ + public static final String HEBREW_DEF_PROTOCOL = "hdef"; //$NON-NLS-1$ + public static final String ALL_GREEK_OCCURRENCES_PROTOCOL = "allgoccur"; //$NON-NLS-1$ + public static final String ALL_HEBREW_OCCURRENCES_PROTOCOL = "allhoccur"; //$NON-NLS-1$ + public static final String ROBINSON_GREEK_MORPH_PROTOCOL = "robinson"; //$NON-NLS-1$ + public static final String HEBREW_MORPH_PROTOCOL = "hmorph"; //$NON-NLS-1$ + public static final String COMMENTARY_PROTOCOL = "comment"; //$NON-NLS-1$ + + public static class HTML { + public static final String NBSP = " "; + public static final String SPACE = " "; + public static final String BR = "
"; + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/Note.java b/app/src/main/java/org/bspeice/minimalbible/service/format/Note.java new file mode 100644 index 0000000..d928c68 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/Note.java @@ -0,0 +1,31 @@ +package org.bspeice.minimalbible.service.format; + +/** + * Info on a note or cross reference + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class Note { + + public static final String SUMMARY = "summary"; + ; + public static final String DETAIL = "detail"; + private static final String TAG = "Note"; + private String noteRef; + private String noteText; + + public Note(int verseNo, String noteRef, String noteText, NoteType noteType, String osisRef) { + super(); + this.noteRef = noteRef; + this.noteText = noteText; + } + + @Override + public String toString() { + return noteRef + ":" + noteText; + } + + public enum NoteType {TYPE_GENERAL, TYPE_REFERENCE} +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/OsisSaxHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/OsisSaxHandler.java new file mode 100644 index 0000000..aa23a5f --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/OsisSaxHandler.java @@ -0,0 +1,91 @@ +package org.bspeice.minimalbible.service.format; + + +import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter; +import org.xml.sax.Attributes; +import org.xml.sax.helpers.DefaultHandler; + +/** + * Convert OSIS input into Canonical text (used when creating search index) + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class OsisSaxHandler extends DefaultHandler { + + // debugging + private boolean isDebugMode = false; + + private HtmlTextWriter writer; + + public OsisSaxHandler() { + writer = new HtmlTextWriter(); + } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#toString() + */ + /* @Override */ + public String toString() { + return writer.getHtml(); + } + + protected String getName(String eName, String qName) { + if (eName != null && eName.length() > 0) { + return eName; + } else { + return qName; // not namespace-aware + } + } + + protected void write(String s) { + writer.write(s); + } + + /** + * check the value of the specified attribute and return true if same as checkvalue + * + * @param attrs + * @param attrName + * @param checkValue + * @return + */ + protected boolean isAttrValue(Attributes attrs, String attrName, String checkValue) { + if (attrs == null) { + return false; + } + String value = attrs.getValue(attrName); + return checkValue.equals(value); + } + + protected void debug(String name, Attributes attrs, boolean isStartTag) { + if (isDebugMode) { + write("*" + name); + if (attrs != null) { + for (int i = 0; i < attrs.getLength(); i++) { + String aName = attrs.getLocalName(i); // Attr name + if ("".equals(aName)) aName = attrs.getQName(i); + write(" "); + write(aName + "=\"" + attrs.getValue(i) + "\""); + } + } + write("*\n"); + } + } + + public void setDebugMode(boolean isDebugMode) { + this.isDebugMode = isDebugMode; + } + + protected void reset() { + writer.reset(); + } + + public HtmlTextWriter getWriter() { + return writer; + } +} + diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/OsisToCanonicalTextSaxHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/OsisToCanonicalTextSaxHandler.java new file mode 100644 index 0000000..d72a82e --- /dev/null +++ 
b/app/src/main/java/org/bspeice/minimalbible/service/format/OsisToCanonicalTextSaxHandler.java @@ -0,0 +1,134 @@ +package org.bspeice.minimalbible.service.format; + + +import org.bspeice.minimalbible.service.format.osistohtml.TagHandlerHelper; +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +import java.util.Stack; + +/** + * Convert OSIS input into Canonical text (used when creating search index) + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class OsisToCanonicalTextSaxHandler extends OsisSaxHandler { + + @SuppressWarnings("unused") + private int currentVerseNo; + + private Stack writeContentStack = new Stack(); + + public OsisToCanonicalTextSaxHandler() { + super(); + } + + ; + + @Override + public void startDocument() { + reset(); + // default mode is to write + writeContentStack.push(CONTENT_STATE.WRITE); + } + + /* + *Called when the Parser Completes parsing the Current XML File. + */ + @Override + public void endDocument() { + // pop initial value + writeContentStack.pop(); + assert (writeContentStack.isEmpty()); + } + + /* + * Called when the starting of the Element is reached. For Example if we have Tag + * called ... , then this method is called when tag is + * Encountered while parsing the Current XML File. The AttributeList Parameter has + * the list of all Attributes declared for the Current Element in the XML File. + */ + @Override + public void startElement(String namespaceURI, + String sName, // simple name + String qName, // qualified name + Attributes attrs) { + String name = getName(sName, qName); // element name + + debug(name, attrs, true); + + // if encountering either a verse tag or if the current tag is marked as being canonical then turn on writing + if (isAttrValue(attrs, "canonical", "true")) { + writeContentStack.push(CONTENT_STATE.WRITE); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) { + if (attrs != null) { + currentVerseNo = TagHandlerHelper.osisIdToVerseNum(attrs.getValue("", OSISUtil.OSIS_ATTR_OSISID)); + } + writeContentStack.push(CONTENT_STATE.WRITE); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_NOTE)) { + writeContentStack.push(CONTENT_STATE.IGNORE); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_TITLE)) { + writeContentStack.push(CONTENT_STATE.IGNORE); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_REFERENCE)) { + writeContentStack.push(CONTENT_STATE.IGNORE); + + } else if 
(name.equals(OSISUtil.OSIS_ELEMENT_L) || + name.equals(OSISUtil.OSIS_ELEMENT_LB) || + name.equals(OSISUtil.OSIS_ELEMENT_P)) { + // these occur in Psalms to separate different paragraphs. + // A space is needed for TTS not to be confused by punctuation with a missing space like 'toward us,and the' + write(" "); + //if writing then continue. Also if ignoring then continue + writeContentStack.push(writeContentStack.peek()); + } else { + // unknown tags rely on parent tag to determine if content is canonical e.g. the italic tag in the middle of canonical text + writeContentStack.push(writeContentStack.peek()); + } + } + + /* + * Called when the Ending of the current Element is reached. For example in the + * above explanation, this method is called when tag is reached + */ + @Override + public void endElement(String namespaceURI, + String sName, // simple name + String qName // qualified name + ) { + String name = getName(sName, qName); + debug(name, null, false); + if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) { + // A space is needed to separate one verse from the next, otherwise the 2 verses butt up against each other + // which looks bad and confuses TTS + write(" "); + } + + // now this tag has ended pop the write/ignore state for the parent tag + writeContentStack.pop(); + } + + /* + * Handle characters encountered in tags + */ + @Override + public void characters(char buf[], int offset, int len) { + if (CONTENT_STATE.WRITE.equals(writeContentStack.peek())) { + String s = new String(buf, offset, len); + + write(s); + } + } + + protected void writeContent(boolean writeContent) { + if (writeContent) { + writeContentStack.push(CONTENT_STATE.WRITE); + } else { + writeContentStack.push(CONTENT_STATE.IGNORE); + } + } + + private enum CONTENT_STATE {WRITE, IGNORE} +} + diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/BookmarkMarker.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/BookmarkMarker.java new file 
mode 100644 index 0000000..a09db94 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/BookmarkMarker.java @@ -0,0 +1,66 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo; +import org.crosswire.jsword.passage.Verse; + +import java.util.HashSet; +import java.util.Set; + +/** + * Display an img if the current verse has MyNote + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class BookmarkMarker { + + private Set bookmarkedVerses = new HashSet(); + + private OsisToHtmlParameters parameters; + + private VerseInfo verseInfo; + + private HtmlTextWriter writer; + + private boolean bookmarkOpenTagWritten = false; + + public BookmarkMarker(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) { + this.parameters = parameters; + this.verseInfo = verseInfo; + this.writer = writer; + + // create hashset of verses to optimise verse note lookup + bookmarkedVerses.clear(); + if (parameters.getVersesWithBookmarks() != null) { + for (Verse verse : parameters.getVersesWithBookmarks()) { + bookmarkedVerses.add(verse.getVerse()); + } + } + } + + + public String getTagName() { + return ""; + } + + /** + * just after verse start tag + */ + public void start() { + if (bookmarkedVerses != null && parameters.isShowBookmarks()) { + if (bookmarkedVerses.contains(verseInfo.currentVerseNo)) { + writer.write(""); +// writer.write(""); + bookmarkOpenTagWritten = true; + } + } + } + + public void end() { + if (bookmarkOpenTagWritten) { +// writer.write(""); + bookmarkOpenTagWritten = false; + } + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/FigureHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/FigureHandler.java new file mode 100644 index 0000000..312dbe4 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/FigureHandler.java @@ -0,0 +1,39 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.apache.commons.lang3.StringUtils; +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +/** + * Handle
to display pictures + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class FigureHandler { + + private HtmlTextWriter writer; + private OsisToHtmlParameters parameters; + + public FigureHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + this.parameters = parameters; + this.writer = writer; + } + + public String getTagName() { + return "figure"; + } + + public void start(Attributes attrs) { + // Refer to Gen 3:14 in ESV for example use of type=x-indent + String src = attrs.getValue(OSISUtil.ATTRIBUTE_FIGURE_SRC); + + if (StringUtils.isNotEmpty(src)) { + writer.write(""); + } + } + + public void end() { + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HiHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HiHandler.java new file mode 100644 index 0000000..6ebd834 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HiHandler.java @@ -0,0 +1,66 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +import java.util.Arrays; +import java.util.List; + +import static org.crosswire.jsword.book.OSISUtil.HI_ACROSTIC; +import static org.crosswire.jsword.book.OSISUtil.HI_BOLD; +import static org.crosswire.jsword.book.OSISUtil.HI_EMPHASIS; +import static org.crosswire.jsword.book.OSISUtil.HI_ILLUMINATED; +import static org.crosswire.jsword.book.OSISUtil.HI_ITALIC; +import static org.crosswire.jsword.book.OSISUtil.HI_LINETHROUGH; +import static org.crosswire.jsword.book.OSISUtil.HI_NORMAL; +import static org.crosswire.jsword.book.OSISUtil.HI_SMALL_CAPS; +import static org.crosswire.jsword.book.OSISUtil.HI_SUB; +import static org.crosswire.jsword.book.OSISUtil.HI_SUPER; +import static org.crosswire.jsword.book.OSISUtil.HI_UNDERLINE; + + +/** + * Handle hi element e.g. 
the child with his mother Mary + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class HiHandler { + + // possible values of type attribute + private static final List HI_TYPE_LIST = Arrays.asList(new String[]{HI_ACROSTIC, HI_BOLD, HI_EMPHASIS, HI_ILLUMINATED, HI_ITALIC, HI_LINETHROUGH, HI_NORMAL, HI_SMALL_CAPS, HI_SUB, HI_SUPER, HI_UNDERLINE}); + + private final static String DEFAULT = "bold"; + + private HtmlTextWriter writer; + + public HiHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + this.writer = writer; + } + + public String getTagName() { + return "hi"; + } + + public void start(Attributes attrs) { + String type = attrs.getValue(OSISUtil.OSIS_ATTR_TYPE); + start(type, DEFAULT); + } + + public void start(String style, String defaultStyle) { + if (style == null || !HI_TYPE_LIST.contains(style)) { + style = defaultStyle; + } + + // add any styles that are relevant - the tag name and the style attribute + String cssClasses = getTagName() + " hi_" + style; + + // start span with CSS class of 'hi_*' e.g. hi_bold + writer.write(""); + } + + public void end() { + writer.write(""); + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HtmlTextWriter.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HtmlTextWriter.java new file mode 100644 index 0000000..522956c --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/HtmlTextWriter.java @@ -0,0 +1,91 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +/** + * Write characters out to a StringBuilder - used while creating html for display + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors. + */ +public class HtmlTextWriter { + + private StringBuilder writer; + + private int dontWriteRequestCount = 0; + + private int writeTempStoreRequestCount = 0; + private StringBuilder tempStore = new StringBuilder(); + + // allow insert at a certain position + private String overwrittenString = ""; + + public HtmlTextWriter() { + writer = new StringBuilder(); + } + + public void write(String htmlText) { + if (dontWriteRequestCount > 0) { + // ignore all text + } else if (writeTempStoreRequestCount == 0) { + writer.append(htmlText); + } else { + tempStore.append(htmlText); + } + } + + /** + * allow pre-verse headings + */ + public void beginInsertAt(int insertOffset) { + overwrittenString = writer.substring(insertOffset); + writer.delete(insertOffset, writer.length()); + } + + /** + * finish inserting and restore overwritten tail of string + */ + public void finishInserting() { + writer.append(overwrittenString); + overwrittenString = ""; + } + + public int getPosition() { + return writer.length(); + } + + public void removeAfter(int position) { + writer.delete(position, writer.length()); + } + + public void reset() { + writer.setLength(0); + } + + public void writeToTempStore() { + writeTempStoreRequestCount++; + } + + public void finishWritingToTempStore() { + writeTempStoreRequestCount--; + } + + public void clearTempStore() { + tempStore.delete(0, tempStore.length()); + } + + public String getTempStoreString() { + return tempStore.toString(); + } + + public String getHtml() { + return writer.toString(); + } + + public void setDontWrite(boolean dontWrite) { + if (dontWrite) { + dontWriteRequestCount++; + } else { + dontWriteRequestCount--; + } + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LGHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LGHandler.java new file mode 100644 index 0000000..ad32499 --- /dev/null +++ 
b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LGHandler.java @@ -0,0 +1,56 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + + +import org.xml.sax.Attributes; + +import java.util.Stack; + +/** + * The lg or "line group" element is used to contain any group of poetic lines. Poetic lines are handled at the line level by And Bible, not line group + * so this class does nothing. + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +@SuppressWarnings("unused") +public class LGHandler { + + private HtmlTextWriter writer; + + private OsisToHtmlParameters parameters; + + private Stack stack = new Stack(); + + public LGHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + this.parameters = parameters; + this.writer = writer; + } + + public String getTagName() { + return "lg"; + } + + public void start(Attributes attrs) { +// ignore this for now because it is untested +// LGType lgtype = LGType.IGNORE; +// if (TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_SID, attrs) || +// TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_EID, attrs)) { +// lgtype = LGType.IGNORE; +// } else { +// // allow spacing around groups of poetry +// writer.write("
"); +// lgtype = LGType.DIV; +// } +// stack.push(lgtype); + } + + public void end() { +// LGType lgtype = stack.pop(); +// if (LGType.DIV.equals(lgtype)) { +// writer.write("
"); +// } + } + + enum LGType {DIV, IGNORE} +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LHandler.java new file mode 100644 index 0000000..b5fa3f1 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/LHandler.java @@ -0,0 +1,84 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import android.util.Log; + +import org.apache.commons.lang3.StringUtils; +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +import java.util.Stack; + +import static org.bspeice.minimalbible.service.format.Constants.HTML; + +/** + * This can either signify a quote or Red Letter + * Example from ESV Prov 19:1 + * ..... + *

+ * Apparently quotation marks are not supposed to appear in the KJV (https://sites.google.com/site/kjvtoday/home/Features-of-the-KJV/quotation-marks) + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class LHandler { + + private static String indent_html = HTML.NBSP + HTML.NBSP; + private HtmlTextWriter writer; + private Stack stack = new Stack(); + + public LHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + this.writer = writer; + int indentCharCount = 4; // TODO: Set a standard value for this + indent_html = StringUtils.repeat(HTML.NBSP, indentCharCount); + } + + public String getTagName() { + return "l"; + } + + public void startL(Attributes attrs) { + // Refer to Gen 3:14 in ESV for example use of type=x-indent + String type = attrs.getValue(OSISUtil.OSIS_ATTR_TYPE); + int level = TagHandlerHelper.getAttribute(OSISUtil.OSIS_ATTR_LEVEL, attrs, 1); + // make numIndents default to zero + int numIndents = Math.max(0, level - 1); + + LType ltype = LType.IGNORE; + if (StringUtils.isNotEmpty(type)) { + if (type.contains("indent")) { + // this tag is specifically for indenting so ensure there is an indent + numIndents = numIndents + 1; + writer.write(StringUtils.repeat(indent_html, numIndents)); + ltype = LType.INDENT; + } else if (type.contains("br")) { + writer.write(HTML.BR); + ltype = LType.BR; + } else { + ltype = LType.IGNORE; + Log.d("LHandler", "Unknown tag type:" + type); + } + } else if (TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_SID, attrs)) { + writer.write(StringUtils.repeat(indent_html, numIndents)); + ltype = LType.IGNORE; + } else if (TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_EID, attrs)) { + // e.g. 
Isaiah 40:12 + writer.write(HTML.BR); + ltype = LType.BR; + } else { + //simple + writer.write(StringUtils.repeat(indent_html, numIndents)); + ltype = LType.END_BR; + } + stack.push(ltype); + } + + public void endL() { + LType type = stack.pop(); + if (LType.END_BR.equals(type)) { + writer.write(HTML.BR); + } + } + + enum LType {INDENT, BR, END_BR, IGNORE} +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/MyNoteMarker.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/MyNoteMarker.java new file mode 100644 index 0000000..6b649d2 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/MyNoteMarker.java @@ -0,0 +1,61 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo; +import org.crosswire.jsword.passage.Key; +import org.crosswire.jsword.passage.KeyUtil; +import org.crosswire.jsword.passage.Verse; + +import java.util.HashSet; +import java.util.Set; + +/** + * Display an img if the current verse has MyNote + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class MyNoteMarker { + + private Set myNoteVerses = new HashSet(); + + private OsisToHtmlParameters parameters; + + private VerseInfo verseInfo; + + private HtmlTextWriter writer; + + public MyNoteMarker(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) { + this.parameters = parameters; + this.verseInfo = verseInfo; + this.writer = writer; + + // create hashmap of verses to optimise verse note lookup + myNoteVerses.clear(); + if (parameters.getVersesWithNotes() != null) { + for (Key key : parameters.getVersesWithNotes()) { + Verse verse = KeyUtil.getVerse(key); + myNoteVerses.add(verse.getVerse()); + } + } + } + + + public String getTagName() { + return ""; + } + + /** + * just after verse start tag + */ + public void start() { + if (myNoteVerses != null && parameters.isShowMyNotes()) { + if (myNoteVerses.contains(verseInfo.currentVerseNo)) { + writer.write(""); + } + } + } + + public void end() { + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/NoteHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/NoteHandler.java new file mode 100644 index 0000000..2bbf874 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/NoteHandler.java @@ -0,0 +1,141 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.apache.commons.lang3.StringUtils; +import org.bspeice.minimalbible.service.format.Note; +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo; +import org.xml.sax.Attributes; + +import java.util.ArrayList; +import java.util.List; + +import static org.bspeice.minimalbible.service.format.Note.NoteType; + +/** + * Convert OSIS tags into html tags + *

+ * Example OSIS tags from KJV Ps 119 v1 showing title, w, note + * + * <foreign n="?">ALEPH.</foreign> + * + * Blessed are the undefiled + * ... who walk + * ... of the Lord. + * undefiled: or, perfect, or, sincere + *

+ * Example of notes cross references from ESV + * In the Job 38:4-7; Ps. 33:6; 136:5; Isa. 42:5; 45:18; John 1:1-3; Acts 14:15; 17:24; Col. 1:16, 17; Heb. 1:10; 11:3; Rev. 4:11beginning + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class NoteHandler { + + private OsisToHtmlParameters parameters; + private VerseInfo verseInfo; + + private int noteCount = 0; + + //todo temporarily use a string but later switch to Map of verse->note + private List notesList = new ArrayList(); + private boolean isInNote = false; + private String currentNoteRef; + + private HtmlTextWriter writer; + + public NoteHandler(OsisToHtmlParameters osisToHtmlParameters, VerseInfo verseInfo, HtmlTextWriter theWriter) { + this.parameters = osisToHtmlParameters; + this.verseInfo = verseInfo; + this.writer = theWriter; + } + + public void startNote(Attributes attrs) { + isInNote = true; + currentNoteRef = getNoteRef(attrs); + writeNoteRef(currentNoteRef); + + // prepare to fetch the actual note into the notes repo + writer.writeToTempStore(); + } + + /* + * Called when the Ending of the current Element is reached. For example in the + * above explanation, this method is called when tag is reached + */ + public void endNote() { + String noteText = writer.getTempStoreString(); + if (noteText.length() > 0) { + if (!StringUtils.containsOnly(noteText, "[];().,")) { + Note note = new Note(verseInfo.currentVerseNo, currentNoteRef, noteText, NoteType.TYPE_GENERAL, null); + notesList.add(note); + } + // and clear the buffer + writer.clearTempStore(); + } + isInNote = false; + writer.finishWritingToTempStore(); + } + + /** + * a reference is finished and now the note must be added + */ + public void addNoteForReference(String refText, String osisRef) { + // add teh html to show a note character in the (bible) text + // a few modules like HunUj have refs in the text but not surrounded by a Note tag (like esv) so need to add Note here + // special code to cope with HunUj problem + if (parameters.isAutoWrapUnwrappedRefsInNote() && !isInNote()) { + currentNoteRef = createNoteRef(); + writeNoteRef(currentNoteRef); + } + + // record the note information to show if user 
requests to see notes for this verse + if (isInNote || parameters.isAutoWrapUnwrappedRefsInNote()) { + Note note = new Note(verseInfo.currentVerseNo, currentNoteRef, refText, NoteType.TYPE_REFERENCE, osisRef); + notesList.add(note); + } + } + + /** + * either use the 'n' attribute for the note ref or just get the next character in a list a-z + * + * @return a single char to use as a note ref + */ + private String getNoteRef(Attributes attrs) { + // if the ref is specified as an attribute then use that + String noteRef = attrs.getValue("n"); + if (StringUtils.isEmpty(noteRef)) { + noteRef = createNoteRef(); + } + return noteRef; + } + + /** + * either use the character passed in or get the next character in a list a-z + * + * @return a single char to use as a note ref + */ + private String createNoteRef() { + // else just get the next char + int inta = (int) 'a'; + char nextNoteChar = (char) (inta + (noteCount++ % 26)); + return String.valueOf(nextNoteChar); + } + + /** + * write noteref html to outputstream + */ + private void writeNoteRef(String noteRef) { + if (parameters.isShowNotes()) { + writer.write("" + noteRef + " "); + } + } + + public boolean isInNote() { + return isInNote; + } + + public List getNotesList() { + return notesList; + } +} + diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlParameters.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlParameters.java new file mode 100644 index 0000000..bd64ba6 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlParameters.java @@ -0,0 +1,225 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.crosswire.jsword.passage.Key; +import org.crosswire.jsword.passage.KeyUtil; +import org.crosswire.jsword.passage.Verse; +import org.crosswire.jsword.versification.Versification; +import org.crosswire.jsword.versification.system.Versifications; + +import java.net.URI; 
+import java.util.List; + +/** + * Parameters passed into the Osis to HTML converter + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class OsisToHtmlParameters { + private String languageCode = "en"; + private boolean isLeftToRight = true; + private boolean isShowTitles = true; + private boolean isShowVerseNumbers = false; + private boolean isVersePerline = false; + private boolean isShowMyNotes = false; + private boolean isShowBookmarks = false; + private boolean isShowNotes = false; + private boolean isAutoWrapUnwrappedRefsInNote = false; + // used as a basis if a reference has only chapter and no book + private Verse basisRef; + private Versification documentVersification; + private String font; + private String cssClassForCustomFont; + + private boolean isShowStrongs = false; + private boolean isShowMorphology = false; + private boolean isRedLetter = false; + private String extraStylesheet; + private String extraFooter; + private boolean convertStrongsRefsToLinks; + private List versesWithNotes; + private List versesWithBookmarks; + private URI moduleBasePath; + + public String getLanguageCode() { + return languageCode; + } + + public void setLanguageCode(String languageCode) { + this.languageCode = languageCode; + } + + public boolean isLeftToRight() { + return isLeftToRight; + } + + public void setLeftToRight(boolean isLeftToRight) { + this.isLeftToRight = isLeftToRight; + } + + public boolean isShowTitles() { + return isShowTitles; + } + + public void setShowTitles(boolean isShowTitles) { + this.isShowTitles = isShowTitles; + } + + public boolean isShowVerseNumbers() { + return isShowVerseNumbers; + } + + public void setShowVerseNumbers(boolean isShowVerseNumbers) { + this.isShowVerseNumbers = isShowVerseNumbers; + } + + public boolean isVersePerline() { + return isVersePerline; + } + + public void setVersePerline(boolean isVersePerline) { + this.isVersePerline = isVersePerline; + } + + public boolean isShowMyNotes() { + return isShowMyNotes; + } + + public void setShowMyNotes(boolean isShowMyNotes) { + 
this.isShowMyNotes = isShowMyNotes; + } + + public boolean isShowBookmarks() { + return isShowBookmarks; + } + + public void setShowBookmarks(boolean isShowBookmarks) { + this.isShowBookmarks = isShowBookmarks; + } + + public boolean isShowNotes() { + return isShowNotes; + } + + public void setShowNotes(boolean isShowNotes) { + this.isShowNotes = isShowNotes; + } + + public boolean isAutoWrapUnwrappedRefsInNote() { + return isAutoWrapUnwrappedRefsInNote; + } + + public void setAutoWrapUnwrappedRefsInNote(boolean isAutoWrapUnwrappedRefsInNote) { + this.isAutoWrapUnwrappedRefsInNote = isAutoWrapUnwrappedRefsInNote; + } + + public boolean isShowStrongs() { + return isShowStrongs; + } + + public void setShowStrongs(boolean isShowStrongs) { + this.isShowStrongs = isShowStrongs; + } + + public boolean isShowMorphology() { + return isShowMorphology; + } + + public void setShowMorphology(boolean isShowMorphology) { + this.isShowMorphology = isShowMorphology; + } + + public String getExtraStylesheet() { + return extraStylesheet; + } + + public void setExtraStylesheet(String extraStylesheet) { + this.extraStylesheet = extraStylesheet; + } + + public String getExtraFooter() { + return extraFooter; + } + + public void setExtraFooter(String extraFooter) { + this.extraFooter = extraFooter; + } + + public Verse getBasisRef() { + return basisRef; + } + + public void setBasisRef(Key basisRef) { + // KeyUtil always returns a Verse even if it is only Gen 1:1 + this.basisRef = KeyUtil.getVerse(basisRef); + } + + public boolean isRedLetter() { + return isRedLetter; + } + + public void setRedLetter(boolean isRedLetter) { + this.isRedLetter = isRedLetter; + } + + public String getFont() { + return font; + } + + public void setFont(String font) { + this.font = font; + } + + public String getCssClassForCustomFont() { + return cssClassForCustomFont; + } + + public void setCssClassForCustomFont(String cssClassForCustomFont) { + this.cssClassForCustomFont = cssClassForCustomFont; + } + + 
public boolean isConvertStrongsRefsToLinks() { + return convertStrongsRefsToLinks; + } + + public void setConvertStrongsRefsToLinks(boolean convertStrongsRefsToLinks) { + this.convertStrongsRefsToLinks = convertStrongsRefsToLinks; + } + + public List getVersesWithNotes() { + return versesWithNotes; + } + + public void setVersesWithNotes(List versesWithNotes) { + this.versesWithNotes = versesWithNotes; + } + + public List getVersesWithBookmarks() { + return versesWithBookmarks; + } + + public void setVersesWithBookmarks(List versesWithBookmarks) { + this.versesWithBookmarks = versesWithBookmarks; + } + + public URI getModuleBasePath() { + return moduleBasePath; + } + + public void setModuleBasePath(URI moduleBasePath) { + this.moduleBasePath = moduleBasePath; + } + + public Versification getDocumentVersification() { + if (documentVersification != null) { + return documentVersification; + } else { + return Versifications.instance().getVersification(Versifications.DEFAULT_V11N); + } + } + + public void setDocumentVersification(Versification documentVersification) { + this.documentVersification = documentVersification; + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlSaxHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlSaxHandler.java new file mode 100644 index 0000000..62fba4d --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/OsisToHtmlSaxHandler.java @@ -0,0 +1,341 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import android.util.Log; + +import org.apache.commons.lang3.StringUtils; +import org.bspeice.minimalbible.service.format.OsisSaxHandler; +import org.bspeice.minimalbible.service.format.osistohtml.preprocessor.HebrewCharacterPreprocessor; +import org.bspeice.minimalbible.service.format.osistohtml.preprocessor.TextPreprocessor; +import org.bspeice.minimalbible.service.format.osistohtml.strongs.StrongsHandler; 
+import org.bspeice.minimalbible.service.format.osistohtml.strongs.StrongsLinkCreator; +import org.bspeice.minimalbible.service.format.osistohtml.tei.OrthHandler; +import org.bspeice.minimalbible.service.format.osistohtml.tei.PronHandler; +import org.bspeice.minimalbible.service.format.osistohtml.tei.RefHandler; +import org.bspeice.minimalbible.service.format.osistohtml.tei.TEIUtil; +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +import static org.bspeice.minimalbible.service.format.Constants.HTML; + +/** + * Convert OSIS tags into html tags + *

+ * Example OSIS tags from KJV Ps 119 v1 showing title, w, note <foreign + * n="?">ALEPH.</foreign> Blessed + * are the + * undefiled ... who + * walk ... of the + * Lord. undefiled: + * or, perfect, or, sincere + *

+ * Example of notes cross references from ESV In the Job + * 38:4-7; Ps. 33:6; + * 136:5; Isa. 42:5; 45:18; John 1:1-3; Acts 14:15; 17:24; Col. 1:16, 17; Heb. 1:10; 11:3; Rev. + * 4:11beginning + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class OsisToHtmlSaxHandler extends OsisSaxHandler { + + private static final String HEBREW_LANGUAGE_CODE = "he"; + // properties + private OsisToHtmlParameters parameters; + // tag handlers for the different OSIS tags + private VerseHandler verseHandler; + private MyNoteMarker myNoteMarker; + private BookmarkMarker bookmarkMarker; + private NoteHandler noteHandler; + private ReferenceHandler referenceHandler; + private RefHandler refHandler; + private TitleHandler titleHandler; + private QHandler qHandler; + private LGHandler lgHandler; + private LHandler lHandler; + private HiHandler hiHandler; + private OrthHandler orthHandler; + private PronHandler pronHandler; + private StrongsHandler strongsHandler; + private FigureHandler figureHandler; + // processor for the tag content + private TextPreprocessor textPreprocessor; + // internal logic + private VerseInfo verseInfo = new VerseInfo(); + private boolean isAnyTextWritten = false; + + public OsisToHtmlSaxHandler(OsisToHtmlParameters parameters) { + super(); + this.parameters = parameters; + verseHandler = new VerseHandler(parameters, verseInfo, getWriter()); + myNoteMarker = new MyNoteMarker(parameters, verseInfo, getWriter()); + bookmarkMarker = new BookmarkMarker(parameters, verseInfo, getWriter()); + referenceHandler = new ReferenceHandler(parameters, noteHandler, getWriter()); + refHandler = new RefHandler(parameters, noteHandler, getWriter()); + titleHandler = new TitleHandler(parameters, verseInfo, getWriter()); + qHandler = new QHandler(parameters, getWriter()); + hiHandler = new HiHandler(parameters, getWriter()); + orthHandler = new OrthHandler(parameters, getWriter()); + pronHandler = new PronHandler(parameters, getWriter()); + lgHandler = new LGHandler(parameters, getWriter()); + lHandler = new LHandler(parameters, getWriter()); + strongsHandler = new StrongsHandler(parameters, getWriter()); + figureHandler = new 
FigureHandler(parameters, getWriter()); + + //TODO at the moment we can only have a single TextPreprocesor, need to chain them and maybe make the writer a TextPreprocessor and put it at the end of the chain + if (HEBREW_LANGUAGE_CODE.equals(parameters.getLanguageCode())) { + textPreprocessor = new HebrewCharacterPreprocessor(); + } else if (parameters.isConvertStrongsRefsToLinks()) { + textPreprocessor = new StrongsLinkCreator(); + } + + } + + @Override + public void startDocument() { + String jsTag = "\n\n"; + String styleSheetTag = ""; + String extraStyleSheetTag = ""; + if (parameters.getExtraStylesheet() != null) { + extraStyleSheetTag = ""; + } + write(" " + + "" + + styleSheetTag + extraStyleSheetTag + "\n" + + jsTag + + "" + + "" + + ""); + + // force rtl for rtl languages - rtl support on Android is poor but + // forcing it seems to help occasionally + if (!parameters.isLeftToRight()) { + write(""); + } + } + + /* + * Called when the Parser Completes parsing the Current XML File. + */ + @Override + public void endDocument() { + + // close last verse + if (parameters.isVersePerline()) { + //close last verse + if (verseInfo.currentVerseNo > 1) { + write(""); + } + } + + // add optional footer e.g. Strongs show all occurrences link + if (StringUtils.isNotEmpty(parameters.getExtraFooter())) { + write(parameters.getExtraFooter()); + } + + if (!parameters.isLeftToRight()) { + write(""); + } + // add padding at bottom to allow last verse to scroll to top of page + // and become current verse + write(""); + } + + /* + * Called when the starting of the Element is reached. For Example if we + * have Tag called ... , then this method is called when + * tag is Encountered while parsing the Current XML File. The + * AttributeList Parameter has the list of all Attributes declared for the + * Current Element in the XML File. 
+ */ + @Override + public void startElement(String namespaceURI, + String sName, // simple name + String qName, // qualified name + Attributes attrs) { + String name = getName(sName, qName); // element name + + debug(name, attrs, true); + + if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) { + verseHandler.startAndUpdateVerse(attrs); + bookmarkMarker.start(); + myNoteMarker.start(); + // record that we are into a new verse + verseInfo.isTextSinceVerse = false; + } else if (name.equals(OSISUtil.OSIS_ELEMENT_TITLE)) { + titleHandler.start(attrs); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_NOTE)) { + noteHandler.startNote(attrs); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_REFERENCE)) { + referenceHandler.start(attrs); + } else if (name.equals(TEIUtil.TEI_ELEMENT_REF)) { + refHandler.start(attrs); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_LB)) { + if (isAnyTextWritten) { + write(HTML.BR); + } + } else if (name.equals(OSISUtil.OSIS_ELEMENT_LG)) { + lgHandler.start(attrs); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_L)) { + lHandler.startL(attrs); + } else if (name.equals("div")) { + String type = attrs.getValue("type"); + if ("paragraph".equals(type)) { + // ignore sID start paragraph sID because it often comes after the verse no and causes a gap between verse no verse text + String eID = attrs.getValue("eID"); + if (eID != null && isAnyTextWritten) { + write("<p />"); + } + } + } else if (name.equals(OSISUtil.OSIS_ELEMENT_P)) { + write("<p>"); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_Q)) { + qHandler.start(attrs); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_HI)) { + hiHandler.start(attrs); + } else if (name.equals(TEIUtil.TEI_ELEMENT_ORTH)) { + orthHandler.start(attrs); + } else if (name.equals(TEIUtil.TEI_ELEMENT_PRON)) { + pronHandler.start(attrs); + } else if (name.equals("milestone")) { + String type = attrs.getValue(OSISUtil.OSIS_ATTR_TYPE); + if (StringUtils.isNotEmpty(type)) { + if (type.equals("line") || type.equals("x-p")) { + if 
(isAnyTextWritten) { + //e.g. NETtext Mt 4:14; KJV Gen 1:6 + writeOptionallyBeforeVerse(HTML.BR); + } + } + } + } else if (name.equals("transChange")) { + write("<span class='transChange'>"); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_W)) { + strongsHandler.start(attrs); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_FIGURE)) { + figureHandler.start(attrs); + } else { + // TODO: Cleanup + Log.i("OsisToHtmlSaxHandler", "Verse " + verseInfo.currentVerseNo + " unsupported OSIS tag:" + name); + } + } + + /* + * Called when the Ending of the current Element is reached. For example in + * the above explanation, this method is called when tag is reached + */ + @Override + public void endElement(String namespaceURI, String sName, // simple name + String qName // qualified name + ) { + String name = getName(sName, qName); + + debug(name, null, false); + + if (name.equals(OSISUtil.OSIS_ELEMENT_TITLE)) { + titleHandler.end(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) { + myNoteMarker.end(); + bookmarkMarker.end(); + verseHandler.end(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_NOTE)) { + noteHandler.endNote(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_REFERENCE)) { + referenceHandler.end(); + } else if (name.equals(TEIUtil.TEI_ELEMENT_REF)) { + refHandler.end(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_LG)) { + lgHandler.end(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_L)) { + lHandler.endL(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_P)) { + write("

"); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_Q)) { + // end quotation, but tag is a marker and contains no content + // so will appear at beginning and end of speech + qHandler.end(); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_HI)) { + hiHandler.end(); + } else if (name.equals(TEIUtil.TEI_ELEMENT_ORTH)) { + orthHandler.end(); + } else if (name.equals(TEIUtil.TEI_ELEMENT_PRON)) { + pronHandler.end(); + } else if (name.equals("transChange")) { + write(""); + } else if (name.equals(OSISUtil.OSIS_ELEMENT_W)) { + strongsHandler.end(); + } + } + + /* + * While Parsing the XML file, if extra characters like space or enter + * Character are encountered then this method is called. If you don't want + * to do anything special with these characters, then you can normally leave + * this method blank. + */ + @Override + public void characters(char buf[], int offset, int len) { + String s = new String(buf, offset, len); + + // record that we are now beyond the verse, but do it quickly so as not to slow down parsing + verseInfo.isTextSinceVerse = verseInfo.isTextSinceVerse || + len > 2 || + StringUtils.isNotBlank(s); + isAnyTextWritten = isAnyTextWritten || verseInfo.isTextSinceVerse; + + if (textPreprocessor != null) { + s = textPreprocessor.process(s); + } + + write(s); + } + + /** + * allow line breaks and titles to be moved before verse number + */ + protected void writeOptionallyBeforeVerse(String s) { + boolean writeBeforeVerse = !verseInfo.isTextSinceVerse; + if (writeBeforeVerse) { + getWriter().beginInsertAt(verseInfo.positionToInsertBeforeVerse); + } + getWriter().write(s); + if (writeBeforeVerse) { + getWriter().finishInserting(); + } + } + + /* + * In the XML File if the parser encounters a Processing Instruction which + * is declared like this Then this method is called where Target + * parameter will have "ProgramName:BooksLib" and data parameter will have + * QUERY="author, isbn, price". You can invoke a External Program from this + * Method if required. 
+ */ + public void processingInstruction(String target, String data) { + // noop + } + + public String getDirection() { + return parameters.isLeftToRight() ? "ltr" : "rtl"; + } + + class VerseInfo { + int currentVerseNo; + int positionToInsertBeforeVerse; + boolean isTextSinceVerse = false; + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/QHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/QHandler.java new file mode 100644 index 0000000..f80b01a --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/QHandler.java @@ -0,0 +1,86 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +import java.util.Stack; + +/** + * This can either signify a quote or Red Letter + * Example from ESV + * But he answered them, You see all these + * Example from KJV + * said ... unto them, ...See ye + *

+ * Apparently quotation marks are not supposed to appear in the KJV (https://sites.google.com/site/kjvtoday/home/Features-of-the-KJV/quotation-marks) + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class QHandler { + + private static final String MARKER = "marker"; + private static final String HTML_QUOTE_ENTITY = """; + private HtmlTextWriter writer; + private OsisToHtmlParameters parameters; + ; + // quotes can be embedded so maintain a stack of info about each quote to be used when closing quote + private Stack stack = new Stack(); + + public QHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + this.parameters = parameters; + this.writer = writer; + } + + public String getTagName() { + return "q"; + } + + public void start(Attributes attrs) { + QuoteInfo quoteInfo = new QuoteInfo(); + + String who = attrs.getValue(OSISUtil.ATTRIBUTE_Q_WHO); + boolean isWho = who != null; + + quoteInfo.isMilestone = TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_SID, attrs) || TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_EID, attrs); + + // Jesus -> no default quote + quoteInfo.marker = TagHandlerHelper.getAttribute(MARKER, attrs, isWho ? 
"" : HTML_QUOTE_ENTITY); + + quoteInfo.isRedLetter = parameters.isRedLetter() && "Jesus".equals(who); + + // apply the above logic + writer.write(quoteInfo.marker); + if (quoteInfo.isRedLetter) { + writer.write(""); + } + + // and save the info for the closing tag + stack.push(quoteInfo); + } + + public void end() { + QuoteInfo quoteInfo = stack.pop(); + + // Jesus words + if (quoteInfo.isRedLetter) { + writer.write(""); + } + + // milestone opening and closing tags are doubled up so ensure not double quotes + if (!quoteInfo.isMilestone) { + writer.write(quoteInfo.marker); + } + } + + enum QType {quote, redLetter} + + private static class QuoteInfo { + private boolean isMilestone; + private boolean isRedLetter; + private String marker = HTML_QUOTE_ENTITY; + + + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/ReferenceHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/ReferenceHandler.java new file mode 100644 index 0000000..2e86b81 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/ReferenceHandler.java @@ -0,0 +1,157 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + + +import android.util.Log; + +import org.apache.commons.lang3.StringUtils; +import org.crosswire.jsword.book.OSISUtil; +import org.crosswire.jsword.passage.Key; +import org.crosswire.jsword.passage.Passage; +import org.crosswire.jsword.passage.PassageKeyFactory; +import org.crosswire.jsword.passage.RestrictionType; +import org.crosswire.jsword.passage.VerseRange; +import org.xml.sax.Attributes; + +import java.util.Iterator; + +import static org.bspeice.minimalbible.service.format.Constants.BIBLE_PROTOCOL; + +/** + * Convert OSIS tags into html tags + *

+ * Example OSIS tags from KJV Ps 119 v1 showing title, w, note + * + * <foreign n="?">ALEPH.</foreign> + * + * Blessed are the undefiled + * ... who walk + * ... of the Lord. + * undefiled: or, perfect, or, sincere + *

+ * Example of notes cross references from ESV + * In the Job 38:4-7; Ps. 33:6; 136:5; Isa. 42:5; 45:18; John 1:1-3; Acts 14:15; 17:24; Col. 1:16, 17; Heb. 1:10; 11:3; Rev. 4:11beginning + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author.
+ */
+public class ReferenceHandler {
+
+    private final String TAG = "ReferenceHandler";
+    private OsisToHtmlParameters parameters;
+    // osisRef of the reference element currently open; reset to null in end()
+    private String currentRefOsisRef;
+    private NoteHandler noteHandler;
+    private HtmlTextWriter writer;
+
+    /**
+     * @param osisToHtmlParameters conversion settings (basis ref, versification, auto-wrap flag)
+     * @param noteHandler          consulted in end() to decide whether the reference becomes a note
+     * @param theWriter            output writer whose temp store captures the reference text
+     */
+    public ReferenceHandler(OsisToHtmlParameters osisToHtmlParameters, NoteHandler noteHandler, HtmlTextWriter theWriter) {
+        this.parameters = osisToHtmlParameters;
+        this.noteHandler = noteHandler;
+        this.writer = theWriter;
+    }
+
+    /**
+     * Start of a reference element.
+     *
+     * @param attrs element attributes; the OSISUtil.OSIS_ATTR_REF value is kept for end()
+     */
+    public void start(Attributes attrs) {
+        // store the osisRef attribute for use with the note
+        String target = attrs.getValue(OSISUtil.OSIS_ATTR_REF);
+        start(target);
+    }
+
+    /**
+     * Begins capturing the reference text into the writer's temp store.
+     *
+     * @param target osisRef of the reference, may be null
+     */
+    protected void start(String target) {
+        // don't need to do anything until closing reference tag except..
+        // delete separators like ';' that sometimes occur between reference tags
+        writer.clearTempStore();
+        writer.writeToTempStore();
+        // store the osisRef attribute for use with the note
+        this.currentRefOsisRef = target;
+    }
+
+    /**
+     * End of a reference element: hand the captured text to the note handler when inside a
+     * note (or when auto-wrapping is on), otherwise write it out as a link.
+     */
+    public void end() {
+        writer.finishWritingToTempStore();
+
+        if (noteHandler.isInNote() || parameters.isAutoWrapUnwrappedRefsInNote()) {
+            noteHandler.addNoteForReference(writer.getTempStoreString(), currentRefOsisRef);
+        } else {
+            String refText = writer.getTempStoreString();
+            writer.write(getReferenceTag(currentRefOsisRef, refText));
+        }
+
+        // and clear the buffer
+        writer.clearTempStore();
+        currentRefOsisRef = null;
+    }
+
+    /**
+     * create a link tag from an OSISref and the content of the tag
+     *
+     * NOTE(review): every result.append("") below is empty in this copy of the patch and the
+     * Iterator has no type argument; presumably the link markup (and the BIBLE_PROTOCOL usage the
+     * file imports) was lost when the patch was extracted - restore from upstream before relying
+     * on this method.
+     *
+     * @param reference osisRef of the reference, may be null
+     * @param content   text content of the reference element
+     * @return the generated markup, or just the content if any exception is thrown while parsing
+     */
+    private String getReferenceTag(String reference, String content) {
+        Log.d(TAG, "Ref:" + reference + " Content:" + content);
+        StringBuilder result = new StringBuilder();
+        try {
+
+            //JSword does not know the basis (default book) so prepend it if it looks like JSword failed to work it out
+            //We only need to worry about the first ref because JSword uses the first ref as the basis for the subsequent refs
+            // if content starts with a number and is not followed directly by an alpha char e.g. 1Sa
+            if (reference == null && content != null && content.length() > 0 && StringUtils.isNumeric(content.subSequence(0, 1)) &&
+                    (content.length() < 2 || !StringUtils.isAlphaSpace(content.subSequence(1, 2)))) {
+
+                // maybe should use VerseRangeFactory.fromstring(orig, basis)
+                // this check for a colon to see if the first ref is verse:chap is not perfect but it will do until JSword adds a fix
+                int firstColonPos = content.indexOf(":");
+                boolean isVerseAndChapter = firstColonPos > 0 && firstColonPos < 4;
+                if (isVerseAndChapter) {
+                    reference = parameters.getBasisRef().getBook().getOSIS() + " " + content;
+                } else {
+                    reference = parameters.getBasisRef().getBook().getOSIS() + " " + parameters.getBasisRef().getChapter() + ":" + content;
+                }
+                Log.d(TAG, "Patched reference:" + reference);
+            } else if (reference == null) {
+                reference = content;
+            }
+
+            // convert urns of type book:key to sword://book/key to simplify urn parsing (1 fewer case to check for).
+            // Avoid urls of type 'matt 3:14' by excluding urns with a space
+            if (reference.contains(":") && !reference.contains(" ") && !reference.startsWith("sword://")) {
+                reference = "sword://" + reference.replace(":", "/");
+            }
+
+            boolean isFullSwordUrn = reference.contains("/") && reference.contains(":");
+            if (isFullSwordUrn) {
+                // e.g. sword://StrongsRealGreek/01909
+                // don't play with the reference - just assume it is correct
+                result.append("");
+                result.append(content);
+                result.append("");
+            } else {
+                Passage ref = (Passage) PassageKeyFactory.instance().getKey(parameters.getDocumentVersification(), reference);
+                boolean isSingleVerse = ref.countVerses() == 1;
+                boolean isSimpleContent = content.length() < 3 && content.length() > 0;
+                Iterator it = ref.rangeIterator(RestrictionType.CHAPTER);
+
+                if (isSingleVerse && isSimpleContent) {
+                    // simple verse no e.g. 1 or 2 preceding the actual verse in TSK
+                    result.append("");
+                    result.append(content);
+                    result.append("");
+                } else {
+                    // multiple complex references
+                    boolean isFirst = true;
+                    while (it.hasNext()) {
+                        Key key = it.next();
+                        if (!isFirst) {
+                            result.append(" ");
+                        }
+                        result.append("");
+                        result.append(key);
+                        result.append("");
+                        isFirst = false;
+                    }
+                }
+            }
+        } catch (Exception e) {
+            // NOTE(review): the caught exception itself is not logged, only the reference text
+            Log.e(TAG, "Error parsing OSIS reference:" + reference);
+            // just return the content with no html markup
+            result.append(content);
+        }
+        return result.toString();
+    }
+}
+
diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TagHandlerHelper.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TagHandlerHelper.java new file mode 100644 index 0000000..34e7f65 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TagHandlerHelper.java @@ -0,0 +1,82 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import android.util.Log; + +import org.apache.commons.lang3.StringUtils; +import org.xml.sax.Attributes; + +/** + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's author. + */ +public class TagHandlerHelper { + + private static final String TAG = "TagHandlerHelper"; + + /** + * support defaultvalue with attribute fetch + */ + public static String getAttribute(String attributeName, Attributes attrs, String defaultValue) { + String attrValue = attrs.getValue(attributeName); + if (attrValue != null) { + return attrValue; + } else { + return defaultValue; + } + } + + /** + * support defaultvalue with attribute fetch + */ + public static int getAttribute(String attributeName, Attributes attrs, int defaultValue) { + int retval = defaultValue; + try { + String attrValue = attrs.getValue(attributeName); + if (attrValue != null) { + retval = Integer.parseInt(attrValue); + } + } catch (Exception e) { + Log.w(TAG, "Non numeric but expected integer for " + attributeName); + } + return retval; + } + + /** + * see if an attribute exists and has a value + * + * @param attributeName + * @param attrs + * @return + */ + public static boolean isAttr(String attributeName, Attributes attrs) { + String attrValue = attrs.getValue(attributeName); + return StringUtils.isNotEmpty(attrValue); + } + + /** + * return verse from osis id of format book.chap.verse + * + * @param osisID osis Id + * @return verse number + */ + public static int osisIdToVerseNum(String osisID) { + /* You have to use "\\.", the first backslash is interpreted as an escape by the + Java compiler, so you have to use two to get a String that contains one + backslash and a dot, which is what you want the regexp engine to see.*/ + if (osisID != null) { + String[] parts = osisID.split("\\."); + if (parts.length > 1) { + String verse = parts[parts.length - 1]; + return Integer.valueOf(verse); + } + } + return 0; + } + + public static void printAttributes(Attributes attrs) { + for (int i = 0; i < attrs.getLength(); i++) { + Log.d(TAG, attrs.getLocalName(i) + ":" + attrs.getValue(i)); + } + } +} diff --git 
a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TitleHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TitleHandler.java new file mode 100644 index 0000000..7f3b209 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/TitleHandler.java @@ -0,0 +1,79 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.apache.commons.lang3.StringUtils; +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo; +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +/** + * This can either signify a quote or Red Letter + * Example + * ESV section heading + * ESV canonical heading<title canonical="true" subType="x-preverse" type="section">To the choirmaster. Of David, + * WEB when formatted with JSword seems to have type="x-gen" + * <p/> + * Apparently quotation marks are not supposed to appear in the KJV (https://sites.google.com/site/kjvtoday/home/Features-of-the-KJV/quotation-marks) + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. 
+ */ +public class TitleHandler { + + private static final String PREVERSE = "preverse"; // the full string is 'x-preverse' but we just check for contains for extra tolerance + private HtmlTextWriter writer; + private VerseInfo verseInfo; + private OsisToHtmlParameters parameters; + private boolean isShowTitle; + private boolean isMoveBeforeVerse; + + public TitleHandler(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) { + this.parameters = parameters; + this.verseInfo = verseInfo; + this.writer = writer; + } + + + public String getTagName() { + return OSISUtil.OSIS_ELEMENT_TITLE; + } + + public void start(Attributes attrs) { + //JSword adds the chapter no at the top but hide this because the chapter is in the And Bible header + boolean addedByJSword = attrs.getLength() == 1 && OSISUtil.GENERATED_CONTENT.equals(attrs.getValue(OSISUtil.OSIS_ATTR_TYPE)); + // otherwise show if user wants Titles or the title is canonical + isShowTitle = !addedByJSword && + (parameters.isShowTitles() || + "true".equalsIgnoreCase(attrs.getValue(OSISUtil.OSIS_ATTR_CANONICAL))); + + if (isShowTitle) { + // ESV has subType butNETtext has lower case subtype so concatenate both and search with contains() + String subtype = attrs.getValue(OSISUtil.OSIS_ATTR_SUBTYPE) + attrs.getValue(OSISUtil.OSIS_ATTR_SUBTYPE.toLowerCase()); + isMoveBeforeVerse = StringUtils.containsIgnoreCase(subtype, PREVERSE) || (!verseInfo.isTextSinceVerse && verseInfo.currentVerseNo > 0); + if (isMoveBeforeVerse) { + // section Titles normally come before a verse, so overwrite the, already written verse, which is rewritten on writer.finishedInserting + writer.beginInsertAt(verseInfo.positionToInsertBeforeVerse); + } + + // get title type from level + String titleClass = "heading" + TagHandlerHelper.getAttribute(OSISUtil.OSIS_ATTR_LEVEL, attrs, "1"); + + writer.write("<h1 class='" + titleClass + "'>"); + } else { + writer.setDontWrite(true); + } + } + + public void end() { + if (isShowTitle) { 
+ writer.write("</h1>"); + if (isMoveBeforeVerse) { + // move positionToInsertBeforeVerse forward to after this title otherwise any subtitle will be above the title + verseInfo.positionToInsertBeforeVerse = writer.getPosition(); + writer.finishInserting(); + } + } else { + writer.setDontWrite(false); + } + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/VerseHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/VerseHandler.java new file mode 100644 index 0000000..45406e7 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/VerseHandler.java @@ -0,0 +1,81 @@ +package org.bspeice.minimalbible.service.format.osistohtml; + +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo; +import org.crosswire.jsword.book.OSISUtil; +import org.xml.sax.Attributes; + +import static org.bspeice.minimalbible.service.format.Constants.HTML; + +/** + * Write the verse number at the beginning of a verse + * Also handle verse per line + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. 
+ */ +public class VerseHandler { + + private OsisToHtmlParameters parameters; + + private VerseInfo verseInfo; + + private int writerRollbackPosition; + + private HtmlTextWriter writer; + + public VerseHandler(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) { + this.parameters = parameters; + this.verseInfo = verseInfo; + this.writer = writer; + } + + + public String getTagName() { + return OSISUtil.OSIS_ELEMENT_VERSE; + } + + public void startAndUpdateVerse(Attributes attrs) { + writerRollbackPosition = writer.getPosition(); + + if (attrs != null) { + verseInfo.currentVerseNo = TagHandlerHelper.osisIdToVerseNum(attrs.getValue("", OSISUtil.OSIS_ATTR_OSISID)); + } else { + verseInfo.currentVerseNo++; + } + + if (parameters.isVersePerline()) { + //close preceding verse + if (verseInfo.currentVerseNo > 1) { + writer.write("</div>"); + } + // start current verse + writer.write("<div>"); + } + + writeVerse(verseInfo.currentVerseNo); + } + + public void end() { + if (!verseInfo.isTextSinceVerse) { + writer.removeAfter(writerRollbackPosition); + } + } + + private void writeVerse(int verseNo) { + verseInfo.positionToInsertBeforeVerse = writer.getPosition(); + + // The id is used to 'jump to' the verse using javascript so always need the verse tag with an id + // Do not show verse 0 + StringBuilder verseHtml = new StringBuilder(); + if (parameters.isShowVerseNumbers() && verseNo != 0) { + verseHtml.append(" <span class='verse' id='").append(verseNo).append("'>").append(verseNo).append("</span>").append(HTML.NBSP); + } else { + // we really want an empty span but that is illegal and causes problems such as incorrect verse calculation in Psalms + // so use something that will hopefully interfere as little as possible - a zero-width-space + // also put a space before it to allow a separation from the last word of previous verse or to be ignored if start of line + verseHtml.append(" <span class='verse' id='").append(verseNo).append("'/>​</span>"); 
+ } + writer.write(verseHtml.toString()); + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/preprocessor/HebrewCharacterPreprocessor.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/preprocessor/HebrewCharacterPreprocessor.java new file mode 100644 index 0000000..97cd0cc --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/preprocessor/HebrewCharacterPreprocessor.java @@ -0,0 +1,99 @@ +package org.bspeice.minimalbible.service.format.osistohtml.preprocessor; + +import android.os.Build; + +import org.apache.commons.lang3.StringUtils; + +/** + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. + */ +public class HebrewCharacterPreprocessor implements TextPreprocessor { + + // the following characters are not handled well in Android 2.2 & 2.3 and + // need special processing which for all except Sof Pasuq means removal + // puctuation char at the end of hebrew verses that looks like a ':' + private static final String HEBREW_SOF_PASUQ_CHAR = "\u05C3"; + // vowels are on the first row and cantillations on the second + private static final char[] HEBREW_VOWELS_AND_CANTILLATIONS = new char[]{ + '\u05B0', '\u05B1', '\u05B2', '\u05B3', '\u05B4', '\u05B5', + '\u05B6', '\u05B7', '\u05B8', '\u05B9', '\u05BA', '\u05BB', + '\u05BC', '\u05BD', '\u05BE', '\u05BF', '\u05C1', '\u05C2', + '\u0591', '\u0592', '\u0593', '\u0594', '\u0595', '\u0596', + '\u0597', '\u0598', '\u0599', '\u059A', '\u059B', '\u059C', + '\u059D', '\u059E', '\u05A0', '\u05A1', '\u05A2', '\u05A3', + '\u05A4', '\u05A5', '\u05A6', '\u05A7', '\u05A8', '\u05A9', + '\u05AA', '\u05AB', '\u05AC', '\u05AD', '\u05AE', '\u05AF'}; + + /** + * StringUtils methods only compare with a single char and hence create lots + * of temporary Strings This method compares with all chars and just creates + * one new string for 
each original string. This is to minimise memory + * overhead & gc. + * + * @param str + * @param removeChars + * @return + */ + public static String remove(String str, char[] removeChars) { + if (StringUtils.isEmpty(str) + || !StringUtils.containsAny(str, removeChars)) { + return str; + } + StringBuilder r = new StringBuilder(str.length()); +// for all chars in string + for (int i = 0; i < str.length(); i++) { + char strCur = str.charAt(i); +// compare with all chars to be removed + boolean matched = false; + for (int j = 0; j < removeChars.length && !matched; j++) { + if (removeChars[j] == strCur) { + matched = true; + } + } +// if current char does not match any in the list then add it to the + if (!matched) { + r.append(strCur); + } + } + return r.toString(); + } + + /** + * Some characters are not handled well in Android 2.2 & 2.3 and need + * special processing which for all except Sof Pasuq means removal + * + * @param text + * @return adjusted string + */ + @Override + public String process(String text) { + if (isVowelsBugFixed()) { + return text; + } else { + return doHebrewCharacterAdjustments(text); + } + } + + /** + * vowels rtl problem fixed in recent cyanogenmod and 4.0.3 + */ + private boolean isVowelsBugFixed() { + return Build.VERSION.SDK_INT >= 15 || //Build.VERSION_CODES.ICE_CREAM_SANDWICH_MR1; + (Build.VERSION.SDK_INT >= 10 && System.getProperty("os.version").contains("cyanogenmod")); // 10 is GINGERBREAD_MR1 (2.3.3) + } + + private String doHebrewCharacterAdjustments(String s) { + // remove Hebrew vowels because i) they confuse bidi and ii) they are + // not positioned correctly under/over the appropriate letter + // http://groups.google.com/group/android-contrib/browse_thread/thread/5b6b079f9ec7792a?pli=1 + s = remove(s, HEBREW_VOWELS_AND_CANTILLATIONS); + + // even without vowel points the : at the end of each verse confuses + // Android's bidi but specifying the char as rtl helps + s = s.replace(HEBREW_SOF_PASUQ_CHAR, "<span dir='rtl'>" + + 
/**
 * Preprocesses text content taken from a Sword module.
 */
public interface TextPreprocessor {

    /**
     * Convert module text to that required for display.
     *
     * @param text text content from the module
     * @return the text to display
     */
    String process(String text);
}
to this program is held by it's author. + */ +public class StrongsHandler { + + List<String> pendingStrongsAndMorphTags; + ; + private HtmlTextWriter writer; + + private OsisToHtmlParameters parameters; + + public StrongsHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + this.parameters = parameters; + this.writer = writer; + } + + public String getTagName() { + return "q"; + } + + public void start(Attributes attrs) { + if ((parameters.isShowStrongs() || parameters.isShowMorphology()) && TagHandlerHelper.isAttr(OSISUtil.ATTRIBUTE_W_LEMMA, attrs)) { + // Strongs & morphology references + // example of strongs refs: <w lemma="strong:H0430">God</w> <w lemma="strong:H0853 strong:H01254" morph="strongMorph:TH8804">created</w> + // better example, because we just use Robinson: <w lemma="strong:G652" morph="robinson:N-NSM" src="2">an apostle</w> + String strongsLemma = attrs.getValue(OSISUtil.ATTRIBUTE_W_LEMMA); + if (strongsLemma.startsWith(OSISUtil.LEMMA_STRONGS)) { + String morphology = attrs.getValue(OSISUtil.ATTRIBUTE_W_MORPH); + pendingStrongsAndMorphTags = getStrongsAndMorphTags(strongsLemma, morphology); + } + } + } + + public void end() { + if ((parameters.isShowStrongs() || parameters.isShowMorphology())) { + if (pendingStrongsAndMorphTags != null) { + for (int i = 0; i < pendingStrongsAndMorphTags.size(); i++) { + writer.write(HTML.SPACE); // separator between adjacent tags and words + writer.write(pendingStrongsAndMorphTags.get(i)); + } + writer.write(HTML.SPACE); // separator between adjacent tags and words + pendingStrongsAndMorphTags = null; + } + } + } + + /** + * Convert a Strongs lemma into a url E.g. 
lemmas "strong:H0430", + * "strong:H0853 strong:H01254" + * + * @return a single char to use as a note ref + */ + private List<String> getStrongsAndMorphTags(String strongsLemma, + String morphology) { + // there may occasionally be more than on ref so split them into a list + // of single refs + List<String> strongsTags = getStrongsTags(strongsLemma); + List<String> morphTags = getMorphTags(morphology); + + List<String> mergedStrongsAndMorphTags = new ArrayList<String>(); + + // each morph tag should relate to a Strongs tag so they should be same + // length but can't assume that + // merge the tags into the merge list + for (int i = 0; i < Math.max(strongsTags.size(), morphTags.size()); i++) { + StringBuilder merged = new StringBuilder(); + if (i < strongsTags.size()) { + merged.append(strongsTags.get(i)); + } + if (i < morphTags.size()) { + merged.append(morphTags.get(i)); + } + mergedStrongsAndMorphTags.add(merged.toString()); + } + + // for some reason the generic tags should come last and the order seems + // always reversed in other systems + // the second tag (once reversed) seems to relate to a missing word like + // eth + Collections.reverse(mergedStrongsAndMorphTags); + return mergedStrongsAndMorphTags; + } + + private List<String> getStrongsTags(String strongsLemma) { + // there may occasionally be more than on ref so split them into a list + // of single refs + List<String> strongsTags = new ArrayList<String>(); + + if (parameters.isShowStrongs()) { + String[] refList = strongsLemma.split(" "); + for (String ref : refList) { + // ignore if string doesn't start with "strong;" + if (ref.startsWith(OSISUtil.LEMMA_STRONGS) + && ref.length() > OSISUtil.LEMMA_STRONGS.length() + 2) { + // reduce ref like "strong:H0430" to "H0430" + ref = ref.substring(OSISUtil.LEMMA_STRONGS.length()); + + // select Hebrew or Greek protocol + String protocol = StrongsUtil.getStrongsProtocol(ref); + + if (protocol != null) { + // remove initial G or H + String strongsNumber = 
ref.substring(1); + + String strTag = StrongsUtil.createStrongsLink(protocol, strongsNumber); + + strongsTags.add(strTag); + } + } + } + } + return strongsTags; + } + + /** + * example of strongs and morphology, we just use Robinson: <w + * lemma="strong:G652" morph="robinson:N-NSM" src="2">an apostle</w> + * + * @param morphology + * @return + */ + private List<String> getMorphTags(String morphology) { + // there may occasionally be more than on ref so split them into a list + // of single refs + List<String> morphTags = new ArrayList<String>(); + + if (parameters.isShowMorphology()) { + if (StringUtils.isNotEmpty(morphology)) { + String[] refList = morphology.split(" "); + for (String ref : refList) { + // ignore if string doesn't start with "robinson" + if (ref.startsWith(OSISUtil.MORPH_ROBINSONS) + && ref.length() > OSISUtil.MORPH_ROBINSONS.length() + 2) { + // reduce ref like "robinson:N-NSM" to "N-NSM" for + // display + String display = ref.substring(OSISUtil.MORPH_ROBINSONS + .length()); + + StringBuilder tag = new StringBuilder(); + tag.append("<a href='").append(ref).append( + "' class='morphology'>").append(display) + .append("</a>"); + + morphTags.add(tag.toString()); + } + } + } + } + return morphTags; + } + + enum QType {quote, redLetter} +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsLinkCreator.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsLinkCreator.java new file mode 100644 index 0000000..720b8ef --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsLinkCreator.java @@ -0,0 +1,43 @@ +package org.bspeice.minimalbible.service.format.osistohtml.strongs; + +import org.bspeice.minimalbible.service.format.osistohtml.preprocessor.TextPreprocessor; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * Used with StrongsGreek and StrongsHebrew to find text like 'see HEBREW for 0433' 
and 'see GREEK for 1223' and converts to links + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. + */ +public class StrongsLinkCreator implements TextPreprocessor { + + static Pattern patt = Pattern.compile("see (HEBREW|GREEK) for (\\d{1,5})"); //".*see ([HEBREW|GREEK]) for (\\d{1,5}).*"); + + public String process(String text) { + StringBuffer result = new StringBuffer(); + Matcher m = patt.matcher(text); + + while (m.find()) { + String lang = m.group(1); + String refNo = m.group(2); + + // select Hebrew or Greek protocol + String protocol = StrongsUtil.getStrongsProtocol(lang); + + // append the actual link to the Strongs ref + String refLink = StrongsUtil.createStrongsLink(protocol, refNo, m.group(), ""); + m.appendReplacement(result, refLink); + } + + // append any trailing space after the last match, or if no match then the whole string + m.appendTail(result); + + return result.toString(); + } + + +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsUtil.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsUtil.java new file mode 100644 index 0000000..334fee7 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/strongs/StrongsUtil.java @@ -0,0 +1,60 @@ +package org.bspeice.minimalbible.service.format.osistohtml.strongs; + +import org.apache.commons.lang3.StringUtils; + +import static org.bspeice.minimalbible.service.format.Constants.GREEK_DEF_PROTOCOL; +import static org.bspeice.minimalbible.service.format.Constants.HEBREW_DEF_PROTOCOL; + +/** + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. 
+ */ +public class StrongsUtil { + + private static final String DEFAULT_CSS_CLASS = "strongs"; + + /** + * create an html link for teh passed in strongs number and protocol + * + * @param protocol = G or H + * @param strongsNumber + * @return + */ + public static String createStrongsLink(String protocol, String strongsNumber) { + return createStrongsLink(protocol, strongsNumber, strongsNumber, DEFAULT_CSS_CLASS); + } + + public static String createStrongsLink(String protocol, String strongsNumber, String content, String cssClass) { + // pad with leading zeros to 5 characters + String paddedRef = StringUtils.leftPad(strongsNumber, 5, "0"); + + StringBuilder tag = new StringBuilder(); + // create opening tag for Strong's link + tag.append("<a href='"); + + // calculate uri e.g. H:01234 + tag.append(protocol).append(":").append(paddedRef); + + // set css class + tag.append("' class='" + cssClass + "'>"); + + // descriptive string + tag.append(content); + + // link closing tag + tag.append("</a>"); + + String strTag = tag.toString(); + return strTag; + } + + public static String getStrongsProtocol(String ref) { + if (ref.startsWith("H")) { + return HEBREW_DEF_PROTOCOL; + } else if (ref.startsWith("G")) { + return GREEK_DEF_PROTOCOL; + } + return null; + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/OrthHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/OrthHandler.java new file mode 100644 index 0000000..09d20c9 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/OrthHandler.java @@ -0,0 +1,34 @@ +package org.bspeice.minimalbible.service.format.osistohtml.tei; + +import org.bspeice.minimalbible.service.format.osistohtml.HiHandler; +import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter; +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters; +import org.xml.sax.Attributes; + + +/** + * Handle orth tag 
very similarly to hi tag + * <orth>?????????</orth> + * <orth rend="bold" type="trans">aneuthetos</orth> + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. + */ +public class OrthHandler extends HiHandler { + + private final static String DEFAULT = "bold"; + + public OrthHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + super(parameters, writer); + } + + public String getTagName() { + return "orth"; + } + + public void start(Attributes attrs) { + String rend = attrs.getValue(TEIUtil.TEI_ATTR_REND); + start(rend, DEFAULT); + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/PronHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/PronHandler.java new file mode 100644 index 0000000..c661308 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/PronHandler.java @@ -0,0 +1,34 @@ +package org.bspeice.minimalbible.service.format.osistohtml.tei; + +import org.bspeice.minimalbible.service.format.osistohtml.HiHandler; +import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter; +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters; +import org.xml.sax.Attributes; + + +/** + * Handle orth tag very similarly to hi tag + * <orth>?????????</orth> + * <orth rend="bold" type="trans">aneuthetos</orth> + * + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. 
+ */ +public class PronHandler extends HiHandler { + + private final static String DEFAULT = "italic"; + + public PronHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) { + super(parameters, writer); + } + + public String getTagName() { + return "pron"; + } + + public void start(Attributes attrs) { + String rend = attrs.getValue(TEIUtil.TEI_ATTR_REND); + start(rend, DEFAULT); + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/RefHandler.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/RefHandler.java new file mode 100644 index 0000000..e8d0eff --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/RefHandler.java @@ -0,0 +1,24 @@ +package org.bspeice.minimalbible.service.format.osistohtml.tei; + +import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter; +import org.bspeice.minimalbible.service.format.osistohtml.NoteHandler; +import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters; +import org.bspeice.minimalbible.service.format.osistohtml.ReferenceHandler; +import org.xml.sax.Attributes; + +/** + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. 
+ */ +public class RefHandler extends ReferenceHandler { + + public RefHandler(OsisToHtmlParameters osisToHtmlParameters, NoteHandler noteHandler, HtmlTextWriter theWriter) { + super(osisToHtmlParameters, noteHandler, theWriter); + } + + public void start(Attributes attrs) { + String target = attrs.getValue(TEIUtil.TEI_ATTR_TARGET); + start(target); + } +} diff --git a/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/TEIUtil.java b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/TEIUtil.java new file mode 100644 index 0000000..a04a588 --- /dev/null +++ b/app/src/main/java/org/bspeice/minimalbible/service/format/osistohtml/tei/TEIUtil.java @@ -0,0 +1,18 @@ +package org.bspeice.minimalbible.service.format.osistohtml.tei; + +/** + * @author Martin Denham [mjdenham at gmail dot com] + * @see gnu.lgpl.License for license details.<br> + * The copyright to this program is held by it's author. + */ +public class TEIUtil { + + // E.g. <ref target="StrongsHebrew:00411">H411</ref> taken from StrongsHebrew:00428 + public static final String TEI_ELEMENT_REF = "ref"; + public static final String TEI_ATTR_TARGET = "target"; + + public static final String TEI_ELEMENT_ORTH = "orth"; + public static final String TEI_ELEMENT_PRON = "pron"; + // the way tag contents are rendered e.g. 'bold'. 'italic' + public static final String TEI_ATTR_REND = "rend"; +}