Add in the And-bible OSIS parser

Took me a while to do the research I needed, sorry it's been so long. Largely useless currently, but gives me a starting place to adapt.
This commit is contained in:
Bradlee Speice 2014-08-22 19:53:57 -04:00
parent 6271cc9626
commit c0c0643b84
30 changed files with 2441 additions and 0 deletions

View File

@ -67,6 +67,7 @@ dependencies {
compile 'com.readystatesoftware.systembartint:systembartint:+' compile 'com.readystatesoftware.systembartint:systembartint:+'
compile 'com.netflix.rxjava:rxjava-android:+' compile 'com.netflix.rxjava:rxjava-android:+'
compile 'com.android.support:appcompat-v7:20.+' compile 'com.android.support:appcompat-v7:20.+'
compile 'org.apache.commons:commons-lang3:+'
androidTestCompile 'com.jayway.awaitility:awaitility:+' androidTestCompile 'com.jayway.awaitility:awaitility:+'
androidTestCompile 'org.mockito:mockito-core:+' androidTestCompile 'org.mockito:mockito-core:+'

View File

@ -64,6 +64,7 @@ public class VerseLookupService implements Action1<Verse> {
/** /**
* Perform the ugly work of getting the actual data for a verse * Perform the ugly work of getting the actual data for a verse
*
* @param v * @param v
* @return * @return
*/ */
@ -71,11 +72,15 @@ public class VerseLookupService implements Action1<Verse> {
BookData bookData = new BookData(book, v); BookData bookData = new BookData(book, v);
try { try {
SAXEventProvider provider = bookData.getSAXEventProvider(); SAXEventProvider provider = bookData.getSAXEventProvider();
// provider.provideSAXEvents(new OsisParser());
return provider.toString(); return provider.toString();
} catch (BookException e) { } catch (BookException e) {
e.printStackTrace(); e.printStackTrace();
return "Unable to locate " + v.toString() + "!"; return "Unable to locate " + v.toString() + "!";
// } catch (SAXException e) {
// e.printStackTrace();
} }
// return null;
} }
/** /**

View File

@ -0,0 +1,28 @@
package org.bspeice.minimalbible.service.format;
/**
* see http://www.crosswire.org/wiki/Frontends:URI_Standard
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class Constants {

    // ---- URL protocol / URI scheme names ----

    // general schemes
    public static final String SWORD_PROTOCOL = "sword"; //$NON-NLS-1$
    public static final String BIBLE_PROTOCOL = "bible"; //$NON-NLS-1$
    public static final String DICTIONARY_PROTOCOL = "dict"; //$NON-NLS-1$
    public static final String COMMENTARY_PROTOCOL = "comment"; //$NON-NLS-1$

    // definition and occurrence schemes
    public static final String GREEK_DEF_PROTOCOL = "gdef"; //$NON-NLS-1$
    public static final String HEBREW_DEF_PROTOCOL = "hdef"; //$NON-NLS-1$
    public static final String ALL_GREEK_OCCURRENCES_PROTOCOL = "allgoccur"; //$NON-NLS-1$
    public static final String ALL_HEBREW_OCCURRENCES_PROTOCOL = "allhoccur"; //$NON-NLS-1$

    // morphology schemes
    public static final String ROBINSON_GREEK_MORPH_PROTOCOL = "robinson"; //$NON-NLS-1$
    public static final String HEBREW_MORPH_PROTOCOL = "hmorph"; //$NON-NLS-1$

    /**
     * HTML snippets shared by the handlers that generate markup.
     */
    public static class HTML {
        /** Numeric character reference for a non-breaking space. */
        public static final String NBSP = "&#160;";
        /** A single ordinary space character. */
        public static final String SPACE = " ";
        /** Self-closing line break tag. */
        public static final String BR = "<br />";
    }
}

View File

@ -0,0 +1,31 @@
package org.bspeice.minimalbible.service.format;
/**
* Info on a note or cross reference
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class Note {
    // Public string identifiers; nothing inside this class reads them.
    public static final String SUMMARY = "summary";
    public static final String DETAIL = "detail";

    // Marker shown in the text for this note (e.g. the letter written next to the verse).
    private final String noteRef;
    // Body text of the note or cross reference.
    private final String noteText;

    /**
     * Create a note.
     * <p/>
     * Only {@code noteRef} and {@code noteText} are retained. The remaining arguments are
     * accepted so existing call sites keep compiling, but they are currently ignored.
     *
     * @param verseNo  verse number the note belongs to (currently ignored)
     * @param noteRef  reference marker of the note; may be null
     * @param noteText text of the note; may be null
     * @param noteType kind of note (currently ignored)
     * @param osisRef  OSIS reference the note points at (currently ignored)
     */
    public Note(int verseNo, String noteRef, String noteText, NoteType noteType, String osisRef) {
        this.noteRef = noteRef;
        this.noteText = noteText;
    }

    /**
     * @return the note as {@code noteRef:noteText}; a null part is rendered as "null"
     */
    @Override
    public String toString() {
        return noteRef + ":" + noteText;
    }

    /** Distinguishes ordinary notes from cross references. */
    public enum NoteType {TYPE_GENERAL, TYPE_REFERENCE}
}

View File

@ -0,0 +1,91 @@
package org.bspeice.minimalbible.service.format;
import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Base SAX handler for OSIS input. It owns the {@link HtmlTextWriter} that output is written
 * to and provides small helpers for element names, attribute checks and debug tracing.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class OsisSaxHandler extends DefaultHandler {
    // debugging: when true, debug() echoes every tag and its attributes into the output
    private boolean isDebugMode = false;

    // collects everything this handler (and its subclasses) writes
    private final HtmlTextWriter writer;

    public OsisSaxHandler() {
        writer = new HtmlTextWriter();
    }

    /**
     * @return the text written to the underlying writer so far
     */
    @Override
    public String toString() {
        return writer.getHtml();
    }

    /**
     * Pick the element name to work with.
     *
     * @param eName local name reported by the parser; may be null or empty
     * @param qName qualified name reported by the parser
     * @return {@code eName} when it is non-empty, otherwise {@code qName}
     */
    protected String getName(String eName, String qName) {
        if (eName != null && eName.length() > 0) {
            return eName;
        } else {
            return qName; // not namespace-aware
        }
    }

    /**
     * Append text to the output writer.
     */
    protected void write(String s) {
        writer.write(s);
    }

    /**
     * Check the value of the specified attribute and return true if it is the same as checkValue.
     *
     * @param attrs      attributes of the current element; null yields false
     * @param attrName   name of the attribute to look up
     * @param checkValue expected value; null yields false instead of a NullPointerException
     * @return true only if the attribute is present and equal to {@code checkValue}
     */
    protected boolean isAttrValue(Attributes attrs, String attrName, String checkValue) {
        if (attrs == null || checkValue == null) {
            return false;
        }
        String value = attrs.getValue(attrName);
        return checkValue.equals(value);
    }

    /**
     * When debug mode is on, write the tag as {@code *name attr="value" ...*} followed by a
     * newline into the output. Does nothing when debug mode is off.
     *
     * @param name       element name
     * @param attrs      attributes to list; may be null
     * @param isStartTag not used by this implementation
     */
    protected void debug(String name, Attributes attrs, boolean isStartTag) {
        if (isDebugMode) {
            write("*" + name);
            if (attrs != null) {
                for (int i = 0; i < attrs.getLength(); i++) {
                    String aName = attrs.getLocalName(i); // Attr name
                    // fall back to the qualified name when no local name is reported
                    if ("".equals(aName)) aName = attrs.getQName(i);
                    write(" ");
                    write(aName + "=\"" + attrs.getValue(i) + "\"");
                }
            }
            write("*\n");
        }
    }

    public void setDebugMode(boolean isDebugMode) {
        this.isDebugMode = isDebugMode;
    }

    /**
     * Reset the underlying writer.
     */
    protected void reset() {
        writer.reset();
    }

    public HtmlTextWriter getWriter() {
        return writer;
    }
}

View File

@ -0,0 +1,134 @@
package org.bspeice.minimalbible.service.format;
import org.bspeice.minimalbible.service.format.osistohtml.TagHandlerHelper;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import java.util.Stack;
/**
* Convert OSIS input into Canonical text (used when creating search index)
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class OsisToCanonicalTextSaxHandler extends OsisSaxHandler {

    // verse number taken from the most recent verse start tag; stored but not read in this class
    @SuppressWarnings("unused")
    private int currentVerseNo;

    // write/ignore state per open element, on top of the initial entry pushed in startDocument
    private Stack<CONTENT_STATE> writeContentStack = new Stack<CONTENT_STATE>();

    public OsisToCanonicalTextSaxHandler() {
        super();
    }

    /**
     * Clear previous output and start in WRITE mode.
     */
    @Override
    public void startDocument() {
        reset();
        writeContentStack.push(CONTENT_STATE.WRITE);
    }

    /**
     * Remove the initial state pushed by {@link #startDocument()}; the stack should then be empty.
     */
    @Override
    public void endDocument() {
        writeContentStack.pop();
        assert (writeContentStack.isEmpty());
    }

    /**
     * Decide whether the content of the element that is starting should be written or ignored,
     * and push that decision onto the state stack.
     *
     * @param namespaceURI not used
     * @param sName        simple name
     * @param qName        qualified name
     * @param attrs        attributes of the element; may be null
     */
    @Override
    public void startElement(String namespaceURI, String sName, String qName, Attributes attrs) {
        String name = getName(sName, qName);
        debug(name, attrs, true);

        CONTENT_STATE stateForElement;
        if (isAttrValue(attrs, "canonical", "true")) {
            // explicitly canonical content is always written
            stateForElement = CONTENT_STATE.WRITE;
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) {
            if (attrs != null) {
                currentVerseNo = TagHandlerHelper.osisIdToVerseNum(attrs.getValue("", OSISUtil.OSIS_ATTR_OSISID));
            }
            stateForElement = CONTENT_STATE.WRITE;
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_NOTE)
                || name.equals(OSISUtil.OSIS_ELEMENT_TITLE)
                || name.equals(OSISUtil.OSIS_ELEMENT_REFERENCE)) {
            // notes, titles and references are skipped unless marked canonical above
            stateForElement = CONTENT_STATE.IGNORE;
        } else {
            boolean separatesLines = name.equals(OSISUtil.OSIS_ELEMENT_L)
                    || name.equals(OSISUtil.OSIS_ELEMENT_LB)
                    || name.equals(OSISUtil.OSIS_ELEMENT_P);
            if (separatesLines) {
                // these occur in Psalms to separate different paragraphs.
                // A space is needed for TTS not to be confused by punctuation with a missing
                // space like 'toward us,and the'
                write(" ");
            }
            // any other tag (e.g. italics inside canonical text) inherits the parent's state
            stateForElement = writeContentStack.peek();
        }
        writeContentStack.push(stateForElement);
    }

    /**
     * Close the current element: add a separating space after a verse and drop the element's
     * write/ignore state.
     */
    @Override
    public void endElement(String namespaceURI, String sName, String qName) {
        String name = getName(sName, qName);
        debug(name, null, false);

        boolean verseEnded = name.equals(OSISUtil.OSIS_ELEMENT_VERSE);
        if (verseEnded) {
            // without a space two verses butt up against each other, which looks bad and
            // confuses TTS
            write(" ");
        }
        writeContentStack.pop();
    }

    /**
     * Write character data, but only while the current state is WRITE.
     */
    @Override
    public void characters(char[] buf, int offset, int len) {
        if (writeContentStack.peek() == CONTENT_STATE.WRITE) {
            write(new String(buf, offset, len));
        }
    }

    /**
     * Push an explicit state: WRITE when {@code writeContent} is true, IGNORE otherwise.
     */
    protected void writeContent(boolean writeContent) {
        writeContentStack.push(writeContent ? CONTENT_STATE.WRITE : CONTENT_STATE.IGNORE);
    }

    private enum CONTENT_STATE {WRITE, IGNORE}
}

View File

@ -0,0 +1,66 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo;
import org.crosswire.jsword.passage.Verse;
import java.util.HashSet;
import java.util.Set;
/**
 * Display an img if the current verse has a bookmark
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class BookmarkMarker {

    // markup written immediately after the start of a bookmarked verse
    private static final String BOOKMARK_IMG_HTML =
            "<img src='file:///android_asset/images/GoldStar16x16.png' class='myNoteImg'/>";

    // verse numbers of all bookmarked verses, kept in a set for quick lookup
    private Set<Integer> bookmarkedVerses = new HashSet<Integer>();
    private OsisToHtmlParameters parameters;
    private VerseInfo verseInfo;
    private HtmlTextWriter writer;
    // true between start() having written a marker and the matching end()
    private boolean bookmarkOpenTagWritten = false;

    /**
     * @param parameters supplies the bookmarked verses (may be null) and the show-bookmarks flag
     * @param verseInfo  holds the number of the verse currently being processed
     * @param writer     destination for the generated HTML
     */
    public BookmarkMarker(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) {
        this.parameters = parameters;
        this.verseInfo = verseInfo;
        this.writer = writer;

        if (parameters.getVersesWithBookmarks() != null) {
            for (Verse bookmarked : parameters.getVersesWithBookmarks()) {
                bookmarkedVerses.add(bookmarked.getVerse());
            }
        }
    }

    /**
     * @return an empty string
     */
    public String getTagName() {
        return "";
    }

    /**
     * Called just after the verse start tag: writes the bookmark image when bookmarks are
     * enabled and the current verse number is bookmarked.
     */
    public void start() {
        if (!parameters.isShowBookmarks()) {
            return;
        }
        if (!bookmarkedVerses.contains(verseInfo.currentVerseNo)) {
            return;
        }
        writer.write(BOOKMARK_IMG_HTML);
        // wrapping the verse in <span class='bookmark'> is currently disabled
        bookmarkOpenTagWritten = true;
    }

    /**
     * Called at the end of the verse; only clears the marker flag because the closing
     * span is currently disabled.
     */
    public void end() {
        bookmarkOpenTagWritten = false;
    }
}

View File

@ -0,0 +1,39 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.apache.commons.lang3.StringUtils;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
/**
* Handle <figure src="imagefile.jpg" /> to display pictures
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class FigureHandler {
    private HtmlTextWriter writer;
    private OsisToHtmlParameters parameters;

    /**
     * @param parameters supplies the module base path that image sources are resolved against
     * @param writer     destination for the generated HTML
     */
    public FigureHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        this.parameters = parameters;
        this.writer = writer;
    }

    /**
     * @return the name of the OSIS tag this handler deals with
     */
    public String getTagName() {
        return "figure";
    }

    /**
     * Write an img tag whose source is the module base path, a slash and the figure's src
     * attribute. Nothing is written when the src attribute is missing or empty.
     *
     * @param attrs attributes of the figure element
     */
    public void start(Attributes attrs) {
        String imageSrc = attrs.getValue(OSISUtil.ATTRIBUTE_FIGURE_SRC);
        if (StringUtils.isEmpty(imageSrc)) {
            return;
        }
        StringBuilder imgTag = new StringBuilder("<img src='");
        imgTag.append(parameters.getModuleBasePath());
        imgTag.append("/").append(imageSrc).append("'/>");
        writer.write(imgTag.toString());
    }

    /**
     * Nothing to close: the img tag is written complete in {@link #start(Attributes)}.
     */
    public void end() {
    }
}

View File

@ -0,0 +1,66 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import java.util.Arrays;
import java.util.List;
import static org.crosswire.jsword.book.OSISUtil.HI_ACROSTIC;
import static org.crosswire.jsword.book.OSISUtil.HI_BOLD;
import static org.crosswire.jsword.book.OSISUtil.HI_EMPHASIS;
import static org.crosswire.jsword.book.OSISUtil.HI_ILLUMINATED;
import static org.crosswire.jsword.book.OSISUtil.HI_ITALIC;
import static org.crosswire.jsword.book.OSISUtil.HI_LINETHROUGH;
import static org.crosswire.jsword.book.OSISUtil.HI_NORMAL;
import static org.crosswire.jsword.book.OSISUtil.HI_SMALL_CAPS;
import static org.crosswire.jsword.book.OSISUtil.HI_SUB;
import static org.crosswire.jsword.book.OSISUtil.HI_SUPER;
import static org.crosswire.jsword.book.OSISUtil.HI_UNDERLINE;
/**
* Handle hi element e.g. <hi type="italic">the child with his mother Mary</hi>
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class HiHandler {
    // possible values of type attribute
    private static final List<String> HI_TYPE_LIST = Arrays.asList(
            HI_ACROSTIC, HI_BOLD, HI_EMPHASIS, HI_ILLUMINATED, HI_ITALIC, HI_LINETHROUGH,
            HI_NORMAL, HI_SMALL_CAPS, HI_SUB, HI_SUPER, HI_UNDERLINE);

    // style used when the type attribute is missing or not in the list above
    private static final String DEFAULT = "bold";

    private HtmlTextWriter writer;

    /**
     * @param parameters not used by this handler
     * @param writer     destination for the generated HTML
     */
    public HiHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        this.writer = writer;
    }

    /**
     * @return the name of the OSIS tag this handler deals with
     */
    public String getTagName() {
        return "hi";
    }

    /**
     * Open a span for a hi element, taking the style from its type attribute.
     */
    public void start(Attributes attrs) {
        start(attrs.getValue(OSISUtil.OSIS_ATTR_TYPE), DEFAULT);
    }

    /**
     * Open a span carrying two CSS classes: the tag name and 'hi_*', e.g. hi_bold.
     *
     * @param style        requested style; replaced by {@code defaultStyle} when null or unknown
     * @param defaultStyle fallback style
     */
    public void start(String style, String defaultStyle) {
        boolean recognised = style != null && HI_TYPE_LIST.contains(style);
        String effectiveStyle = recognised ? style : defaultStyle;
        writer.write("<span class='" + getTagName() + " hi_" + effectiveStyle + "'>");
    }

    /**
     * Close the span opened by start.
     */
    public void end() {
        writer.write("</span>");
    }
}

View File

@ -0,0 +1,91 @@
package org.bspeice.minimalbible.service.format.osistohtml;
/**
* Write characters out to a StringBuilder - used while creating html for display
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's authors.
*/
public class HtmlTextWriter {
    // main output buffer
    private final StringBuilder writer = new StringBuilder();
    // side buffer that receives text while a temp-store request is outstanding
    private final StringBuilder tempStore = new StringBuilder();

    // outstanding "don't write" requests; while positive all text is dropped
    private int dontWriteRequestCount = 0;
    // outstanding temp-store requests; while non-zero text goes to tempStore
    private int writeTempStoreRequestCount = 0;

    // tail of the main buffer cut off by beginInsertAt, restored by finishInserting
    private String overwrittenString = "";

    public HtmlTextWriter() {
    }

    /**
     * Append text to the active buffer. Text is dropped while writing is suppressed; otherwise
     * it goes to the main buffer, or to the temp store while a temp-store request is open.
     */
    public void write(String htmlText) {
        if (dontWriteRequestCount > 0) {
            return;
        }
        StringBuilder target = (writeTempStoreRequestCount == 0) ? writer : tempStore;
        target.append(htmlText);
    }

    /**
     * Allow pre-verse headings: cut the main buffer at insertOffset and remember the removed
     * tail so that following writes land at that position.
     */
    public void beginInsertAt(int insertOffset) {
        // substring range-checks insertOffset before the buffer is modified
        overwrittenString = writer.substring(insertOffset);
        writer.setLength(insertOffset);
    }

    /**
     * Finish inserting and restore the overwritten tail of the string.
     */
    public void finishInserting() {
        writer.append(overwrittenString);
        overwrittenString = "";
    }

    /**
     * @return current length of the main buffer
     */
    public int getPosition() {
        return writer.length();
    }

    /**
     * Discard everything in the main buffer from position onwards.
     */
    public void removeAfter(int position) {
        writer.delete(position, writer.length());
    }

    /**
     * Empty the main buffer. The temp store and the request counters are left untouched.
     */
    public void reset() {
        writer.setLength(0);
    }

    /**
     * Begin diverting writes to the temp store; calls may be nested.
     */
    public void writeToTempStore() {
        writeTempStoreRequestCount++;
    }

    /**
     * End one temp-store request opened by writeToTempStore.
     */
    public void finishWritingToTempStore() {
        writeTempStoreRequestCount--;
    }

    public void clearTempStore() {
        tempStore.setLength(0);
    }

    public String getTempStoreString() {
        return tempStore.toString();
    }

    /**
     * @return contents of the main buffer
     */
    public String getHtml() {
        return writer.toString();
    }

    /**
     * Add (true) or release (false) one request to suppress all writing.
     */
    public void setDontWrite(boolean dontWrite) {
        dontWriteRequestCount += dontWrite ? 1 : -1;
    }
}

View File

@ -0,0 +1,56 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.xml.sax.Attributes;
import java.util.Stack;
/**
* The lg or "line group" element is used to contain any group of poetic lines. Poetic lines are handled at the line level by And Bible, not line group
* so this class does nothing.
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
@SuppressWarnings("unused")
public class LGHandler {
// Destination for generated HTML; only stored, because all output code below is disabled.
private HtmlTextWriter writer;
// Conversion parameters; only stored.
private OsisToHtmlParameters parameters;
// Would track, per open lg element, whether a div was opened; unused while start()/end() are disabled.
private Stack<LGType> stack = new Stack<LGType>();
/**
 * Store the collaborators for later use; nothing is written.
 */
public LGHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
this.parameters = parameters;
this.writer = writer;
}
/**
 * @return the name of the OSIS tag this handler deals with
 */
public String getTagName() {
return "lg";
}
/**
 * Currently a no-op. The disabled code below would open a div with class 'lg' for line
 * groups that carry neither an sID nor an eID attribute, and record the choice on the stack.
 */
public void start(Attributes attrs) {
// ignore this for now because it is untested
// LGType lgtype = LGType.IGNORE;
// if (TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_SID, attrs) ||
// TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_EID, attrs)) {
// lgtype = LGType.IGNORE;
// } else {
// // allow spacing around groups of poetry
// writer.write("<div class='lg'>");
// lgtype = LGType.DIV;
// }
// stack.push(lgtype);
}
/**
 * Currently a no-op. The disabled code below would close the div opened by start().
 */
public void end() {
// LGType lgtype = stack.pop();
// if (LGType.DIV.equals(lgtype)) {
// writer.write("</div>");
// }
}
// How a given lg element was (or would be) rendered: wrapped in a div, or ignored.
enum LGType {DIV, IGNORE}
}

View File

@ -0,0 +1,84 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import android.util.Log;
import org.apache.commons.lang3.StringUtils;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import java.util.Stack;
import static org.bspeice.minimalbible.service.format.Constants.HTML;
/**
 * Handle the OSIS l (line) element by writing indentation and line breaks.
 * Example from ESV Prov 19:1
 * <l sID="x9938"/>...<l eID="x9938" type="x-br"/><l sID="x9939" type="x-indent"/>..<l eID="x9939" type="x-br"/>
 * <p/>
 * NOTE(review): the remark below is about quotation marks, not lines; it looks carried over
 * from a quote handler - confirm before relying on it.
 * Apparently quotation marks are not supposed to appear in the KJV (https://sites.google.com/site/kjvtoday/home/Features-of-the-KJV/quotation-marks)
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class LHandler {
    // number of non-breaking spaces in one indent level
    private static final int INDENT_CHAR_COUNT = 4; // TODO: Set a standard value for this

    // HTML for one indent level. A constant, so that constructing a handler no longer
    // rewrites state shared by every instance.
    private static final String INDENT_HTML = StringUtils.repeat(HTML.NBSP, INDENT_CHAR_COUNT);

    private final HtmlTextWriter writer;

    // what startL decided for each open l element, consumed by endL
    private final Stack<LType> stack = new Stack<LType>();

    /**
     * @param parameters not used by this handler
     * @param writer     destination for the generated HTML
     */
    public LHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        this.writer = writer;
    }

    /**
     * @return the name of the OSIS tag this handler deals with
     */
    public String getTagName() {
        return "l";
    }

    /**
     * Write the indent or line break that an l start tag calls for and remember the decision
     * for the matching {@link #endL()}.
     * <ul>
     * <li>type containing "indent": (level - 1) + 1 indents</li>
     * <li>type containing "br": a line break</li>
     * <li>any other non-empty type: nothing is written, the type is logged</li>
     * <li>no type, sID present: (level - 1) indents</li>
     * <li>no type, eID present: a line break</li>
     * <li>plain l: (level - 1) indents now and a line break in endL</li>
     * </ul>
     *
     * @param attrs attributes of the l element
     */
    public void startL(Attributes attrs) {
        // Refer to Gen 3:14 in ESV for example use of type=x-indent
        String type = attrs.getValue(OSISUtil.OSIS_ATTR_TYPE);
        int level = TagHandlerHelper.getAttribute(OSISUtil.OSIS_ATTR_LEVEL, attrs, 1);
        // make numIndents default to zero
        int numIndents = Math.max(0, level - 1);

        LType ltype = LType.IGNORE;
        if (StringUtils.isNotEmpty(type)) {
            if (type.contains("indent")) {
                // this tag is specifically for indenting so ensure there is an indent
                numIndents = numIndents + 1;
                writer.write(StringUtils.repeat(INDENT_HTML, numIndents));
                ltype = LType.INDENT;
            } else if (type.contains("br")) {
                writer.write(HTML.BR);
                ltype = LType.BR;
            } else {
                ltype = LType.IGNORE;
                Log.d("LHandler", "Unknown <l> tag type:" + type);
            }
        } else if (TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_SID, attrs)) {
            writer.write(StringUtils.repeat(INDENT_HTML, numIndents));
            ltype = LType.IGNORE;
        } else if (TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_EID, attrs)) {
            // e.g. Isaiah 40:12
            writer.write(HTML.BR);
            ltype = LType.BR;
        } else {
            // simple <l>
            writer.write(StringUtils.repeat(INDENT_HTML, numIndents));
            ltype = LType.END_BR;
        }
        stack.push(ltype);
    }

    /**
     * Finish the most recently started l element; a plain l gets its trailing line break here.
     */
    public void endL() {
        LType type = stack.pop();
        if (type == LType.END_BR) {
            writer.write(HTML.BR);
        }
    }

    // what startL wrote for an element; END_BR means endL still owes a line break
    enum LType {INDENT, BR, END_BR, IGNORE}
}

View File

@ -0,0 +1,61 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.KeyUtil;
import org.crosswire.jsword.passage.Verse;
import java.util.HashSet;
import java.util.Set;
/**
* Display an img if the current verse has MyNote
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class MyNoteMarker {

    // markup written immediately after the start of a verse that has a MyNote
    private static final String MY_NOTE_IMG_HTML =
            "<img src='file:///android_asset/images/pencil16x16.png' class='myNoteImg'/>";

    // verse numbers of all verses with a note, kept in a set for quick lookup
    private Set<Integer> myNoteVerses = new HashSet<Integer>();
    private OsisToHtmlParameters parameters;
    private VerseInfo verseInfo;
    private HtmlTextWriter writer;

    /**
     * @param parameters supplies the verses with notes (may be null) and the show-my-notes flag
     * @param verseInfo  holds the number of the verse currently being processed
     * @param writer     destination for the generated HTML
     */
    public MyNoteMarker(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) {
        this.parameters = parameters;
        this.verseInfo = verseInfo;
        this.writer = writer;

        if (parameters.getVersesWithNotes() != null) {
            for (Key noteKey : parameters.getVersesWithNotes()) {
                myNoteVerses.add(KeyUtil.getVerse(noteKey).getVerse());
            }
        }
    }

    /**
     * @return an empty string
     */
    public String getTagName() {
        return "";
    }

    /**
     * Called just after the verse start tag: writes the note image when MyNotes are enabled
     * and the current verse number has a note.
     */
    public void start() {
        if (!parameters.isShowMyNotes()) {
            return;
        }
        if (myNoteVerses.contains(verseInfo.currentVerseNo)) {
            writer.write(MY_NOTE_IMG_HTML);
        }
    }

    /**
     * Nothing to close: the img tag is written complete in {@link #start()}.
     */
    public void end() {
    }
}

View File

@ -0,0 +1,141 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.apache.commons.lang3.StringUtils;
import org.bspeice.minimalbible.service.format.Note;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo;
import org.xml.sax.Attributes;
import java.util.ArrayList;
import java.util.List;
import static org.bspeice.minimalbible.service.format.Note.NoteType;
/**
* Handle OSIS note tags: write a note reference marker into the html and collect the text of notes and cross references
* <p/>
* Example OSIS tags from KJV Ps 119 v1 showing title, w, note
* <title canonical="true" subType="x-preverse" type="section">
* <foreign n="?">ALEPH.</foreign>
* </title>
* <w lemma="strong:H0835">Blessed</w> <transChange type="added">are</transChange> <w lemma="strong:H08549">the undefiled</w>
* ... <w lemma="strong:H01980" morph="strongMorph:TH8802">who walk</w>
* ... <w lemma="strong:H03068">of the <seg><divineName>Lord</divineName></seg></w>.
* <note type="study">undefiled: or, perfect, or, sincere</note>
* <p/>
* Example of notes cross references from ESV
* In the <note n="a" osisID="Gen.1.1!crossReference.a" osisRef="Gen.1.1" type="crossReference"><reference osisRef="Job.38.4-Job.38.7">Job 38:4-7</reference>; <reference osisRef="Ps.33.6">Ps. 33:6</reference>; <reference osisRef="Ps.136.5">136:5</reference>; <reference osisRef="Isa.42.5">Isa. 42:5</reference>; <reference osisRef="Isa.45.18">45:18</reference>; <reference osisRef="John.1.1-John.1.3">John 1:1-3</reference>; <reference osisRef="Acts.14.15">Acts 14:15</reference>; <reference osisRef="Acts.17.24">17:24</reference>; <reference osisRef="Col.1.16-Col.1.17">Col. 1:16, 17</reference>; <reference osisRef="Heb.1.10">Heb. 1:10</reference>; <reference osisRef="Heb.11.3">11:3</reference>; <reference osisRef="Rev.4.11">Rev. 4:11</reference></note>beginning
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class NoteHandler {
    private OsisToHtmlParameters parameters;
    private VerseInfo verseInfo;
    private HtmlTextWriter writer;

    // number of note refs generated so far; drives the a-z cycle in createNoteRef
    private int noteCount = 0;
    // todo temporarily use a list but later switch to Map<int,String> of verse->note
    private List<Note> notesList = new ArrayList<Note>();
    // true between startNote and endNote
    private boolean isInNote = false;
    // marker of the note currently (or most recently) being processed
    private String currentNoteRef;

    /**
     * @param osisToHtmlParameters supplies the show-notes and auto-wrap flags
     * @param verseInfo            holds the number of the verse currently being processed
     * @param theWriter            destination for the generated HTML and temporary note text
     */
    public NoteHandler(OsisToHtmlParameters osisToHtmlParameters, VerseInfo verseInfo, HtmlTextWriter theWriter) {
        this.parameters = osisToHtmlParameters;
        this.verseInfo = verseInfo;
        this.writer = theWriter;
    }

    /**
     * A note tag has started: write its marker into the text and divert following output to
     * the writer's temp store so the note body can be collected.
     */
    public void startNote(Attributes attrs) {
        isInNote = true;
        currentNoteRef = getNoteRef(attrs);
        writeNoteRef(currentNoteRef);
        writer.writeToTempStore();
    }

    /**
     * A note tag has ended: turn the collected temp-store text into a general Note (unless it
     * consists only of the characters "[];().,"), clear the temp store and stop diverting output.
     */
    public void endNote() {
        String collectedText = writer.getTempStoreString();
        if (!collectedText.isEmpty()) {
            boolean punctuationOnly = StringUtils.containsOnly(collectedText, "[];().,");
            if (!punctuationOnly) {
                notesList.add(new Note(verseInfo.currentVerseNo, currentNoteRef, collectedText,
                        NoteType.TYPE_GENERAL, null));
            }
            // and clear the buffer
            writer.clearTempStore();
        }
        isInNote = false;
        writer.finishWritingToTempStore();
    }

    /**
     * A reference is finished and now the note must be added.
     * <p/>
     * A few modules like HunUj have refs in the text that are not surrounded by a note tag
     * (unlike esv). When auto-wrapping is enabled such a ref gets its own marker here.
     *
     * @param refText display text of the reference
     * @param osisRef OSIS reference it points at
     */
    public void addNoteForReference(String refText, String osisRef) {
        boolean autoWrap = parameters.isAutoWrapUnwrappedRefsInNote();

        // special code to cope with HunUj problem
        if (autoWrap && !isInNote()) {
            currentNoteRef = createNoteRef();
            writeNoteRef(currentNoteRef);
        }

        // record the note information to show if user requests to see notes for this verse
        if (isInNote || autoWrap) {
            notesList.add(new Note(verseInfo.currentVerseNo, currentNoteRef, refText,
                    NoteType.TYPE_REFERENCE, osisRef));
        }
    }

    /**
     * Use the 'n' attribute for the note ref, or generate the next one when it is absent or empty.
     *
     * @return the note ref to use
     */
    private String getNoteRef(Attributes attrs) {
        String fromAttribute = attrs.getValue("n");
        return StringUtils.isEmpty(fromAttribute) ? createNoteRef() : fromAttribute;
    }

    /**
     * Generate the next note ref, cycling through the letters a-z.
     *
     * @return a single char to use as a note ref
     */
    private String createNoteRef() {
        char nextNoteChar = (char) ('a' + (noteCount++ % 26));
        return String.valueOf(nextNoteChar);
    }

    /**
     * Write the note ref html to the output, but only when notes are shown.
     */
    private void writeNoteRef(String noteRef) {
        if (!parameters.isShowNotes()) {
            return;
        }
        writer.write("<span class='noteRef'>" + noteRef + "</span> ");
    }

    public boolean isInNote() {
        return isInNote;
    }

    /**
     * @return the internal list of notes collected so far (not a copy)
     */
    public List<Note> getNotesList() {
        return notesList;
    }
}

View File

@ -0,0 +1,225 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.KeyUtil;
import org.crosswire.jsword.passage.Verse;
import org.crosswire.jsword.versification.Versification;
import org.crosswire.jsword.versification.system.Versifications;
import java.net.URI;
import java.util.List;
/**
* Parameters passed into the Osis to HTML converter
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class OsisToHtmlParameters {
private String languageCode = "en";
private boolean isLeftToRight = true;
private boolean isShowTitles = true;
private boolean isShowVerseNumbers = false;
private boolean isVersePerline = false;
private boolean isShowMyNotes = false;
private boolean isShowBookmarks = false;
private boolean isShowNotes = false;
private boolean isAutoWrapUnwrappedRefsInNote = false;
// used as a basis if a reference has only chapter and no book
private Verse basisRef;
private Versification documentVersification;
private String font;
private String cssClassForCustomFont;
private boolean isShowStrongs = false;
private boolean isShowMorphology = false;
private boolean isRedLetter = false;
private String extraStylesheet;
private String extraFooter;
private boolean convertStrongsRefsToLinks;
private List<Verse> versesWithNotes;
private List<Verse> versesWithBookmarks;
private URI moduleBasePath;
public String getLanguageCode() {
return languageCode;
}
public void setLanguageCode(String languageCode) {
this.languageCode = languageCode;
}
public boolean isLeftToRight() {
return isLeftToRight;
}
public void setLeftToRight(boolean isLeftToRight) {
this.isLeftToRight = isLeftToRight;
}
public boolean isShowTitles() {
return isShowTitles;
}
public void setShowTitles(boolean isShowTitles) {
this.isShowTitles = isShowTitles;
}
public boolean isShowVerseNumbers() {
return isShowVerseNumbers;
}
public void setShowVerseNumbers(boolean isShowVerseNumbers) {
this.isShowVerseNumbers = isShowVerseNumbers;
}
public boolean isVersePerline() {
return isVersePerline;
}
public void setVersePerline(boolean isVersePerline) {
this.isVersePerline = isVersePerline;
}
public boolean isShowMyNotes() {
return isShowMyNotes;
}
public void setShowMyNotes(boolean isShowMyNotes) {
this.isShowMyNotes = isShowMyNotes;
}
public boolean isShowBookmarks() {
return isShowBookmarks;
}
public void setShowBookmarks(boolean isShowBookmarks) {
this.isShowBookmarks = isShowBookmarks;
}
public boolean isShowNotes() {
return isShowNotes;
}
public void setShowNotes(boolean isShowNotes) {
this.isShowNotes = isShowNotes;
}
public boolean isAutoWrapUnwrappedRefsInNote() {
return isAutoWrapUnwrappedRefsInNote;
}
public void setAutoWrapUnwrappedRefsInNote(boolean isAutoWrapUnwrappedRefsInNote) {
this.isAutoWrapUnwrappedRefsInNote = isAutoWrapUnwrappedRefsInNote;
}
public boolean isShowStrongs() {
return isShowStrongs;
}
public void setShowStrongs(boolean isShowStrongs) {
this.isShowStrongs = isShowStrongs;
}
public boolean isShowMorphology() {
return isShowMorphology;
}
public void setShowMorphology(boolean isShowMorphology) {
this.isShowMorphology = isShowMorphology;
}
public String getExtraStylesheet() {
return extraStylesheet;
}
public void setExtraStylesheet(String extraStylesheet) {
this.extraStylesheet = extraStylesheet;
}
public String getExtraFooter() {
return extraFooter;
}
public void setExtraFooter(String extraFooter) {
this.extraFooter = extraFooter;
}
public Verse getBasisRef() {
return basisRef;
}
public void setBasisRef(Key basisRef) {
// KeyUtil always returns a Verse even if it is only Gen 1:1
this.basisRef = KeyUtil.getVerse(basisRef);
}
public boolean isRedLetter() {
return isRedLetter;
}
public void setRedLetter(boolean isRedLetter) {
this.isRedLetter = isRedLetter;
}
public String getFont() {
return font;
}
public void setFont(String font) {
this.font = font;
}
public String getCssClassForCustomFont() {
return cssClassForCustomFont;
}
public void setCssClassForCustomFont(String cssClassForCustomFont) {
this.cssClassForCustomFont = cssClassForCustomFont;
}
public boolean isConvertStrongsRefsToLinks() {
return convertStrongsRefsToLinks;
}
public void setConvertStrongsRefsToLinks(boolean convertStrongsRefsToLinks) {
this.convertStrongsRefsToLinks = convertStrongsRefsToLinks;
}
/**
 * @return the stored list of verses that have notes; this is the same list instance that was
 * passed to the setter (no copy is made). Presumably read by MyNoteMarker - confirm
 */
public List<Verse> getVersesWithNotes() {
return versesWithNotes;
}
/**
 * @param versesWithNotes list to store; it is kept by reference, not copied
 */
public void setVersesWithNotes(List<Verse> versesWithNotes) {
this.versesWithNotes = versesWithNotes;
}
/**
 * @return the stored list of bookmarked verses; this is the same list instance that was
 * passed to the setter (no copy is made). Presumably read by BookmarkMarker - confirm
 */
public List<Verse> getVersesWithBookmarks() {
return versesWithBookmarks;
}
/**
 * @param versesWithBookmarks list to store; it is kept by reference, not copied
 */
public void setVersesWithBookmarks(List<Verse> versesWithBookmarks) {
this.versesWithBookmarks = versesWithBookmarks;
}
/**
 * @return the module base path URI (its consumer is not visible in this part of the file)
 */
public URI getModuleBasePath() {
return moduleBasePath;
}
/**
 * @param moduleBasePath module base path URI to store
 */
public void setModuleBasePath(URI moduleBasePath) {
this.moduleBasePath = moduleBasePath;
}
/**
 * Returns the versification of the document being converted.
 *
 * @return the versification that was set, or the JSword default versification
 * ({@code Versifications.DEFAULT_V11N}) when none has been set
 */
public Versification getDocumentVersification() {
    // fall back to the library default when the caller never supplied a versification
    if (documentVersification == null) {
        return Versifications.instance().getVersification(Versifications.DEFAULT_V11N);
    }
    return documentVersification;
}
/**
 * @param documentVersification versification of the document; null makes
 * getDocumentVersification() fall back to the default versification
 */
public void setDocumentVersification(Versification documentVersification) {
this.documentVersification = documentVersification;
}
}

View File

@ -0,0 +1,341 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import android.util.Log;
import org.apache.commons.lang3.StringUtils;
import org.bspeice.minimalbible.service.format.OsisSaxHandler;
import org.bspeice.minimalbible.service.format.osistohtml.preprocessor.HebrewCharacterPreprocessor;
import org.bspeice.minimalbible.service.format.osistohtml.preprocessor.TextPreprocessor;
import org.bspeice.minimalbible.service.format.osistohtml.strongs.StrongsHandler;
import org.bspeice.minimalbible.service.format.osistohtml.strongs.StrongsLinkCreator;
import org.bspeice.minimalbible.service.format.osistohtml.tei.OrthHandler;
import org.bspeice.minimalbible.service.format.osistohtml.tei.PronHandler;
import org.bspeice.minimalbible.service.format.osistohtml.tei.RefHandler;
import org.bspeice.minimalbible.service.format.osistohtml.tei.TEIUtil;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import static org.bspeice.minimalbible.service.format.Constants.HTML;
/**
* Convert OSIS tags into html tags
* <p/>
* Example OSIS tags from KJV Ps 119 v1 showing title, w, note <title
* canonical="true" subType="x-preverse" type="section"> <foreign
* n="?">ALEPH.</foreign> </title> <w lemma="strong:H0835">Blessed</w>
* <transChange type="added">are</transChange> <w lemma="strong:H08549">the
* undefiled</w> ... <w lemma="strong:H01980" morph="strongMorph:TH8802">who
* walk</w> ... <w lemma="strong:H03068">of the
* <seg><divineName>Lord</divineName></seg></w>. <note type="study">undefiled:
* or, perfect, or, sincere</note>
* <p/>
* Example of notes cross references from ESV In the <note n="a"
* osisID="Gen.1.1!crossReference.a" osisRef="Gen.1.1"
* type="crossReference"><reference osisRef="Job.38.4-Job.38.7">Job
* 38:4-7</reference>; <reference osisRef="Ps.33.6">Ps. 33:6</reference>;
* <reference osisRef="Ps.136.5">136:5</reference>; <reference
* osisRef="Isa.42.5">Isa. 42:5</reference>; <reference
* osisRef="Isa.45.18">45:18</reference>; <reference
* osisRef="John.1.1-John.1.3">John 1:1-3</reference>; <reference
* osisRef="Acts.14.15">Acts 14:15</reference>; <reference
* osisRef="Acts.17.24">17:24</reference>; <reference
* osisRef="Col.1.16-Col.1.17">Col. 1:16, 17</reference>; <reference
* osisRef="Heb.1.10">Heb. 1:10</reference>; <reference
* osisRef="Heb.11.3">11:3</reference>; <reference osisRef="Rev.4.11">Rev.
* 4:11</reference></note>beginning
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class OsisToHtmlSaxHandler extends OsisSaxHandler {
    private static final String HEBREW_LANGUAGE_CODE = "he";
    // properties
    private OsisToHtmlParameters parameters;
    // tag handlers for the different OSIS tags
    private VerseHandler verseHandler;
    private MyNoteMarker myNoteMarker;
    private BookmarkMarker bookmarkMarker;
    private NoteHandler noteHandler;
    private ReferenceHandler referenceHandler;
    private RefHandler refHandler;
    private TitleHandler titleHandler;
    private QHandler qHandler;
    private LGHandler lgHandler;
    private LHandler lHandler;
    private HiHandler hiHandler;
    private OrthHandler orthHandler;
    private PronHandler pronHandler;
    private StrongsHandler strongsHandler;
    private FigureHandler figureHandler;
    // processor for the tag content
    private TextPreprocessor textPreprocessor;
    // internal logic
    private VerseInfo verseInfo = new VerseInfo();
    private boolean isAnyTextWritten = false;

    /**
     * Creates the handler and one tag handler per supported OSIS/TEI element, all sharing this
     * handler's writer and verse state.
     *
     * @param parameters conversion options; also selects the (single) text preprocessor
     */
    public OsisToHtmlSaxHandler(OsisToHtmlParameters parameters) {
        super();
        this.parameters = parameters;
        verseHandler = new VerseHandler(parameters, verseInfo, getWriter());
        myNoteMarker = new MyNoteMarker(parameters, verseInfo, getWriter());
        bookmarkMarker = new BookmarkMarker(parameters, verseInfo, getWriter());
        // The note handler must exist before the reference handlers are built: ReferenceHandler
        // stores the instance it is given, and startElement/endElement call it for every <note>.
        // Previously it was never created, so those calls failed with a NullPointerException.
        noteHandler = new NoteHandler(parameters, verseInfo, getWriter());
        referenceHandler = new ReferenceHandler(parameters, noteHandler, getWriter());
        refHandler = new RefHandler(parameters, noteHandler, getWriter());
        titleHandler = new TitleHandler(parameters, verseInfo, getWriter());
        qHandler = new QHandler(parameters, getWriter());
        hiHandler = new HiHandler(parameters, getWriter());
        orthHandler = new OrthHandler(parameters, getWriter());
        pronHandler = new PronHandler(parameters, getWriter());
        lgHandler = new LGHandler(parameters, getWriter());
        lHandler = new LHandler(parameters, getWriter());
        strongsHandler = new StrongsHandler(parameters, getWriter());
        figureHandler = new FigureHandler(parameters, getWriter());
        //TODO at the moment we can only have a single TextPreprocesor, need to chain them and maybe make the writer a TextPreprocessor and put it at the end of the chain
        if (HEBREW_LANGUAGE_CODE.equals(parameters.getLanguageCode())) {
            textPreprocessor = new HebrewCharacterPreprocessor();
        } else if (parameters.isConvertStrongsRefsToLinks()) {
            textPreprocessor = new StrongsLinkCreator();
        }
    }

    /**
     * Writes the HTML prologue: doctype, head (stylesheets, script, charset) and the opening body tag.
     */
    @Override
    public void startDocument() {
        String jsTag = "\n<script type='text/javascript' src='file:///android_asset/web/script.js'></script>\n";
        String styleSheetTag = "<link href='file:///android_asset/web/style.css' rel='stylesheet' type='text/css'/>";
        String extraStyleSheetTag = "";
        if (parameters.getExtraStylesheet() != null) {
            extraStyleSheetTag = "<link href='file:///android_asset/web/"
                    + parameters.getExtraStylesheet()
                    + "' rel='stylesheet' type='text/css'/>";
        }
        write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"> "
                + "<html xmlns='http://www.w3.org/1999/xhtml' dir='" + getDirection() + "'><head>"
                + styleSheetTag + extraStyleSheetTag + "\n"
                + jsTag
                + "<meta charset='utf-8'/>"
                + "</head>"
                + "<body onscroll='jsonscroll()' onload='jsonload()' >");
        // force rtl for rtl languages - rtl support on Android is poor but
        // forcing it seems to help occasionally
        if (!parameters.isLeftToRight()) {
            write("<span dir='rtl'>");
        }
    }

    /**
     * Called when the parser has finished the document: closes whatever startDocument and the
     * verse-per-line mode left open, appends the optional footer and closes body and html.
     */
    @Override
    public void endDocument() {
        // close the div of the last verse; this mirrors the condition VerseHandler uses to
        // decide whether a preceding verse exists
        if (parameters.isVersePerline()) {
            if (verseInfo.currentVerseNo > 1) {
                write("</div>");
            }
        }
        // add optional footer e.g. Strongs show all occurrences link
        if (StringUtils.isNotEmpty(parameters.getExtraFooter())) {
            write(parameters.getExtraFooter());
        }
        // close the span opened in startDocument for right-to-left text
        if (!parameters.isLeftToRight()) {
            write("</span>");
        }
        write("</body></html>");
    }

    /**
     * Called for every opening tag; dispatches to the handler for that element or writes the
     * equivalent HTML directly. Unsupported elements are only logged.
     *
     * @param namespaceURI namespace of the element (unused)
     * @param sName        simple (local) name
     * @param qName        qualified name
     * @param attrs        attributes declared on the element
     */
    @Override
    public void startElement(String namespaceURI,
                             String sName, // simple name
                             String qName, // qualified name
                             Attributes attrs) {
        String name = getName(sName, qName); // element name
        debug(name, attrs, true);
        if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) {
            verseHandler.startAndUpdateVerse(attrs);
            bookmarkMarker.start();
            myNoteMarker.start();
            // record that we are into a new verse
            verseInfo.isTextSinceVerse = false;
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_TITLE)) {
            titleHandler.start(attrs);
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_NOTE)) {
            noteHandler.startNote(attrs);
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_REFERENCE)) {
            referenceHandler.start(attrs);
        } else if (name.equals(TEIUtil.TEI_ELEMENT_REF)) {
            refHandler.start(attrs);
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_LB)) {
            // a line break before any text would only produce a gap at the top of the page
            if (isAnyTextWritten) {
                write(HTML.BR);
            }
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_LG)) {
            lgHandler.start(attrs);
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_L)) {
            lHandler.startL(attrs);
        } else if (name.equals("div")) {
            String type = attrs.getValue("type");
            if ("paragraph".equals(type)) {
                // ignore sID start paragraph sID because it often comes after the verse no and causes a gap between verse no verse text
                String eID = attrs.getValue("eID");
                if (eID != null && isAnyTextWritten) {
                    write("<p />");
                }
            }
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_P)) {
            write("<p>");
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_Q)) {
            qHandler.start(attrs);
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_HI)) {
            hiHandler.start(attrs);
        } else if (name.equals(TEIUtil.TEI_ELEMENT_ORTH)) {
            orthHandler.start(attrs);
        } else if (name.equals(TEIUtil.TEI_ELEMENT_PRON)) {
            pronHandler.start(attrs);
        } else if (name.equals("milestone")) {
            String type = attrs.getValue(OSISUtil.OSIS_ATTR_TYPE);
            if (StringUtils.isNotEmpty(type)) {
                if (type.equals("line") || type.equals("x-p")) {
                    if (isAnyTextWritten) {
                        //e.g. NETtext Mt 4:14; KJV Gen 1:6
                        writeOptionallyBeforeVerse(HTML.BR);
                    }
                }
            }
        } else if (name.equals("transChange")) {
            write("<span class='transChange'>");
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_W)) {
            strongsHandler.start(attrs);
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_FIGURE)) {
            figureHandler.start(attrs);
        } else {
            // TODO: Cleanup
            Log.i("OsisToHtmlSaxHandler", "Verse " + verseInfo.currentVerseNo + " unsupported OSIS tag:" + name);
        }
    }

    /**
     * Called for every closing tag; lets the handler that opened the element finish it.
     * Elements that were handled completely in startElement (lb, div, milestone, figure) and
     * unsupported elements are ignored here.
     *
     * @param namespaceURI namespace of the element (unused)
     * @param sName        simple (local) name
     * @param qName        qualified name
     */
    @Override
    public void endElement(String namespaceURI, String sName, // simple name
                           String qName // qualified name
    ) {
        String name = getName(sName, qName);
        debug(name, null, false);
        if (name.equals(OSISUtil.OSIS_ELEMENT_TITLE)) {
            titleHandler.end();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_VERSE)) {
            // reverse order of the start calls in startElement
            myNoteMarker.end();
            bookmarkMarker.end();
            verseHandler.end();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_NOTE)) {
            noteHandler.endNote();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_REFERENCE)) {
            referenceHandler.end();
        } else if (name.equals(TEIUtil.TEI_ELEMENT_REF)) {
            refHandler.end();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_LG)) {
            lgHandler.end();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_L)) {
            lHandler.endL();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_P)) {
            write("</p>");
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_Q)) {
            // end quotation, but <q /> tag is a marker and contains no content
            // so <q /> will appear at beginning and end of speech
            qHandler.end();
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_HI)) {
            hiHandler.end();
        } else if (name.equals(TEIUtil.TEI_ELEMENT_ORTH)) {
            orthHandler.end();
        } else if (name.equals(TEIUtil.TEI_ELEMENT_PRON)) {
            pronHandler.end();
        } else if (name.equals("transChange")) {
            write("</span>");
        } else if (name.equals(OSISUtil.OSIS_ELEMENT_W)) {
            strongsHandler.end();
        }
    }

    /**
     * Called for every run of character data between tags (the parser may deliver one text
     * node in several calls). Updates the "text seen" flags, runs the text through the
     * preprocessor, if one is configured, and writes it.
     *
     * @param buf    buffer holding the characters
     * @param offset index of the first character of this run
     * @param len    number of characters in this run
     */
    @Override
    public void characters(char buf[], int offset, int len) {
        String s = new String(buf, offset, len);
        // record that we are now beyond the verse, but do it quickly so as not to slow down parsing:
        // runs longer than 2 chars are treated as text without inspecting them
        verseInfo.isTextSinceVerse = verseInfo.isTextSinceVerse ||
                len > 2 ||
                StringUtils.isNotBlank(s);
        isAnyTextWritten = isAnyTextWritten || verseInfo.isTextSinceVerse;
        if (textPreprocessor != null) {
            s = textPreprocessor.process(s);
        }
        write(s);
    }

    /**
     * Writes s at the current position, or - when no text has been seen since the current verse
     * started - in front of the verse number, so that line breaks and titles can precede it.
     *
     * @param s markup to write
     */
    protected void writeOptionallyBeforeVerse(String s) {
        boolean writeBeforeVerse = !verseInfo.isTextSinceVerse;
        if (writeBeforeVerse) {
            getWriter().beginInsertAt(verseInfo.positionToInsertBeforeVerse);
        }
        getWriter().write(s);
        if (writeBeforeVerse) {
            getWriter().finishInserting();
        }
    }

    /**
     * Called for processing instructions such as
     * &lt;?ProgramName:BooksLib QUERY="author, isbn, price"?&gt;; they are ignored.
     *
     * @param target the instruction target, "ProgramName:BooksLib" in the example
     * @param data   the instruction data, QUERY="author, isbn, price" in the example
     */
    public void processingInstruction(String target, String data) {
        // noop
    }

    /**
     * @return "ltr" or "rtl" according to the parameters, for use as an HTML dir attribute
     */
    public String getDirection() {
        return parameters.isLeftToRight() ? "ltr" : "rtl";
    }

    /**
     * Mutable state about the verse currently being written, shared with the tag handlers.
     */
    class VerseInfo {
        // number of the current verse; set by VerseHandler
        int currentVerseNo;
        // writer position in front of the current verse number; set by VerseHandler and
        // moved forward by TitleHandler after it inserts a title there
        int positionToInsertBeforeVerse;
        // true once character data has been seen after the current verse tag
        boolean isTextSinceVerse = false;
    }
}

View File

@ -0,0 +1,86 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import java.util.Stack;
/**
* This can either signify a quote or Red Letter
* Example from ESV
* But he answered them, <q marker="" who="Jesus"><q level="1" marker="“" sID="40024002.1"/>You see all these
* Example from KJV
* said ... unto them, <q who="Jesus">...See ye
* <p/>
* Apparently quotation marks are not supposed to appear in the KJV (https://sites.google.com/site/kjvtoday/home/Features-of-the-KJV/quotation-marks)
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class QHandler {
    private static final String MARKER = "marker";
    private static final String HTML_QUOTE_ENTITY = "&quot;";
    private HtmlTextWriter writer;
    private OsisToHtmlParameters parameters;
    // quotes nest, so the details of every open quote are kept until its closing tag arrives
    private Stack<QuoteInfo> stack = new Stack<QuoteInfo>();

    /**
     * @param parameters conversion options; only the red-letter flag is read
     * @param writer     destination for the generated markup
     */
    public QHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        this.writer = writer;
        this.parameters = parameters;
    }

    /**
     * @return the name of the element handled by this class
     */
    public String getTagName() {
        return "q";
    }

    /**
     * Handles an opening q tag: writes the quote marker and, for red-letter text, opens the
     * red-letter span. The decisions are pushed on the stack for the matching end().
     *
     * @param attrs attributes of the q element
     */
    public void start(Attributes attrs) {
        String speaker = attrs.getValue(OSISUtil.ATTRIBUTE_Q_WHO);

        QuoteInfo opened = new QuoteInfo();
        // a q tag carrying sID or eID is an empty milestone marking one end of a quote
        opened.isMilestone = TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_SID, attrs)
                || TagHandlerHelper.isAttr(OSISUtil.OSIS_ATTR_EID, attrs);
        // an explicit marker attribute wins; otherwise quotes attributed to a speaker get no
        // marker and anonymous quotes get a plain double quote
        String fallbackMarker = (speaker == null) ? HTML_QUOTE_ENTITY : "";
        opened.marker = TagHandlerHelper.getAttribute(MARKER, attrs, fallbackMarker);
        opened.isRedLetter = parameters.isRedLetter() && "Jesus".equals(speaker);

        writer.write(opened.marker);
        if (opened.isRedLetter) {
            writer.write("<span class='redLetter'>");
        }

        stack.push(opened);
    }

    /**
     * Handles a closing q tag using the details saved by the matching start().
     */
    public void end() {
        QuoteInfo closing = stack.pop();

        // close the red-letter span before the closing quote mark
        if (closing.isRedLetter) {
            writer.write("</span>");
        }

        // a milestone is an empty element, so start() has already written its marker;
        // writing it again here would double it up
        if (closing.isMilestone) {
            return;
        }
        writer.write(closing.marker);
    }

    enum QType {quote, redLetter}

    /**
     * What start() decided for one q element.
     */
    private static class QuoteInfo {
        private boolean isMilestone;
        private boolean isRedLetter;
        private String marker = HTML_QUOTE_ENTITY;
    }
}

View File

@ -0,0 +1,157 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import android.util.Log;
import org.apache.commons.lang3.StringUtils;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.Passage;
import org.crosswire.jsword.passage.PassageKeyFactory;
import org.crosswire.jsword.passage.RestrictionType;
import org.crosswire.jsword.passage.VerseRange;
import org.xml.sax.Attributes;
import java.util.Iterator;
import static org.bspeice.minimalbible.service.format.Constants.BIBLE_PROTOCOL;
/**
* Convert OSIS tags into html tags
* <p/>
* Example OSIS tags from KJV Ps 119 v1 showing title, w, note
* <title canonical="true" subType="x-preverse" type="section">
* <foreign n="?">ALEPH.</foreign>
* </title>
* <w lemma="strong:H0835">Blessed</w> <transChange type="added">are</transChange> <w lemma="strong:H08549">the undefiled</w>
* ... <w lemma="strong:H01980" morph="strongMorph:TH8802">who walk</w>
* ... <w lemma="strong:H03068">of the <seg><divineName>Lord</divineName></seg></w>.
* <note type="study">undefiled: or, perfect, or, sincere</note>
* <p/>
* Example of notes cross references from ESV
* In the <note n="a" osisID="Gen.1.1!crossReference.a" osisRef="Gen.1.1" type="crossReference"><reference osisRef="Job.38.4-Job.38.7">Job 38:4-7</reference>; <reference osisRef="Ps.33.6">Ps. 33:6</reference>; <reference osisRef="Ps.136.5">136:5</reference>; <reference osisRef="Isa.42.5">Isa. 42:5</reference>; <reference osisRef="Isa.45.18">45:18</reference>; <reference osisRef="John.1.1-John.1.3">John 1:1-3</reference>; <reference osisRef="Acts.14.15">Acts 14:15</reference>; <reference osisRef="Acts.17.24">17:24</reference>; <reference osisRef="Col.1.16-Col.1.17">Col. 1:16, 17</reference>; <reference osisRef="Heb.1.10">Heb. 1:10</reference>; <reference osisRef="Heb.11.3">11:3</reference>; <reference osisRef="Rev.4.11">Rev. 4:11</reference></note>beginning
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class ReferenceHandler {
private final String TAG = "ReferenceHandler";
private OsisToHtmlParameters parameters;
private String currentRefOsisRef;
private NoteHandler noteHandler;
private HtmlTextWriter writer;
public ReferenceHandler(OsisToHtmlParameters osisToHtmlParameters, NoteHandler noteHandler, HtmlTextWriter theWriter) {
this.parameters = osisToHtmlParameters;
this.noteHandler = noteHandler;
this.writer = theWriter;
}
public void start(Attributes attrs) {
// store the osisRef attribute for use with the note
String target = attrs.getValue(OSISUtil.OSIS_ATTR_REF);
start(target);
}
protected void start(String target) {
// don't need to do anything until closing reference tag except..
// delete separators like ';' that sometimes occur between reference tags
writer.clearTempStore();
writer.writeToTempStore();
// store the osisRef attribute for use with the note
this.currentRefOsisRef = target;
}
public void end() {
writer.finishWritingToTempStore();
if (noteHandler.isInNote() || parameters.isAutoWrapUnwrappedRefsInNote()) {
noteHandler.addNoteForReference(writer.getTempStoreString(), currentRefOsisRef);
} else {
String refText = writer.getTempStoreString();
writer.write(getReferenceTag(currentRefOsisRef, refText));
}
// and clear the buffer
writer.clearTempStore();
currentRefOsisRef = null;
}
/**
* create a link tag from an OSISref and the content of the tag
*/
private String getReferenceTag(String reference, String content) {
Log.d(TAG, "Ref:" + reference + " Content:" + content);
StringBuilder result = new StringBuilder();
try {
//JSword does not know the basis (default book) so prepend it if it looks like JSword failed to work it out
//We only need to worry about the first ref because JSword uses the first ref as the basis for the subsequent refs
// if content starts with a number and is not followed directly by an alpha char e.g. 1Sa
if (reference == null && content != null && content.length() > 0 && StringUtils.isNumeric(content.subSequence(0, 1)) &&
(content.length() < 2 || !StringUtils.isAlphaSpace(content.subSequence(1, 2)))) {
// maybe should use VerseRangeFactory.fromstring(orig, basis)
// this check for a colon to see if the first ref is verse:chap is not perfect but it will do until JSword adds a fix
int firstColonPos = content.indexOf(":");
boolean isVerseAndChapter = firstColonPos > 0 && firstColonPos < 4;
if (isVerseAndChapter) {
reference = parameters.getBasisRef().getBook().getOSIS() + " " + content;
} else {
reference = parameters.getBasisRef().getBook().getOSIS() + " " + parameters.getBasisRef().getChapter() + ":" + content;
}
Log.d(TAG, "Patched reference:" + reference);
} else if (reference == null) {
reference = content;
}
// convert urns of type book:key to sword://book/key to simplify urn parsing (1 fewer case to check for).
// Avoid urls of type 'matt 3:14' by excludng urns with a space
if (reference.contains(":") && !reference.contains(" ") && !reference.startsWith("sword://")) {
reference = "sword://" + reference.replace(":", "/");
}
boolean isFullSwordUrn = reference.contains("/") && reference.contains(":");
if (isFullSwordUrn) {
// e.g. sword://StrongsRealGreek/01909
// don't play with the reference - just assume it is correct
result.append("<a href='").append(reference).append("'>");
result.append(content);
result.append("</a>");
} else {
Passage ref = (Passage) PassageKeyFactory.instance().getKey(parameters.getDocumentVersification(), reference);
boolean isSingleVerse = ref.countVerses() == 1;
boolean isSimpleContent = content.length() < 3 && content.length() > 0;
Iterator<VerseRange> it = ref.rangeIterator(RestrictionType.CHAPTER);
if (isSingleVerse && isSimpleContent) {
// simple verse no e.g. 1 or 2 preceding the actual verse in TSK
result.append("<a href='").append(BIBLE_PROTOCOL).append(":").append(it.next().getOsisRef()).append("'>");
result.append(content);
result.append("</a>");
} else {
// multiple complex references
boolean isFirst = true;
while (it.hasNext()) {
Key key = it.next();
if (!isFirst) {
result.append(" ");
}
result.append("<a href='").append(BIBLE_PROTOCOL).append(":").append(key.iterator().next().getOsisRef()).append("'>");
result.append(key);
result.append("</a>");
isFirst = false;
}
}
}
} catch (Exception e) {
Log.e(TAG, "Error parsing OSIS reference:" + reference);
// just return the content with no html markup
result.append(content);
}
return result.toString();
}
}

View File

@ -0,0 +1,82 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import android.util.Log;
import org.apache.commons.lang3.StringUtils;
import org.xml.sax.Attributes;
/**
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class TagHandlerHelper {
    private static final String TAG = "TagHandlerHelper";

    /**
     * Fetches a String attribute, falling back to a default.
     *
     * @param attributeName name of the attribute to read
     * @param attrs         attributes of the current element
     * @param defaultValue  value returned when the attribute is absent
     * @return the attribute value, or defaultValue when the attribute is not present
     */
    public static String getAttribute(String attributeName, Attributes attrs, String defaultValue) {
        String attrValue = attrs.getValue(attributeName);
        if (attrValue != null) {
            return attrValue;
        } else {
            return defaultValue;
        }
    }

    /**
     * Fetches an integer attribute, falling back to a default.
     *
     * @param attributeName name of the attribute to read
     * @param attrs         attributes of the current element
     * @param defaultValue  value returned when the attribute is absent or cannot be read as an integer
     * @return the parsed attribute value, or defaultValue
     */
    public static int getAttribute(String attributeName, Attributes attrs, int defaultValue) {
        int retval = defaultValue;
        try {
            String attrValue = attrs.getValue(attributeName);
            if (attrValue != null) {
                retval = Integer.parseInt(attrValue);
            }
        } catch (Exception e) {
            // deliberately tolerant: malformed module markup must not abort the conversion
            Log.w(TAG, "Non numeric but expected integer for " + attributeName);
        }
        return retval;
    }

    /**
     * see if an attribute exists and has a value
     *
     * @param attributeName name of the attribute to look for
     * @param attrs         attributes of the current element
     * @return true if the attribute is present and not the empty string
     */
    public static boolean isAttr(String attributeName, Attributes attrs) {
        String attrValue = attrs.getValue(attributeName);
        return StringUtils.isNotEmpty(attrValue);
    }

    /**
     * return verse from osis id of format book.chap.verse
     *
     * @param osisID osis Id
     * @return the number after the last dot, or 0 when osisID is null, contains no dot, or
     * does not end in a number (e.g. Gen.1.1!crossReference.a)
     */
    public static int osisIdToVerseNum(String osisID) {
        if (osisID == null) {
            return 0;
        }
        /* You have to use "\\.", the first backslash is interpreted as an escape by the
           Java compiler, so you have to use two to get a String that contains one
           backslash and a dot, which is what you want the regexp engine to see.*/
        String[] parts = osisID.split("\\.");
        if (parts.length <= 1) {
            return 0;
        }
        String verse = parts[parts.length - 1];
        try {
            return Integer.parseInt(verse);
        } catch (NumberFormatException e) {
            // same tolerance as getAttribute(String, Attributes, int): an unexpected id must
            // not abort the whole conversion, so treat it as "no verse number"
            Log.w(TAG, "Non numeric verse in osisID " + osisID);
            return 0;
        }
    }

    /**
     * Logs the local name and value of every attribute at debug level.
     *
     * @param attrs attributes to log
     */
    public static void printAttributes(Attributes attrs) {
        for (int i = 0; i < attrs.getLength(); i++) {
            Log.d(TAG, attrs.getLocalName(i) + ":" + attrs.getValue(i));
        }
    }
}

View File

@ -0,0 +1,79 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.apache.commons.lang3.StringUtils;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
/**
* Handles OSIS title elements such as section headings and canonical (in-text) headings
* Example
* ESV section heading <title subType="x-preverse" type="section">
* ESV canonical heading<title canonical="true" subType="x-preverse" type="section">To the choirmaster. Of David,
* WEB when formatted with JSword seems to have type="x-gen"
* <p/>
* Apparently quotation marks are not supposed to appear in the KJV (https://sites.google.com/site/kjvtoday/home/Features-of-the-KJV/quotation-marks)
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class TitleHandler {
    private static final String PREVERSE = "preverse"; // the full string is 'x-preverse' but we just check for contains for extra tolerance
    private HtmlTextWriter writer;
    private VerseInfo verseInfo;
    private OsisToHtmlParameters parameters;
    // decisions taken in start() and needed again in end()
    private boolean isShowTitle;
    private boolean isMoveBeforeVerse;

    /**
     * @param parameters conversion options; only the show-titles flag is read
     * @param verseInfo  shared state of the verse currently being written
     * @param writer     destination for the generated markup
     */
    public TitleHandler(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) {
        this.writer = writer;
        this.verseInfo = verseInfo;
        this.parameters = parameters;
    }

    /**
     * @return the name of the element handled by this class
     */
    public String getTagName() {
        return OSISUtil.OSIS_ELEMENT_TITLE;
    }

    /**
     * Handles an opening title tag: either opens an h1 (possibly in front of the current verse
     * number) or switches the writer off so that the title content is dropped.
     *
     * @param attrs attributes of the title element
     */
    public void start(Attributes attrs) {
        //JSword adds the chapter no at the top but hide this because the chapter is in the And Bible header
        boolean generatedByJSword = attrs.getLength() == 1
                && OSISUtil.GENERATED_CONTENT.equals(attrs.getValue(OSISUtil.OSIS_ATTR_TYPE));

        // generated titles are never shown; others are shown if the user wants titles or the title is canonical
        if (generatedByJSword) {
            isShowTitle = false;
        } else if (parameters.isShowTitles()) {
            isShowTitle = true;
        } else {
            isShowTitle = "true".equalsIgnoreCase(attrs.getValue(OSISUtil.OSIS_ATTR_CANONICAL));
        }

        if (!isShowTitle) {
            // suppress everything up to the closing tag
            writer.setDontWrite(true);
            return;
        }

        // ESV has subType but NETtext has lower case subtype so concatenate both and search with contains()
        String bothSubtypes = attrs.getValue(OSISUtil.OSIS_ATTR_SUBTYPE)
                + attrs.getValue(OSISUtil.OSIS_ATTR_SUBTYPE.toLowerCase());
        boolean nothingWrittenInVerseYet = !verseInfo.isTextSinceVerse && verseInfo.currentVerseNo > 0;
        isMoveBeforeVerse = StringUtils.containsIgnoreCase(bothSubtypes, PREVERSE) || nothingWrittenInVerseYet;
        if (isMoveBeforeVerse) {
            // section titles normally come before a verse, so insert in front of the verse
            // number that has already been written; end() finishes the insertion
            writer.beginInsertAt(verseInfo.positionToInsertBeforeVerse);
        }

        // the css class is derived from the level attribute, defaulting to level 1
        String level = TagHandlerHelper.getAttribute(OSISUtil.OSIS_ATTR_LEVEL, attrs, "1");
        writer.write("<h1 class='" + "heading" + level + "'>");
    }

    /**
     * Handles a closing title tag, undoing whatever start() set up.
     */
    public void end() {
        if (!isShowTitle) {
            // title was hidden: switch the writer back on
            writer.setDontWrite(false);
            return;
        }

        writer.write("</h1>");
        if (isMoveBeforeVerse) {
            // move positionToInsertBeforeVerse forward to after this title otherwise any subtitle will be above the title
            verseInfo.positionToInsertBeforeVerse = writer.getPosition();
            writer.finishInserting();
        }
    }
}

View File

@ -0,0 +1,81 @@
package org.bspeice.minimalbible.service.format.osistohtml;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlSaxHandler.VerseInfo;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import static org.bspeice.minimalbible.service.format.Constants.HTML;
/**
* Write the verse number at the beginning of a verse
* Also handle verse per line
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class VerseHandler {
    private OsisToHtmlParameters parameters;
    private VerseInfo verseInfo;
    private int writerRollbackPosition;
    private HtmlTextWriter writer;
    // verse-per-line mode: true while a <div> written by this handler has not been closed by it
    private boolean isVerseDivOpen = false;
    // value of isVerseDivOpen at writerRollbackPosition, restored when an empty verse is rolled back
    private boolean wasVerseDivOpenAtRollback = false;

    /**
     * @param parameters conversion options (verse-per-line and show-verse-numbers flags)
     * @param verseInfo  shared state of the verse currently being written; updated by this handler
     * @param writer     destination for the generated markup
     */
    public VerseHandler(OsisToHtmlParameters parameters, VerseInfo verseInfo, HtmlTextWriter writer) {
        this.parameters = parameters;
        this.verseInfo = verseInfo;
        this.writer = writer;
    }

    /**
     * @return the name of the element handled by this class
     */
    public String getTagName() {
        return OSISUtil.OSIS_ELEMENT_VERSE;
    }

    /**
     * Handles an opening verse tag: updates the current verse number, opens the per-verse div
     * in verse-per-line mode and writes the verse marker.
     *
     * @param attrs attributes of the verse element; when null the verse number is simply incremented
     */
    public void startAndUpdateVerse(Attributes attrs) {
        // remember where this verse starts so that end() can remove it if it turns out to be empty
        writerRollbackPosition = writer.getPosition();
        wasVerseDivOpenAtRollback = isVerseDivOpen;
        if (attrs != null) {
            verseInfo.currentVerseNo = TagHandlerHelper.osisIdToVerseNum(attrs.getValue("", OSISUtil.OSIS_ATTR_OSISID));
        } else {
            verseInfo.currentVerseNo++;
        }
        if (parameters.isVersePerline()) {
            // close preceding verse - but only if one was actually opened. Deciding this from
            // the verse number alone wrote a stray </div> for passages that start after verse 1
            // and left the div of a verse 0 open when verse 1 followed.
            if (isVerseDivOpen) {
                writer.write("</div>");
            }
            // start current verse
            writer.write("<div>");
            isVerseDivOpen = true;
        }
        writeVerse(verseInfo.currentVerseNo);
    }

    /**
     * Handles a closing verse tag: a verse without any text is removed from the output again.
     */
    public void end() {
        if (!verseInfo.isTextSinceVerse) {
            writer.removeAfter(writerRollbackPosition);
            // the rollback also removed the div tags written for this verse
            isVerseDivOpen = wasVerseDivOpenAtRollback;
        }
    }

    /**
     * Writes the verse marker span and records the position in front of it.
     *
     * @param verseNo number of the verse; 0 is never displayed
     */
    private void writeVerse(int verseNo) {
        verseInfo.positionToInsertBeforeVerse = writer.getPosition();
        // The id is used to 'jump to' the verse using javascript so always need the verse tag with an id
        // Do not show verse 0
        StringBuilder verseHtml = new StringBuilder();
        if (parameters.isShowVerseNumbers() && verseNo != 0) {
            verseHtml.append(" <span class='verse' id='").append(verseNo).append("'>").append(verseNo).append("</span>").append(HTML.NBSP);
        } else {
            // we really want an empty span but that is illegal and causes problems such as incorrect verse calculation in Psalms
            // so use something that will hopefully interfere as little as possible - a zero-width-space
            // also put a space before it to allow a separation from the last word of previous verse or to be ignored if start of line
            // (the opening tag must not be self-closing: "<span .../>" followed by content and "</span>" is malformed)
            verseHtml.append(" <span class='verse' id='").append(verseNo).append("'>&#x200b;</span>");
        }
        writer.write(verseHtml.toString());
    }
}

View File

@ -0,0 +1,99 @@
package org.bspeice.minimalbible.service.format.osistohtml.preprocessor;
import android.os.Build;
import org.apache.commons.lang3.StringUtils;
/**
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class HebrewCharacterPreprocessor implements TextPreprocessor {
// the following characters are not handled well in Android 2.2 & 2.3 and
// need special processing which for all except Sof Pasuq means removal
// puctuation char at the end of hebrew verses that looks like a ':'
private static final String HEBREW_SOF_PASUQ_CHAR = "\u05C3";
// vowels are on the first row and cantillations on the second
private static final char[] HEBREW_VOWELS_AND_CANTILLATIONS = new char[]{
'\u05B0', '\u05B1', '\u05B2', '\u05B3', '\u05B4', '\u05B5',
'\u05B6', '\u05B7', '\u05B8', '\u05B9', '\u05BA', '\u05BB',
'\u05BC', '\u05BD', '\u05BE', '\u05BF', '\u05C1', '\u05C2',
'\u0591', '\u0592', '\u0593', '\u0594', '\u0595', '\u0596',
'\u0597', '\u0598', '\u0599', '\u059A', '\u059B', '\u059C',
'\u059D', '\u059E', '\u05A0', '\u05A1', '\u05A2', '\u05A3',
'\u05A4', '\u05A5', '\u05A6', '\u05A7', '\u05A8', '\u05A9',
'\u05AA', '\u05AB', '\u05AC', '\u05AD', '\u05AE', '\u05AF'};
/**
* StringUtils methods only compare with a single char and hence create lots
* of temporary Strings This method compares with all chars and just creates
* one new string for each original string. This is to minimise memory
* overhead & gc.
*
* @param str
* @param removeChars
* @return
*/
public static String remove(String str, char[] removeChars) {
if (StringUtils.isEmpty(str)
|| !StringUtils.containsAny(str, removeChars)) {
return str;
}
StringBuilder r = new StringBuilder(str.length());
// for all chars in string
for (int i = 0; i < str.length(); i++) {
char strCur = str.charAt(i);
// compare with all chars to be removed
boolean matched = false;
for (int j = 0; j < removeChars.length && !matched; j++) {
if (removeChars[j] == strCur) {
matched = true;
}
}
// if current char does not match any in the list then add it to the
if (!matched) {
r.append(strCur);
}
}
return r.toString();
}
/**
* Some characters are not handled well in Android 2.2 & 2.3 and need
* special processing which for all except Sof Pasuq means removal
*
* @param text
* @return adjusted string
*/
@Override
public String process(String text) {
if (isVowelsBugFixed()) {
return text;
} else {
return doHebrewCharacterAdjustments(text);
}
}
/**
 * The vowels rtl problem is fixed in recent cyanogenmod and in 4.0.3.
 *
 * @return true if the SDK level is at least 15, or if it is at least 10 and
 * the "os.version" system property mentions cyanogenmod
 */
private boolean isVowelsBugFixed() {
    final int sdkLevel = Build.VERSION.SDK_INT;
    // 15 is Build.VERSION_CODES.ICE_CREAM_SANDWICH_MR1
    if (sdkLevel >= 15) {
        return true;
    }
    // 10 is GINGERBREAD_MR1 (2.3.3); anything older is never treated as fixed
    if (sdkLevel < 10) {
        return false;
    }
    return System.getProperty("os.version").contains("cyanogenmod");
}
/**
 * Strip the Hebrew vowels and cantillations and mark every Sof Pasuq as rtl.
 *
 * @param s the Hebrew text to adjust
 * @return the adjusted text
 */
private String doHebrewCharacterAdjustments(String s) {
    // Hebrew vowels are removed because i) they confuse bidi and ii) they are
    // not positioned correctly under/over the appropriate letter
    // http://groups.google.com/group/android-contrib/browse_thread/thread/5b6b079f9ec7792a?pli=1
    String withoutPoints = remove(s, HEBREW_VOWELS_AND_CANTILLATIONS);
    // even without vowel points the : at the end of each verse confuses
    // Android's bidi but specifying the char as rtl helps
    String rtlSofPasuq = "<span dir='rtl'>" + HEBREW_SOF_PASUQ_CHAR + "</span> ";
    return withoutPoints.replace(HEBREW_SOF_PASUQ_CHAR, rtlSofPasuq);
}
}

View File

@ -0,0 +1,14 @@
package org.bspeice.minimalbible.service.format.osistohtml.preprocessor;
/**
 * Preprocess text content in the Sword module before it is displayed.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public interface TextPreprocessor {
/**
 * Convert module text to that required for display.
 *
 * @param text the module text to convert
 * @return the text in the form required for display
 */
String process(String text);
}

View File

@ -0,0 +1,170 @@
package org.bspeice.minimalbible.service.format.osistohtml.strongs;
import org.apache.commons.lang3.StringUtils;
import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters;
import org.bspeice.minimalbible.service.format.osistohtml.TagHandlerHelper;
import org.crosswire.jsword.book.OSISUtil;
import org.xml.sax.Attributes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import static org.bspeice.minimalbible.service.format.Constants.HTML;
/**
 * Turns the Strong's numbers and morphology codes found in the attributes of
 * an OSIS {@code <w>} element into html links. The links are collected in
 * {@link #start(Attributes)} and written out in {@link #end()}.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class StrongsHandler {
    /**
     * Links built by {@link #start(Attributes)} that {@link #end()} still has
     * to write; null when nothing is pending.
     */
    List<String> pendingStrongsAndMorphTags;
    private final HtmlTextWriter writer;
    private final OsisToHtmlParameters parameters;

    public StrongsHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        this.parameters = parameters;
        this.writer = writer;
    }

    /**
     * @return the name of the OSIS element handled by this class
     */
    public String getTagName() {
        // this used to return "q" (the quote element) although the handler
        // only ever reads the lemma/morph attributes of <w> elements
        return OSISUtil.OSIS_ELEMENT_W;
    }

    /**
     * Read the lemma and morph attributes of an opening tag and remember the
     * resulting links until {@link #end()} is called. Nothing is remembered
     * unless Strong's or morphology display is switched on and the lemma
     * attribute starts with the Strong's prefix.
     *
     * @param attrs attributes of the opening tag
     */
    public void start(Attributes attrs) {
        if (!isShowStrongsOrMorphology()
                || !TagHandlerHelper.isAttr(OSISUtil.ATTRIBUTE_W_LEMMA, attrs)) {
            return;
        }
        // Strongs & morphology references
        // example of strongs refs: <w lemma="strong:H0430">God</w> <w lemma="strong:H0853 strong:H01254" morph="strongMorph:TH8804">created</w>
        // better example, because we just use Robinson: <w lemma="strong:G652" morph="robinson:N-NSM" src="2">an apostle</w>
        String strongsLemma = attrs.getValue(OSISUtil.ATTRIBUTE_W_LEMMA);
        if (strongsLemma.startsWith(OSISUtil.LEMMA_STRONGS)) {
            String morphology = attrs.getValue(OSISUtil.ATTRIBUTE_W_MORPH);
            pendingStrongsAndMorphTags = getStrongsAndMorphTags(strongsLemma, morphology);
        }
    }

    /**
     * Write the links remembered by {@link #start(Attributes)}, each preceded
     * by a space, followed by one trailing space, then forget them.
     */
    public void end() {
        if (!isShowStrongsOrMorphology() || pendingStrongsAndMorphTags == null) {
            return;
        }
        for (String tag : pendingStrongsAndMorphTags) {
            writer.write(HTML.SPACE); // separator between adjacent tags and words
            writer.write(tag);
        }
        writer.write(HTML.SPACE); // separator between adjacent tags and words
        pendingStrongsAndMorphTags = null;
    }

    /**
     * @return true if Strong's numbers or morphology (or both) are to be shown
     */
    private boolean isShowStrongsOrMorphology() {
        return parameters.isShowStrongs() || parameters.isShowMorphology();
    }

    /**
     * Convert Strongs lemmas and their morphology into html links, e.g. for
     * lemmas "strong:H0430", "strong:H0853 strong:H01254".
     *
     * @param strongsLemma value of the lemma attribute
     * @param morphology   value of the morph attribute; may be null
     * @return one entry per reference position, each holding the Strong's link
     * followed by the morphology link for that position (either may be
     * missing), in reverse order of appearance
     */
    private List<String> getStrongsAndMorphTags(String strongsLemma,
                                                String morphology) {
        // there may occasionally be more than one ref so split them into a list
        // of single refs
        List<String> strongsTags = getStrongsTags(strongsLemma);
        List<String> morphTags = getMorphTags(morphology);
        List<String> mergedStrongsAndMorphTags = new ArrayList<String>();
        // each morph tag should relate to a Strongs tag so they should be same
        // length but can't assume that
        // merge the tags into the merge list
        int mergedCount = Math.max(strongsTags.size(), morphTags.size());
        for (int i = 0; i < mergedCount; i++) {
            StringBuilder merged = new StringBuilder();
            if (i < strongsTags.size()) {
                merged.append(strongsTags.get(i));
            }
            if (i < morphTags.size()) {
                merged.append(morphTags.get(i));
            }
            mergedStrongsAndMorphTags.add(merged.toString());
        }
        // for some reason the generic tags should come last and the order seems
        // always reversed in other systems
        // the second tag (once reversed) seems to relate to a missing word like
        // eth
        Collections.reverse(mergedStrongsAndMorphTags);
        return mergedStrongsAndMorphTags;
    }

    /**
     * Build a Strong's link for every usable reference in the lemma.
     *
     * @param strongsLemma space separated references, e.g. "strong:H0853 strong:H01254"
     * @return the links in order of appearance; empty if Strong's display is off
     */
    private List<String> getStrongsTags(String strongsLemma) {
        // there may occasionally be more than one ref so split them into a list
        // of single refs
        List<String> strongsTags = new ArrayList<String>();
        if (!parameters.isShowStrongs()) {
            return strongsTags;
        }
        for (String ref : strongsLemma.split(" ")) {
            // ignore if string doesn't start with "strong:" or is too short
            if (ref.startsWith(OSISUtil.LEMMA_STRONGS)
                    && ref.length() > OSISUtil.LEMMA_STRONGS.length() + 2) {
                // reduce ref like "strong:H0430" to "H0430"
                ref = ref.substring(OSISUtil.LEMMA_STRONGS.length());
                // select Hebrew or Greek protocol
                String protocol = StrongsUtil.getStrongsProtocol(ref);
                if (protocol != null) {
                    // remove initial G or H
                    String strongsNumber = ref.substring(1);
                    strongsTags.add(StrongsUtil.createStrongsLink(protocol, strongsNumber));
                }
            }
        }
        return strongsTags;
    }

    /**
     * Build a morphology link for every usable reference. Example of strongs
     * and morphology, we just use Robinson:
     * {@code <w lemma="strong:G652" morph="robinson:N-NSM" src="2">an apostle</w>}
     *
     * @param morphology space separated morphology references; may be null or empty
     * @return the links in order of appearance; empty if morphology display is
     * off or there is no morphology
     */
    private List<String> getMorphTags(String morphology) {
        // there may occasionally be more than one ref so split them into a list
        // of single refs
        List<String> morphTags = new ArrayList<String>();
        if (!parameters.isShowMorphology() || !StringUtils.isNotEmpty(morphology)) {
            return morphTags;
        }
        for (String ref : morphology.split(" ")) {
            // ignore if string doesn't start with "robinson" or is too short
            if (ref.startsWith(OSISUtil.MORPH_ROBINSONS)
                    && ref.length() > OSISUtil.MORPH_ROBINSONS.length() + 2) {
                // reduce ref like "robinson:N-NSM" to "N-NSM" for
                // display
                String display = ref.substring(OSISUtil.MORPH_ROBINSONS.length());
                StringBuilder tag = new StringBuilder();
                tag.append("<a href='").append(ref)
                        .append("' class='morphology'>").append(display)
                        .append("</a>");
                morphTags.add(tag.toString());
            }
        }
        return morphTags;
    }

    // NOTE(review): not referenced anywhere in this class; kept so that any
    // code outside this file that names it keeps compiling
    enum QType {quote, redLetter}
}

View File

@ -0,0 +1,43 @@
package org.bspeice.minimalbible.service.format.osistohtml.strongs;
import org.bspeice.minimalbible.service.format.osistohtml.preprocessor.TextPreprocessor;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Used with StrongsGreek and StrongsHebrew to find text like 'see HEBREW for 0433' and 'see GREEK for 1223' and converts to links
*
* @author Martin Denham [mjdenham at gmail dot com]
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's author.
*/
public class StrongsLinkCreator implements TextPreprocessor {
static Pattern patt = Pattern.compile("see (HEBREW|GREEK) for (\\d{1,5})"); //".*see ([HEBREW|GREEK]) for (\\d{1,5}).*");
public String process(String text) {
StringBuffer result = new StringBuffer();
Matcher m = patt.matcher(text);
while (m.find()) {
String lang = m.group(1);
String refNo = m.group(2);
// select Hebrew or Greek protocol
String protocol = StrongsUtil.getStrongsProtocol(lang);
// append the actual link to the Strongs ref
String refLink = StrongsUtil.createStrongsLink(protocol, refNo, m.group(), "");
m.appendReplacement(result, refLink);
}
// append any trailing space after the last match, or if no match then the whole string
m.appendTail(result);
return result.toString();
}
}

View File

@ -0,0 +1,60 @@
package org.bspeice.minimalbible.service.format.osistohtml.strongs;
import org.apache.commons.lang3.StringUtils;
import static org.bspeice.minimalbible.service.format.Constants.GREEK_DEF_PROTOCOL;
import static org.bspeice.minimalbible.service.format.Constants.HEBREW_DEF_PROTOCOL;
/**
 * Static helpers for building html links to Strong's entries.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class StrongsUtil {
    private static final String DEFAULT_CSS_CLASS = "strongs";

    /**
     * Create an html link for the passed in strongs number and protocol, using
     * the number itself as the link text and the default css class.
     *
     * @param protocol      the protocol part of the link uri, e.g. as returned
     *                      by {@link #getStrongsProtocol(String)}
     * @param strongsNumber the Strong's number without its leading G or H
     * @return the html anchor
     */
    public static String createStrongsLink(String protocol, String strongsNumber) {
        return createStrongsLink(protocol, strongsNumber, strongsNumber, DEFAULT_CSS_CLASS);
    }

    /**
     * Create an html link for the passed in strongs number and protocol.
     *
     * @param protocol      the protocol part of the link uri
     * @param strongsNumber the Strong's number, left-padded with zeros to 5 characters
     * @param content       the descriptive text shown inside the link
     * @param cssClass      value of the anchor's class attribute
     * @return the html anchor
     */
    public static String createStrongsLink(String protocol, String strongsNumber, String content, String cssClass) {
        // uri looks like e.g. H:01234
        String uri = protocol + ":" + StringUtils.leftPad(strongsNumber, 5, "0");
        // opening tag with uri and css class, descriptive string, closing tag
        return "<a href='" + uri + "' class='" + cssClass + "'>" + content + "</a>";
    }

    /**
     * Select the Hebrew or Greek protocol from the first letter of a reference.
     *
     * @param ref a reference or language name starting with H or G
     * @return the Hebrew protocol for H, the Greek protocol for G, otherwise null
     */
    public static String getStrongsProtocol(String ref) {
        if (ref.startsWith("G")) {
            return GREEK_DEF_PROTOCOL;
        }
        if (ref.startsWith("H")) {
            return HEBREW_DEF_PROTOCOL;
        }
        return null;
    }
}

View File

@ -0,0 +1,34 @@
package org.bspeice.minimalbible.service.format.osistohtml.tei;
import org.bspeice.minimalbible.service.format.osistohtml.HiHandler;
import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters;
import org.xml.sax.Attributes;
/**
 * Handles the TEI orth tag in much the same way as the hi tag, e.g.
 * {@code <orth rend="bold" type="trans">aneuthetos</orth>}.
 * Rendering falls back to bold when the tag has no rend attribute.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class OrthHandler extends HiHandler {
    /** Default passed on when starting the tag. */
    private static final String DEFAULT = "bold";

    public OrthHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        super(parameters, writer);
    }

    /**
     * @return the name of the tag handled by this class
     */
    public String getTagName() {
        return "orth";
    }

    /**
     * Start the tag with the value of its rend attribute and the default.
     *
     * @param attrs attributes of the opening orth tag
     */
    public void start(Attributes attrs) {
        start(attrs.getValue(TEIUtil.TEI_ATTR_REND), DEFAULT);
    }
}

View File

@ -0,0 +1,34 @@
package org.bspeice.minimalbible.service.format.osistohtml.tei;
import org.bspeice.minimalbible.service.format.osistohtml.HiHandler;
import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters;
import org.xml.sax.Attributes;
/**
 * Handles the TEI pron tag in much the same way as the hi tag.
 * Rendering falls back to italic when the tag has no rend attribute.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class PronHandler extends HiHandler {
    /** Default passed on when starting the tag. */
    private static final String DEFAULT = "italic";

    public PronHandler(OsisToHtmlParameters parameters, HtmlTextWriter writer) {
        super(parameters, writer);
    }

    /**
     * @return the name of the tag handled by this class
     */
    public String getTagName() {
        return "pron";
    }

    /**
     * Start the tag with the value of its rend attribute and the default.
     *
     * @param attrs attributes of the opening pron tag
     */
    public void start(Attributes attrs) {
        start(attrs.getValue(TEIUtil.TEI_ATTR_REND), DEFAULT);
    }
}

View File

@ -0,0 +1,24 @@
package org.bspeice.minimalbible.service.format.osistohtml.tei;
import org.bspeice.minimalbible.service.format.osistohtml.HtmlTextWriter;
import org.bspeice.minimalbible.service.format.osistohtml.NoteHandler;
import org.bspeice.minimalbible.service.format.osistohtml.OsisToHtmlParameters;
import org.bspeice.minimalbible.service.format.osistohtml.ReferenceHandler;
import org.xml.sax.Attributes;
/**
 * Handles the TEI ref tag by passing the value of its target attribute on to
 * the single-argument start.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class RefHandler extends ReferenceHandler {
    public RefHandler(OsisToHtmlParameters osisToHtmlParameters, NoteHandler noteHandler, HtmlTextWriter theWriter) {
        super(osisToHtmlParameters, noteHandler, theWriter);
    }

    /**
     * Start the reference named by the target attribute of the tag.
     *
     * @param attrs attributes of the opening ref tag
     */
    public void start(Attributes attrs) {
        start(attrs.getValue(TEIUtil.TEI_ATTR_TARGET));
    }
}

View File

@ -0,0 +1,18 @@
package org.bspeice.minimalbible.service.format.osistohtml.tei;
/**
 * Names of the TEI elements and attributes used by the TEI tag handlers.
 *
 * @author Martin Denham [mjdenham at gmail dot com]
 * @see gnu.lgpl.License for license details.<br>
 * The copyright to this program is held by it's author.
 */
public class TEIUtil {
    // element names
    // E.g. <ref target="StrongsHebrew:00411">H411</ref> taken from StrongsHebrew:00428
    public static final String TEI_ELEMENT_REF = "ref";
    public static final String TEI_ELEMENT_ORTH = "orth";
    public static final String TEI_ELEMENT_PRON = "pron";

    // attribute names
    /** Attribute of a ref element holding what it points at. */
    public static final String TEI_ATTR_TARGET = "target";
    /** The way tag contents are rendered e.g. 'bold', 'italic'. */
    public static final String TEI_ATTR_REND = "rend";
}