Reconocimiento de texto de imagen con tesseract en Kotlin



No es ningún secreto que Python ha tomado firmemente el liderazgo en ML y Data Science. ¿Pero qué pasa si miramos otros lenguajes y plataformas? ¿Qué tan cómodo resulta resolver tareas similares en ellos?



Por ejemplo, reconocimiento de texto en una imagen.



tesseract. Python , , , OpenCV. C++ , . jvm , , Kotlin.



Kotlin. Data Science. jvm « Python jvm». Kotlin Apache Spark.

tesseract. , wiki. , tesseract :



# Run OCR on input_file.jpg using the English ("eng") language data from the given tessdata directory; "stdout" sends the recognized text to standard output.
tesseract input_file.jpg stdout -l eng --tessdata-dir /usr/local/share/tessdata/


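--tessdata-dir — ruta al directorio con los datos de idioma de tesseract (/usr/local/share/tessdata/ en macos). stdout indica que el resultado se imprime en la salida estándar.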



tesseract jvm . :



// Gradle (Kotlin DSL) dependency declaration for tess4j, version 4.5.3.
implementation("net.sourceforge.tess4j:tess4j:4.5.3")


, jvm, . Java 13+. sdkman. Intellij IDEA, Community version. IDE (new project -> Kotlin, gradle Kotlin) github, start.

tesseract . :



 // Configure the OCR engine: the directory with the language data and the language to use.
 val api = Tesseract().apply {
     setDatapath("/usr/local/share/tessdata/")
     setLanguage("eng")
 }

 // Decode the picture from disk and run recognition over the whole image.
 val source = File("input_file.jpg")
 val image = ImageIO.read(source)
 val result: String = api.doOCR(image)


, . , , macos jna.library.path, dylib- tesseract.



// If the tesseract dylib is present in libPath, put that directory at the front of
// jna.library.path, keeping whatever value was already configured there.
val libPath = "/usr/local/lib"
if (File(libPath, "libtesseract.dylib").exists()) {
    val previous: String? = System.getProperty("jna.library.path")
    val combined = if (previous == null) libPath else libPath + File.pathSeparator + previous
    System.setProperty("jna.library.path", combined)
}


.



OpenCV. Python - , pip. OpenCV java , . jvm- , - , . - , (, , djl-pytorch), . , OpenCV , :



// Gradle (Kotlin DSL) dependency declaration for the org.openpnp OpenCV artifact, version 4.3.0-2.
implementation("org.openpnp:opencv:4.3.0-2")


Antes de usar OpenCV hay que cargar sus bibliotecas nativas:



// Load OpenCV through the nu.pattern helper (presumably this loads the bundled native library — confirm in the openpnp docs).
nu.pattern.OpenCV.loadLocally()


. , , - :



 // Convert `mat` from BGR to grayscale; source and destination are the same Mat.
 Imgproc.cvtColor(mat, mat, Imgproc.COLOR_BGR2GRAY)


, OpenCV Mat, - OpenCV jvm, BufferedImage.



Un Mat se puede leer desde un archivo, igual que en Python, con imread:



// Read input.jpg into an OpenCV Mat.
val mat = Imgcodecs.imread("input.jpg")


OpenCV . Java BufferedImage, , , pipeline . BufferedImage Mat:



val image: BufferedImage = ...
// A CV_8UC3 Mat needs exactly three interleaved bytes per pixel. Images decoded in
// another layout (gray, ABGR, int-packed RGB, ...) would either fail the DataBufferByte
// cast or fill the Mat with misaligned data, so they are redrawn as TYPE_3BYTE_BGR first.
val bgrImage = if (image.type == BufferedImage.TYPE_3BYTE_BGR) {
    image
} else {
    BufferedImage(image.width, image.height, BufferedImage.TYPE_3BYTE_BGR).also { copy ->
        val graphics = copy.createGraphics()
        try {
            graphics.drawImage(image, 0, 0, null)
        } finally {
            graphics.dispose()
        }
    }
}
val pixels = (bgrImage.raster.dataBuffer as DataBufferByte).data
val mat = Mat(image.height, image.width, CvType.CV_8UC3)
            .apply { put(0, 0, pixels) }


Conversión de Mat a BufferedImage:



val mat = ...
// A single-channel Mat becomes an 8-bit gray image; anything wider is treated as 3-byte BGR.
val type = if (mat.channels() > 1) BufferedImage.TYPE_3BYTE_BGR else BufferedImage.TYPE_BYTE_GRAY
// Pull every pixel of the Mat into one flat byte array ...
val rawBytes = ByteArray(mat.channels() * mat.cols() * mat.rows())
mat.get(0, 0, rawBytes)
// ... and copy it into the backing array of a new BufferedImage of the same size.
val image = BufferedImage(mat.cols(), mat.rows(), type)
val destination = (image.raster.dataBuffer as DataBufferByte).data
System.arraycopy(rawBytes, 0, destination, 0, rawBytes.size)


, tesseract doOCR , BufferedImage. , OpenCV, Mat Bufferedimage tesseract.



, :





. doOCR getWords, confidence (score Python-) :



// Download the sample picture and request word-level results (text, confidence, bounding box)
// for the raw image, before any preprocessing is applied.
val image = ImageIO.read(URL("http://img.ifcdn.com/images/b313c1f095336b6d681f75888f8932fc8a531eacd4bc436e4d4aeff7b599b600_1.jpg"))
val result = api.getWords(image, ITessAPI.TessPageIteratorLevel.RIL_WORD)


El resultado es «basura»:



[ie, [Confidence: 2.014679 Bounding box: 100 0 13 14], bad [Confidence: 61.585358 Bounding box: 202 0 11 14], oy [Confidence: 24.619446 Bounding box: 21 68 18 22], ' [Confidence: 4.998787 Bounding box: 185 40 11 18], | [Confidence: 60.889648 Bounding box: 315 62 4 14], ae. [Confidence: 27.592728 Bounding box: 0 129 320 126], c [Confidence: 0.000000 Bounding box: 74 301 3 2], ai [Confidence: 24.988930 Bounding box: 133 283 41 11], ee [Confidence: 27.483231 Bounding box: 186 283 126 41]]


, , , , threshold , :



:



// Convert the BGR image to grayscale; source and destination are the same Mat.
Imgproc.cvtColor(mat, mat, Imgproc.COLOR_BGR2GRAY)
// Binarize with threshold 244 and max value 255: the text becomes white, everything else black.
Imgproc.threshold(mat, mat, 244.0, 255.0, Imgproc.THRESH_BINARY)
// Invert the result, so the text ends up black on a white background.
Core.bitwise_not(mat, mat)


( Imgcodecs.imwrite("output.jpg", mat) )





Si ahora llamamos a getWords sobre la imagen preparada, obtenemos:



[WHEN [Confidence: 94.933418 Bounding box: 48 251 52 14], SHE [Confidence: 95.249252 Bounding box: 109 251 34 15], CATCHES [Confidence: 95.973259 Bounding box: 151 251 80 15], YOU [Confidence: 96.446579 Bounding box: 238 251 33 15], CHEATING [Confidence: 96.458656 Bounding box: 117 278 86 15]]


, .



Código completo:



import net.sourceforge.tess4j.ITessAPI
import net.sourceforge.tess4j.Tesseract
import nu.pattern.OpenCV
import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat
import org.opencv.imgproc.Imgproc
import java.awt.image.BufferedImage
import java.awt.image.DataBufferByte
import java.io.File
import java.net.URL
import javax.imageio.ImageIO

/**
 * Downloads the sample image, preprocesses it with OpenCV (grayscale, binary threshold,
 * inversion) and prints the words tesseract recognizes in it.
 *
 * @throws IllegalStateException if the downloaded data cannot be decoded as an image.
 */
fun main() {
    setupOpenCV()
    setupTesseract()

    val imageUrl = URL("http://img.ifcdn.com/images/b313c1f095336b6d681f75888f8932fc8a531eacd4bc436e4d4aeff7b599b600_1.jpg")
    // ImageIO.read returns null when no registered reader can decode the data; fail with a
    // clear message here instead of a NullPointerException in the conversion below.
    val image: BufferedImage = ImageIO.read(imageUrl)
        ?: error("Could not decode an image from $imageUrl")
    val mat = image.toMat()

    // Grayscale -> binary threshold (text becomes white) -> invert (text becomes black).
    Imgproc.cvtColor(mat, mat, Imgproc.COLOR_BGR2GRAY)
    Imgproc.threshold(mat, mat, 244.0, 255.0, Imgproc.THRESH_BINARY)
    Core.bitwise_not(mat, mat)

    val preparedImage = mat.toBufferedImage()

    val api = Tesseract()
    api.setDatapath("/usr/local/share/tessdata/")
    api.setLanguage("eng")

    // Word-level results are requested rather than plain text.
    val result = api.getWords(preparedImage, ITessAPI.TessPageIteratorLevel.RIL_WORD)
    println(result)
}

/**
 * Makes sure JNA can find the tesseract dylib.
 *
 * When `/usr/local/lib/libtesseract.dylib` exists, that directory is put at the front of
 * the `jna.library.path` system property, ahead of any value already set there. When the
 * file is absent the property is left untouched.
 */
private fun setupTesseract() {
    val nativeDir = "/usr/local/lib"
    if (!File(nativeDir, "libtesseract.dylib").exists()) return

    val existing: String? = System.getProperty("jna.library.path")
    val merged = existing?.let { nativeDir + File.pathSeparator + it } ?: nativeDir
    System.setProperty("jna.library.path", merged)
}

/** Loads OpenCV through the `nu.pattern` helper; `main` calls this before using any OpenCV API. */
private fun setupOpenCV() = OpenCV.loadLocally()

/**
 * Copies this image into a new 3-channel, 8-bit OpenCV [Mat] in B, G, R byte order.
 *
 * Images that are not already `TYPE_3BYTE_BGR` (gray, ABGR, int-packed RGB, ...) are first
 * redrawn into a `TYPE_3BYTE_BGR` copy. Without this step the `DataBufferByte` cast can
 * throw, or the Mat can be filled with data that does not match three bytes per pixel.
 * Transparent areas are drawn over the copy's initial black pixels.
 *
 * @return a `CV_8UC3` Mat with `height` rows and `width` columns.
 */
private fun BufferedImage.toMat(): Mat {
    val bgrImage = if (type == BufferedImage.TYPE_3BYTE_BGR) {
        this
    } else {
        BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR).also { copy ->
            val graphics = copy.createGraphics()
            try {
                graphics.drawImage(this, 0, 0, null)
            } finally {
                graphics.dispose()
            }
        }
    }
    val pixels = (bgrImage.raster.dataBuffer as DataBufferByte).data
    return Mat(height, width, CvType.CV_8UC3)
        .apply { put(0, 0, pixels) }
}

/**
 * Copies the pixels of this Mat into a new [BufferedImage] of the same size.
 *
 * A single-channel Mat yields a `TYPE_BYTE_GRAY` image; a Mat with more than one channel
 * yields a `TYPE_3BYTE_BGR` image.
 */
private fun Mat.toBufferedImage(): BufferedImage {
    val imageType = if (channels() > 1) BufferedImage.TYPE_3BYTE_BGR else BufferedImage.TYPE_BYTE_GRAY

    // Read all pixels of the Mat into one flat byte array.
    val rawBytes = ByteArray(channels() * cols() * rows())
    get(0, 0, rawBytes)

    // Write those bytes straight into the backing array of the new image.
    return BufferedImage(cols(), rows(), imageType).also { result ->
        val destination = (result.raster.dataBuffer as DataBufferByte).data
        System.arraycopy(rawBytes, 0, destination, 0, rawBytes.size)
    }
}


Python-, . ( , , Mat BufferedImage).



Python OpenCV tesseract. Python , .



jvm- . , , . , , , , , jvm Kotlin .



Python , c, ML. . , . , - Python, , .



, - . , :



djl.ai — Deep Learning para la jvm, con soporte de pytorch y tensorflow

deeplearning4j.org — tensorflow keras

kotlinlang.org/docs/reference/data-science-overview — resumen de herramientas de Data Science para Kotlin (y Java)



.




All Articles