/*
 * Copyright © 2017 - 2019 Atilika Inc. All rights reserved.
 */

import React, {memo} from "react";
import Code from "../../common/Code";
import Footer from "../../common/Footer";
import Intro from "../../common/Intro";
import Page from "../../common/Page";
import Meta from "../../common/Meta";
import Section from "../../common/Section";
import KuromojiDemo from "../../kuromoji/KuromojiDemoSection";
import KuromojiHero from "../../kuromoji/KuromojiHero";
import KuromojiSample from "../../kuromoji/KuromojiSample";
import Divider from "../../common/typography/Divider";
import {H1, H2} from "../../common/typography/Headings";

const KuromojiEn = () => (
    <Page lang="en" title="Kuromoji" path="/en/kuromoji">
        <Meta name="description">Open source Java morphological analyzer for Japanese.</Meta>
        <KuromojiHero>
            <H1>Kuromoji</H1>
            <p>Open source Java morphological analyzer for Japanese.</p>
        </KuromojiHero>

        <Intro title="Features" />

        <Section title="Word segmentation" grey>
            <p>
                Kuromoji can separate a block of text into distinct words, also known as morphemes.
            </p>
            <KuromojiSample>
                吾輩は猫である。
                <span>→</span>&nbsp;&nbsp; 吾輩&nbsp;&nbsp; は&nbsp;&nbsp; 猫&nbsp;&nbsp;
                で&nbsp;&nbsp; ある&nbsp;&nbsp; 。
            </KuromojiSample>
        </Section>

        <Section title="Part of speech tagging">
            <p>
                For each word, Kuromoji assigns a part of speech like noun, verb, adjective, and so
                on.
            </p>
            <KuromojiSample grey spaceRt>
                <ruby>
                    相撲<rt>noun</rt>を<rt>particle</rt>
                    見る<rt>verb</rt>の<rt>particle</rt>が<rt>particle</rt>
                    好き<rt>adjectival noun</rt>
                    です<rt>auxiliary verb</rt>。<rt>symbol</rt>
                </ruby>
            </KuromojiSample>
        </Section>

        <Section title="Lemmatization" grey>
            <p>Get the base form for inflected verbs and adjectives.</p>
            <KuromojiSample>
                <table>
                    <thead>
                        <tr>
                            <th>Surface Form</th>
                            <th>Base Form</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                            <td>食べたい</td>
                            <td>食べる</td>
                        </tr>
                        <tr>
                            <td>楽しくない</td>
                            <td>楽しい</td>
                        </tr>
                        <tr>
                            <td>帰りました</td>
                            <td>帰る</td>
                        </tr>
                    </tbody>
                </table>
            </KuromojiSample>
        </Section>

        <Section title="Readings">
            <p>Extract readings for kanji.</p>
            <KuromojiSample grey>
                <ruby>
                    親譲<rt>おやゆず</rt>
                </ruby>
                りの
                <ruby>
                    無鉄砲<rt>むてっぽう</rt>
                </ruby>
                で
                <ruby>
                    小供<rt>こども</rt>
                </ruby>
                の
                <ruby>
                    時<rt>とき</rt>
                </ruby>
                から
                <ruby>
                    損<rt>そん</rt>
                </ruby>
                <ruby>ばかりしている</ruby>
            </KuromojiSample>
        </Section>

        <Section title="Search segmentation mode" grey>
            <p>
                Kuromoji comes with a Search Mode for search applications, that does additional
                splitting of words to make sure you get hits when searching for compounds nouns.
            </p>

            <p>
                For example, we want a search for 空港 (airport) to match 関西国際空港 (Kansai
                International Airport), but most analyzers don’t allow this since 関西国際空港 tends
                to become one token.
            </p>
        </Section>

        <Section title="Dictionary support">
            <p>
                Kuromoji support a wide range of dictionary backends to support different use cases,
                including ipadic, jumandic, and unidic among others.
            </p>
        </Section>

        <Section title="Open Source" grey>
            <p>Kuromoji is licensed under the Apache License, Version 2.0.</p>
        </Section>

        <Section title="Search Integration">
            <p>
                Kuromoji powers the Japanese language support in Apache Lucene and Apache Solr. It
                also used in Elasticsearch.
            </p>
        </Section>

        <KuromojiDemo title="Demo" lang="en" />

        <Section title="Usage">
            <p>
                Kuromoji is an easy to use and self-contained Japanese morphological analyzer that
                does:
            </p>
            <ul>
                <li>Word segmentation. Segmenting text into words (or morphemes)</li>
                <li>
                    Part-of-speech tagging. Assign word-categories (nouns, verbs, particles,
                    adjectives, etc.)
                </li>
                <li>Lemmatization. Get dictionary forms for inflected verbs and adjectives</li>
                <li>Readings. Extract readings for kanji</li>
            </ul>
            <p>
                Several other features are supported. Please consult each dictionaries’ Token class
                for details.
            </p>

            <Divider />

            <H2>Using Kuromoji</H2>
            <p>
                The example below shows how to use the Kuromoji morphological analyzer in its
                simplest form; to segment text into tokens and output features for each token.
            </p>
            <Code lang="java">{`
                package com.atilika.kuromoji.example;

                import com.atilika.kuromoji.ipadic.Token;
                import com.atilika.kuromoji.ipadic.Tokenizer;
                import java.util.List;

                public class KuromojiExample {
                    public static void main(String[] args) {
                        Tokenizer tokenizer = new Tokenizer() ;
                        List<Token> tokens = tokenizer.tokenize("お寿司が食べたい。");
                        for (Token token : tokens) {
                            System.out.println(token.getSurface() + "\\t" + token.getAllFeatures());
                        }
                    }
                }
            `}</Code>
            <p>
                Make sure you add the dependency below to your pom.xml before building your project.
            </p>
            <Code lang="xml">{`
                <dependency>
                    <groupId>com.atilika.kuromoji</groupId>
                    <artifactId>kuromoji-ipadic</artifactId>
                    <version>0.9.0</version>
                </dependency>
            `}</Code>
        </Section>

        <Footer lang="en" />
    </Page>
);

// KuromojiEn takes no props, so wrapping it in memo lets React reuse the
// previous render output when a parent re-renders.
const MemoizedKuromojiEn = memo(KuromojiEn);

export default MemoizedKuromojiEn;
