论坛首页 Java企业应用论坛

Something about StarDict files

浏览 1791 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者 正文
   发表时间:2009-03-23  

    private static byte[] loadFile(String file) throws IOException
    {
        // save data to binary stream
        ByteArrayOutputStream byteStream = null;
        // read file to stream
        InputStream in = null;
        // binary data which get from file
        byte[] data = null;

        try
        {
            byteStream = new ByteArrayOutputStream();
            if (mIdxFile.equals(file))
            {
                in = new BufferedInputStream(new FileInputStream(file));
            }
            else
            {
                in = new GZIPInputStream(new FileInputStream(file));
            }
            byte[] buf = new byte[1024];
            int numRead = 0;
            while((numRead = in.read(buf)) != -1)
            {
                byteStream.write(buf, 0, numRead);
            }
            data = byteStream.toByteArray();
        }
        finally
        {
            in.close();
            byteStream.close();
        }

        return data;

    }

    /*
     * index file structure
     * word + '\0' (1 byte) + data offset in dict file (4 bytes) + data size in dict file (4 bytes)
     *
     * */
    private static List<StarDictWord> loadDictIndex() throws IOException
    {
        List<StarDictWord> words = new ArrayList<StarDictWord>();
        byte[] splitByte = SPLIT_CHAR.getBytes();
        int currentIndex = 0;
        int dataIndex = 0;
        byte[] data = loadFile(mIdxFile);
        int dataLength = data.length;

        while (currentIndex < dataLength)
        {
            // search for '\0'
            if (data[currentIndex] == splitByte[0])
            {
                // 4 bytes for data offset and 4 bytes for data size in index file
                if (currentIndex + 8 < dataLength)
                {
                    // data size in index file
                    int dataSize = currentIndex - dataIndex;
                    StarDictWord word = new StarDictWord();
                    byte[] wordData = new byte[dataSize];
                    for (int i = 0; i < dataSize; i++)
                    {
                        wordData[i] = data[i + dataIndex];
                    }

                    byte[] dataOffsetByte = new byte[4];
                    byte[] dataSizeByte = new byte[4];
                    for (int i = 1; i < 4; i++)
                    {
                        dataOffsetByte[i] = data[currentIndex + i + 1];
                        dataSizeByte[i] = data[currentIndex + i + 5];
                    }

                    try
                    {
                        String content = new String(wordData, "utf-8");
                        word.setContent(content);
                        word.setDictName(mDictName);
                        word.setDictFileOffset(ByteBuffer.wrap(dataOffsetByte).getInt());
                        word.setDictFileSize(ByteBuffer.wrap(dataSizeByte).getInt());
                        words.add(word);
                    }
                    catch (Exception e)
                    {
                        logger.severe("error in " + dataIndex);
                    }
                }
                currentIndex += 8;
                // reset data index
                dataIndex = currentIndex + 1;
            }

            currentIndex += 1;
        }

        return words;
    }

    private static void loadDictData(List<StarDictWord> words) throws IOException
    {
        // skip for empty list
        if (words.size() == 0)
        {
            return;
        }

        byte[] data = loadFile(mDictFile);
        int dataLength = data.length;
        int offset = 0;
        int size = 0;

        for(StarDictWord word : words)
        {
            offset = word.getDictFileOffset();
            size = word.getDictFileSize();
            if (offset + size - 1 < dataLength)
            {
                byte[] wordData = new byte[size];
                for (int i = 0; i < size; i++)
                {
                    wordData[i] = data[i + offset];
                }
                loadWordData(word, wordData);
            }

        }
   

references
  1. http://stardict.sourceforge.net
  2. http://www.ohloh.net/p/pystardict
论坛首页 Java企业应用版

跳转论坛:
Global site tag (gtag.js) - Google Analytics