TypeError: can't pickle _thread.lock objects

Minoru Osuka

unread,

Oct 8, 2018, 10:54:14 PM10/8/18

to Whoosh

Hi,

I'm working on implementing a custom tokenizer with Whoosh.

My custom tokenizer and test code are as follows:

Custom tokenizer:

from whoosh.analysis import Tokenizer, Token
from whoosh.compat import text_type
from janome.tokenizer import Tokenizer as Janome

class JanomeTokenizer(Tokenizer):
    """Whoosh tokenizer that delegates word segmentation to Janome.

    Whoosh pickles the schema (and therefore every analyzer in it) when it
    writes the index table of contents.  The wrapped Janome tokenizer is not
    picklable (pickling fails with "can't pickle _thread.lock objects"), so
    this class excludes it from the pickled state and rebuilds it when the
    object is restored.
    """

    def __init__(self):
        self.tokenizer = Janome()

    def __getstate__(self):
        """Return the picklable state, leaving out the Janome instance."""
        state = self.__dict__.copy()
        # Keep the key with a None placeholder instead of deleting it: pickle
        # skips __setstate__ when the state is falsy (e.g. an empty dict),
        # which would leave the restored object without a tokenizer.
        state['tokenizer'] = None
        return state

    def __setstate__(self, state):
        """Restore the pickled state and create a fresh Janome instance."""
        self.__dict__.update(state)
        self.tokenizer = Janome()

    def __call__(self, value, positions=False, chars=False, keeporiginal=False, removestops=True, start_pos=0,
                 start_char=0, tokenize=True, mode='', **kwargs):
        """Yield Whoosh tokens for the unicode string ``value``.

        :param value: the unicode text to tokenize.
        :param positions: whether to record token positions on the tokens.
        :param chars: whether to record start/end character offsets.
        :param keeporiginal: whether to store the token text in ``original``.
        :param removestops: passed through to the ``Token`` constructor.
        :param start_pos: position assigned to the first token.
        :param start_char: character offset added to every token offset.
        :param tokenize: if false, the whole of ``value`` is yielded as a
            single token.
        :param mode: passed through to the ``Token`` constructor.

        The same ``Token`` object is mutated and yielded for every token, so
        callers must copy it if they need to keep it across iterations.
        """
        assert isinstance(value, text_type), '%s is not unicode' % repr(value)

        token = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)

        if not tokenize:
            # Untokenized field: emit the entire value as one token.
            token.original = token.text = value
            token.boost = 1.0
            if positions:
                token.pos = start_pos
            if chars:
                token.startchar = start_char
                token.endchar = start_char + len(value)
            yield token
        else:
            pos = start_pos
            for janome_token in self.tokenizer.tokenize(value):
                token.text = janome_token.surface
                token.boost = 1.0
                if keeporiginal:
                    token.original = token.text
                token.stopped = False
                if positions:
                    token.pos = pos
                    pos += 1
                if chars:
                    # NOTE(review): assumes the Janome token exposes a
                    # ``start`` offset into ``value`` -- confirm against the
                    # installed Janome version.
                    token.startchar = start_char + janome_token.start
                    token.endchar = token.startchar + len(janome_token.surface)
                yield token

Test code:

import unittest
import os
from tempfile import TemporaryDirectory
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT
from whoosh.analysis.filters import LowercaseFilter
class TestSchema(unittest.TestCase):
    """Checks that an index can be created with the Janome-based analyzer."""

    def setUp(self):
        # Each test gets its own scratch directory; the index goes in a
        # subfolder of it.
        scratch = TemporaryDirectory()
        self.temp_dir = scratch
        self.index_dir = '%s/index' % scratch.name

    def tearDown(self):
        # Remove the scratch directory and everything created inside it.
        self.temp_dir.cleanup()

    def test_create_index(self):
        target = self.index_dir
        if target is not None:
            os.makedirs(target, exist_ok=True)
        # Tokenize with Janome, then lowercase the resulting tokens.
        pipeline = JanomeTokenizer() | LowercaseFilter()
        ix = create_in(target, Schema(title=TEXT(analyzer=pipeline)))
        self.assertIsNotNone(ix)

But it raised "TypeError: can't pickle _thread.lock objects", as follows:

Testing started at 2:50 PM ...
/usr/local/bin/python3.7 /Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py --multiproc --qt-support=auto --client 127.0.0.1 --port 49765 --file /Applications/PyCharm.app/Contents/helpers/pycharm/_jb_unittest_runner.py --path /Users/mosuka/PycharmProjects/basilisk/tests/test_schema.py
pydev debugger: process 19470 is connecting
Connected to pydev debugger (build 182.4505.26)
Launching unittests with arguments python -m unittest /Users/mosuka/PycharmProjects/basilisk/tests/test_schema.py in /Users/mosuka/PycharmProjects/basilisk/tests
/usr/local/lib/python3.7/site-packages/yaml/constructor.py:126: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
  if not isinstance(key, collections.Hashable):
Error
Traceback (most recent call last):
  File "/usr/local/Cellar/python/3.7.0/Frameworks/Python.framework/Versions/3.7/lib/python3.7/unittest/case.py", line 59, in testPartExecutor
    yield
  File "/usr/local/Cellar/python/3.7.0/Frameworks/Python.framework/Versions/3.7/lib/python3.7/unittest/case.py", line 615, in run
    testMethod()
  File "/Users/mosuka/PycharmProjects/basilisk/tests/test_schema.py", line 53, in test_create_index
    ix = create_in(self.index_dir, schema)
  File "/usr/local/lib/python3.7/site-packages/whoosh/index.py", line 102, in create_in
    return FileIndex.create(storage, schema, indexname)
  File "/usr/local/lib/python3.7/site-packages/whoosh/index.py", line 425, in create
    TOC.create(storage, schema, indexname)
  File "/usr/local/lib/python3.7/site-packages/whoosh/index.py", line 611, in create
    toc.write(storage, indexname)
  File "/usr/local/lib/python3.7/site-packages/whoosh/index.py", line 688, in write
    stream.write_string(pickle.dumps(schema, 2))
TypeError: can't pickle _thread.lock objects

Ran 1 test in 4.069s
FAILED (errors=1)

Can anyone tell me how to solve it?

Environment:

  • Python 3.7.0
  • Whoosh 2.7.4
  • Janome 0.3.6

Thanks

Minoru Osuka

unread,

Oct 8, 2018, 11:46:20 PM10/8/18

to Whoosh

I solved it.

The custom tokenizer class needed __getstate__ and __setstate__ methods.

Thanks