Mercurial > pymonkey
changeset 27:21045074139f
Based on my new understanding of `JSString *` / `jschar *` (thanks to folks on #jsapi), I've removed the requirement that SpiderMonkey be in UTF-8 mode to translate strings between Python and SpiderMonkey.
author | Atul Varma <varmaa@toolness.com> |
---|---|
date | Mon, 29 Jun 2009 13:33:07 -0700 |
parents | 9e33fc5a8d92 |
children | bd30f5c02fc3 |
files | context.c pymonkey.c test_pymonkey.py utils.c |
diffstat | 4 files changed, 23 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/context.c Mon Jun 29 10:35:06 2009 -0700 +++ b/context.c Mon Jun 29 13:33:07 2009 -0700 @@ -44,21 +44,15 @@ // can't be very efficient. PYM_JSObject *object; - char *string; + Py_UNICODE *string; - if (!JS_CStringsAreUTF8()) { - PyErr_SetString(PyExc_NotImplementedError, - "Data type conversion not implemented."); - return NULL; - } - - if (!PyArg_ParseTuple(args, "O!es", &PYM_JSObjectType, &object, - "utf-8", &string)) + if (!PyArg_ParseTuple(args, "O!u", &PYM_JSObjectType, &object, + &string)) return NULL; - JSString *jsString = JS_NewStringCopyZ(self->cx, string); + JSString *jsString = JS_NewUCStringCopyZ(self->cx, + (const jschar *) string); if (jsString == NULL) { - PyMem_Free(string); PyErr_SetString(PYM_error, "JS_NewStringCopyZ() failed"); return NULL; } @@ -69,12 +63,10 @@ JS_GetStringLength(jsString), &val)) { // TODO: Get the actual JS exception. Any exception that exists // here will probably still be pending on the JS context. - PyMem_Free(string); PyErr_SetString(PYM_error, "Getting property failed."); return NULL; } - PyMem_Free(string); return PYM_jsvalToPyObject(self, val); }
--- a/pymonkey.c Mon Jun 29 10:35:06 2009 -0700 +++ b/pymonkey.c Mon Jun 29 13:33:07 2009 -0700 @@ -11,9 +11,6 @@ PyMODINIT_FUNC initpymonkey(void) { - if (!JS_CStringsAreUTF8()) - JS_SetCStringsAreUTF8(); - PyObject *module; module = Py_InitModule("pymonkey", PYM_methods);
--- a/test_pymonkey.py Mon Jun 29 10:35:06 2009 -0700 +++ b/test_pymonkey.py Mon Jun 29 13:33:07 2009 -0700 @@ -28,10 +28,10 @@ obj = cx.new_object() cx.init_standard_classes(obj) cx.evaluate_script(obj, 'foo = {bar: 1}', '<string>', 1) - self.assertTrue(isinstance(cx.get_property(obj, "foo"), + self.assertTrue(isinstance(cx.get_property(obj, u"foo"), pymonkey.Object)) - self.assertTrue(cx.get_property(obj, "foo") is - cx.get_property(obj, "foo")) + self.assertTrue(cx.get_property(obj, u"foo") is + cx.get_property(obj, u"foo")) def testObjectGetattrWorks(self): cx = pymonkey.Runtime().new_context() @@ -39,10 +39,10 @@ cx.init_standard_classes(obj) cx.evaluate_script(obj, 'boop = 5', '<string>', 1) cx.evaluate_script(obj, 'this["blarg\u2026"] = 5', '<string>', 1) - self.assertEqual(cx.get_property(obj, "beans"), + self.assertEqual(cx.get_property(obj, u"beans"), pymonkey.undefined) self.assertEqual(cx.get_property(obj, u"blarg\u2026"), 5) - self.assertEqual(cx.get_property(obj, "boop"), 5) + self.assertEqual(cx.get_property(obj, u"boop"), 5) def testContextIsInstance(self): cx = pymonkey.Runtime().new_context() @@ -82,7 +82,7 @@ cx.init_standard_classes(obj) obj = cx.evaluate_script(obj, '({boop: 1})', '<string>', 1) self.assertTrue(isinstance(obj, pymonkey.Object)) - self.assertEqual(cx.get_property(obj, "boop"), 1) + self.assertEqual(cx.get_property(obj, u"boop"), 1) def testEvaluateReturnsTrue(self): self.assertTrue(self._evaljs('true') is True)
--- a/utils.c Mon Jun 29 10:35:06 2009 -0700 +++ b/utils.c Mon Jun 29 13:33:07 2009 -0700 @@ -27,17 +27,20 @@ if (JSVAL_IS_VOID(value)) Py_RETURN_UNDEFINED; - if (JSVAL_IS_STRING(value) && JS_CStringsAreUTF8()) { - // TODO: What to do if C strings aren't UTF-8? The jschar * - // type isn't actually UTF-16, it's just "UTF-16-ish", so - // there doesn't seem to be any other lossless way of - // transferring the string other than perhaps by transmitting - // its JSON representation. + if (JSVAL_IS_STRING(value)) { + // Strings in JS are funky: think of them as 16-bit versions of + // Python 2.x's 'str' type. Whether or not they're valid UTF-16 + // is entirely up to the client code. + // TODO: Instead of ignoring errors, consider actually treating + // the string as a raw character buffer. JSString *str = JSVAL_TO_STRING(value); - const char *bytes = JS_GetStringBytes(str); - const char *errors; - return PyUnicode_DecodeUTF8(bytes, strlen(bytes), errors); + const char *chars = (const char *) JS_GetStringChars(str); + size_t length = JS_GetStringLength(str); + + // We're multiplying length by two since Python wants the number + // of bytes, not the number of 16-bit characters. + return PyUnicode_DecodeUTF16(chars, length * 2, "ignore", NULL); } if (JSVAL_IS_OBJECT(value))