changeset 27:21045074139f

Based on my new understanding of the `JSString *` and `jschar *` types (thanks to folks on #jsapi), I've removed the requirement that SpiderMonkey be in UTF-8 mode in order to translate strings between Python and SpiderMonkey.
author Atul Varma <varmaa@toolness.com>
date Mon, 29 Jun 2009 13:33:07 -0700
parents 9e33fc5a8d92
children bd30f5c02fc3
files context.c pymonkey.c test_pymonkey.py utils.c
diffstat 4 files changed, 23 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/context.c	Mon Jun 29 10:35:06 2009 -0700
+++ b/context.c	Mon Jun 29 13:33:07 2009 -0700
@@ -44,21 +44,15 @@
   // can't be very efficient.
 
   PYM_JSObject *object;
-  char *string;
+  Py_UNICODE *string;
 
-  if (!JS_CStringsAreUTF8()) {
-    PyErr_SetString(PyExc_NotImplementedError,
-                    "Data type conversion not implemented.");
-    return NULL;
-  }
-
-  if (!PyArg_ParseTuple(args, "O!es", &PYM_JSObjectType, &object,
-                        "utf-8", &string))
+  if (!PyArg_ParseTuple(args, "O!u", &PYM_JSObjectType, &object,
+                        &string))
     return NULL;
 
-  JSString *jsString = JS_NewStringCopyZ(self->cx, string);
+  JSString *jsString = JS_NewUCStringCopyZ(self->cx,
+                                           (const jschar *) string);
   if (jsString == NULL) {
-    PyMem_Free(string);
     PyErr_SetString(PYM_error, "JS_NewStringCopyZ() failed");
     return NULL;
   }
@@ -69,12 +63,10 @@
                         JS_GetStringLength(jsString), &val)) {
     // TODO: Get the actual JS exception. Any exception that exists
     // here will probably still be pending on the JS context.
-    PyMem_Free(string);
     PyErr_SetString(PYM_error, "Getting property failed.");
     return NULL;
   }
 
-  PyMem_Free(string);
   return PYM_jsvalToPyObject(self, val);
 }
 
--- a/pymonkey.c	Mon Jun 29 10:35:06 2009 -0700
+++ b/pymonkey.c	Mon Jun 29 13:33:07 2009 -0700
@@ -11,9 +11,6 @@
 PyMODINIT_FUNC
 initpymonkey(void)
 {
-  if (!JS_CStringsAreUTF8())
-    JS_SetCStringsAreUTF8();
-
   PyObject *module;
 
   module = Py_InitModule("pymonkey", PYM_methods);
--- a/test_pymonkey.py	Mon Jun 29 10:35:06 2009 -0700
+++ b/test_pymonkey.py	Mon Jun 29 13:33:07 2009 -0700
@@ -28,10 +28,10 @@
         obj = cx.new_object()
         cx.init_standard_classes(obj)
         cx.evaluate_script(obj, 'foo = {bar: 1}', '<string>', 1)
-        self.assertTrue(isinstance(cx.get_property(obj, "foo"),
+        self.assertTrue(isinstance(cx.get_property(obj, u"foo"),
                                    pymonkey.Object))
-        self.assertTrue(cx.get_property(obj, "foo") is
-                        cx.get_property(obj, "foo"))
+        self.assertTrue(cx.get_property(obj, u"foo") is
+                        cx.get_property(obj, u"foo"))
 
     def testObjectGetattrWorks(self):
         cx = pymonkey.Runtime().new_context()
@@ -39,10 +39,10 @@
         cx.init_standard_classes(obj)
         cx.evaluate_script(obj, 'boop = 5', '<string>', 1)
         cx.evaluate_script(obj, 'this["blarg\u2026"] = 5', '<string>', 1)
-        self.assertEqual(cx.get_property(obj, "beans"),
+        self.assertEqual(cx.get_property(obj, u"beans"),
                          pymonkey.undefined)
         self.assertEqual(cx.get_property(obj, u"blarg\u2026"), 5)
-        self.assertEqual(cx.get_property(obj, "boop"), 5)
+        self.assertEqual(cx.get_property(obj, u"boop"), 5)
 
     def testContextIsInstance(self):
         cx = pymonkey.Runtime().new_context()
@@ -82,7 +82,7 @@
         cx.init_standard_classes(obj)
         obj = cx.evaluate_script(obj, '({boop: 1})', '<string>', 1)
         self.assertTrue(isinstance(obj, pymonkey.Object))
-        self.assertEqual(cx.get_property(obj, "boop"), 1)
+        self.assertEqual(cx.get_property(obj, u"boop"), 1)
 
     def testEvaluateReturnsTrue(self):
         self.assertTrue(self._evaljs('true') is True)
--- a/utils.c	Mon Jun 29 10:35:06 2009 -0700
+++ b/utils.c	Mon Jun 29 13:33:07 2009 -0700
@@ -27,17 +27,20 @@
   if (JSVAL_IS_VOID(value))
     Py_RETURN_UNDEFINED;
 
-  if (JSVAL_IS_STRING(value) && JS_CStringsAreUTF8()) {
-    // TODO: What to do if C strings aren't UTF-8?  The jschar *
-    // type isn't actually UTF-16, it's just "UTF-16-ish", so
-    // there doesn't seem to be any other lossless way of
-    // transferring the string other than perhaps by transmitting
-    // its JSON representation.
+  if (JSVAL_IS_STRING(value)) {
+    // Strings in JS are funky: think of them as 16-bit versions of
+    // Python 2.x's 'str' type.  Whether or not they're valid UTF-16
+    // is entirely up to the client code.
 
+    // TODO: Instead of ignoring errors, consider actually treating
+    // the string as a raw character buffer.
     JSString *str = JSVAL_TO_STRING(value);
-    const char *bytes = JS_GetStringBytes(str);
-    const char *errors;
-    return PyUnicode_DecodeUTF8(bytes, strlen(bytes), errors);
+    const char *chars = (const char *) JS_GetStringChars(str);
+    size_t length = JS_GetStringLength(str);
+
+    // We're multiplying length by two since Python wants the number
+    // of bytes, not the number of 16-bit characters.
+    return PyUnicode_DecodeUTF16(chars, length * 2, "ignore", NULL);
   }
 
   if (JSVAL_IS_OBJECT(value))