diff --git a/src/half.h b/src/half.h
index 10f9ec54..a3908017 100644
--- a/src/half.h
+++ b/src/half.h
@@ -28,7 +28,9 @@ public:
     /* Constructors. Always inline so that the code can work in registers
      * instead of calling routines with the hidden "this" parameter. */
     inline half() { }
+    inline half(int f) { *this = makefast(f); } /* same body as float */
     inline half(float f) { *this = makefast(f); }
+    inline half(double f) { *this = makefast(f); } /* same body as float */
 
     inline int is_nan() const
     {
@@ -50,9 +52,12 @@ public:
         return (is_finite() && (bits & 0x7c00u)) || ((bits & 0x7fffu) == 0);
     }
 
-    /* Cast to other types */
-    inline operator float() const { return tofloat(*this); }
+    /* Assign from / cast to other types -- always inline, see constructors */
+    inline half &operator =(int f) { return *this = makefast(f); }
+    inline half &operator =(float f) { return *this = makefast(f); }
+    inline half &operator =(double f) { return *this = makefast(f); }
     inline operator int() const { return (int)tofloat(*this); }
+    inline operator float() const { return tofloat(*this); }
 
     static float tofloat(half h);
 
diff --git a/src/matrix.cpp b/src/matrix.cpp
index 16d5ab14..1d90da24 100644
--- a/src/matrix.cpp
+++ b/src/matrix.cpp
@@ -22,6 +22,52 @@ using namespace std;
 namespace lol
 {
 
+template<> float dot(vec2 v1, vec2 v2) /* sum of component-wise products */
+{
+    return (v1.x * v2.x) + (v1.y * v2.y);
+}
+
+template<> float dot(vec3 v1, vec3 v2) /* sum of component-wise products */
+{
+    return (v1.x * v2.x) + (v1.y * v2.y) + (v1.z * v2.z);
+}
+
+template<> float dot(vec4 v1, vec4 v2) /* sum of component-wise products */
+{
+    return (v1.x * v2.x) + (v1.y * v2.y) + (v1.z * v2.z) + (v1.w * v2.w);
+}
+
+template<> vec3 cross(vec3 v1, vec3 v2) /* cross product v1 x v2 */
+{
+    return vec3(v1.y * v2.z - v1.z * v2.y,
+                v1.z * v2.x - v1.x * v2.z,
+                v1.x * v2.y - v1.y * v2.x);
+}
+
+template<> vec2 normalize(vec2 v) /* unit vector along v; zero stays zero */
+{
+    float norm = sqrtf(v.sqlen()); /* length; sqlen() alone is its square */
+    if (!norm)
+        return vec2(0);
+    return v / norm;
+}
+
+template<> vec3 normalize(vec3 v) /* unit vector along v; zero stays zero */
+{
+    float norm = sqrtf(v.sqlen()); /* length; sqlen() alone is its square */
+    if (!norm)
+        return vec3(0);
+    return v / norm;
+}
+
+template<> vec4 normalize(vec4 v) /* unit vector along v; zero stays zero */
+{
+    float norm = sqrtf(v.sqlen()); /* length; sqlen() alone is its square */
+    if (!norm)
+        return vec4(0);
+    return v / norm;
+}
+
 static inline float det3(float a, float b, float c,
                          float d, float e, float f,
                          float g, float h, float i)
@@ -66,6 +112,21 @@ template<> mat4 mat4::invert() const
     return ret;
 }
 
+template<> void vec2::printf() const /* writes "[ x y ]" via Log::Debug */
+{
+    Log::Debug("[ %6.6f %6.6f ]\n", x, y);
+}
+
+template<> void vec3::printf() const /* writes "[ x y z ]" via Log::Debug */
+{
+    Log::Debug("[ %6.6f %6.6f %6.6f ]\n", x, y, z);
+}
+
+template<> void vec4::printf() const /* writes "[ x y z w ]" via Log::Debug */
+{
+    Log::Debug("[ %6.6f %6.6f %6.6f %6.6f ]\n", x, y, z, w);
+}
+
 template<> void mat4::printf() const
 {
     mat4 const &p = *this;
@@ -163,15 +224,6 @@ template<> mat4 mat4::frustum(float left, float right, float bottom,
     return ret;
 }
 
-template<> mat4 mat4::perspective(float theta, float width,
-                                  float height, float near, float far)
-{
-    float t1 = tanf(theta / 2.0f);
-    float t2 = t1 * height / width;
-
-    return frustum(-near * t1, near * t1, -near * t2, near * t2, near, far);
-}
-
 template<> mat4 mat4::translate(float x, float y, float z)
 {
     mat4 ret(1.0f);
@@ -181,6 +233,40 @@ template<> mat4 mat4::translate(float x, float y, float z)
     return ret;
 }
 
+template<> mat4 mat4::translate(vec3 v)
+{
+    return mat4::translate(v.x, v.y, v.z); /* forward to the scalar overload */
+}
+
+template<> mat4 mat4::lookat(vec3 eye, vec3 center, vec3 up)
+{
+    vec3 f = normalize(center - eye); /* forward: from eye towards center */
+    vec3 u = normalize(up);
+    vec3 s = normalize(cross(f, u)); /* side; zero if up is parallel to f */
+    u = cross(s, f); /* rebuild up so it is perpendicular to both s and f */
+
+    mat4 ret(1.0f); /* presumably the identity -- confirm Mat4(T) ctor */
+    ret[0][0] = s.x; /* NOTE(review): s, u, -f each vary along the second */
+    ret[0][1] = s.y; /* index here; confirm against Mat4's [][] layout that */
+    ret[0][2] = s.z; /* this is not the transpose of the intended rotation. */
+    ret[1][0] = u.x;
+    ret[1][1] = u.y;
+    ret[1][2] = u.z;
+    ret[2][0] =-f.x;
+    ret[2][1] =-f.y;
+    ret[2][2] =-f.z;
+    return ret * mat4::translate(-eye); /* combined with a shift by -eye */
+}
+
+template<> mat4 mat4::perspective(float theta, float width,
+                                  float height, float near, float far)
+{
+    float tan_x = tanf(theta * 0.5f); /* tangent of half the angle theta */
+    float tan_y = tan_x * height / width; /* scaled by height / width */
+    return frustum(-near * tan_x, near * tan_x,
+                   -near * tan_y, near * tan_y, near, far);
+}
+
 template<> mat4 mat4::rotate(float theta, float x, float y, float z)
 {
     float st = sinf(theta);
@@ -213,5 +299,10 @@ template<> mat4 mat4::rotate(float theta, float x, float y, float z)
     return ret;
 }
 
+template<> mat4 mat4::rotate(float theta, vec3 v)
+{
+    return mat4::rotate(theta, v.x, v.y, v.z); /* forward to scalar overload */
+}
+
 } /* namespace lol */
 
diff --git a/src/matrix.h b/src/matrix.h
index 310e8dfc..01f14f52 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -113,7 +113,14 @@ namespace lol
     { \
         using namespace std; \
         return sqrtf((float)sqlen()); \
-    }
+    } \
+    \
+    template<typename U> \
+    friend U dot(Vec##elems<U>, Vec##elems<U>); \
+    template<typename U> \
+    friend Vec##elems<U> normalize(Vec##elems<U>); \
+    \
+    void printf() const;
 
 #define SWIZZLE2(e1, e2) \
     inline Vec2<T> e1##e2() const \
@@ -205,8 +212,16 @@ template <typename T> struct Vec2
     union { T y; T b; T j; };
 };
 
+typedef Vec2<half> f16vec2;
 typedef Vec2<float> vec2;
-typedef Vec2<int> ivec2;
+typedef Vec2<int8_t> i8vec2;
+typedef Vec2<uint8_t> u8vec2;
+typedef Vec2<int16_t> i16vec2;
+typedef Vec2<uint16_t> u16vec2;
+typedef Vec2<int32_t> ivec2;
+typedef Vec2<uint32_t> uvec2;
+typedef Vec2<int64_t> i64vec2;
+typedef Vec2<uint64_t> u64vec2;
 
 /*
  * 3-element vectors
@@ -226,6 +241,9 @@ template <typename T> struct Vec3
     SWIZZLE333(x); SWIZZLE333(y); SWIZZLE333(z);
     SWIZZLE4333(x); SWIZZLE4333(y); SWIZZLE4333(z);
 
+    template<typename U>
+    friend Vec3<U> cross(Vec3<U>, Vec3<U>);
+
 #if !defined __ANDROID__
     template<typename U>
     friend std::ostream &operator<<(std::ostream &stream, Vec3<U> const &v);
@@ -236,8 +254,16 @@ template <typename T> struct Vec3
     union { T z; T c; T k; };
 };
 
+typedef Vec3<half> f16vec3;
 typedef Vec3<float> vec3;
-typedef Vec3<int> ivec3;
+typedef Vec3<int8_t> i8vec3;
+typedef Vec3<uint8_t> u8vec3;
+typedef Vec3<int16_t> i16vec3;
+typedef Vec3<uint16_t> u16vec3;
+typedef Vec3<int32_t> ivec3;
+typedef Vec3<uint32_t> uvec3;
+typedef Vec3<int64_t> i64vec3;
+typedef Vec3<uint64_t> u64vec3;
 
 /*
  * 4-element vectors
@@ -272,8 +298,16 @@ template <typename T> struct Vec4
     union { T w; T d; T l; };
 };
 
+typedef Vec4<half> f16vec4;
 typedef Vec4<float> vec4;
-typedef Vec4<int> ivec4;
+typedef Vec4<int8_t> i8vec4;
+typedef Vec4<uint8_t> u8vec4;
+typedef Vec4<int16_t> i16vec4;
+typedef Vec4<uint16_t> u16vec4;
+typedef Vec4<int32_t> ivec4;
+typedef Vec4<uint32_t> uvec4;
+typedef Vec4<int64_t> i64vec4;
+typedef Vec4<uint64_t> u64vec4;
 
 #define SCALAR_GLOBAL(elems, op, U) \
     template<typename T> \
@@ -326,9 +360,12 @@ template <typename T> struct Mat4
 
     static Mat4<T> ortho(T left, T right, T bottom, T top, T near, T far);
     static Mat4<T> frustum(T left, T right, T bottom, T top, T near, T far);
+    static Mat4<T> lookat(Vec3<T> eye, Vec3<T> center, Vec3<T> up);
     static Mat4<T> perspective(T theta, T width, T height, T near, T far);
     static Mat4<T> translate(T x, T y, T z);
+    static Mat4<T> translate(Vec3<T> v);
     static Mat4<T> rotate(T theta, T x, T y, T z);
+    static Mat4<T> rotate(T theta, Vec3<T> v);
 
     void printf() const;
 
@@ -400,8 +437,16 @@ template <typename T> struct Mat4
     Vec4<T> v[4];
 };
 
+typedef Mat4<half> f16mat4;
 typedef Mat4<float> mat4;
-typedef Mat4<int> imat4;
+typedef Mat4<int8_t> i8mat4;
+typedef Mat4<uint8_t> u8mat4;
+typedef Mat4<int16_t> i16mat4;
+typedef Mat4<uint16_t> u16mat4;
+typedef Mat4<int32_t> imat4;
+typedef Mat4<uint32_t> umat4;
+typedef Mat4<int64_t> i64mat4;
+typedef Mat4<uint64_t> u64mat4;
 
 } /* namespace lol */
 
diff --git a/test/unit/build.cpp b/test/unit/build.cpp
index 7199a16a..dfd29f96 100644
--- a/test/unit/build.cpp
+++ b/test/unit/build.cpp
@@ -20,6 +20,56 @@ namespace lol
 
 LOLUNIT_FIXTURE(BuildTest)
 {
+    LOLUNIT_TEST(TypeSize) /* pins sizeof() of scalar, vector, matrix types */
+    {
+        LOLUNIT_ASSERT_EQUAL(sizeof(half), 2);
+        //LOLUNIT_ASSERT_EQUAL(sizeof(f16vec2), 4);
+        //LOLUNIT_ASSERT_EQUAL(sizeof(f16vec3), 6);
+        //LOLUNIT_ASSERT_EQUAL(sizeof(f16vec4), 8);
+        //LOLUNIT_ASSERT_EQUAL(sizeof(f16mat4), 32); // NOTE(review): why are the f16 checks disabled? confirm
+
+        LOLUNIT_ASSERT_EQUAL(sizeof(float), 4);
+        LOLUNIT_ASSERT_EQUAL(sizeof(vec2), 8);
+        LOLUNIT_ASSERT_EQUAL(sizeof(vec3), 12);
+        LOLUNIT_ASSERT_EQUAL(sizeof(vec4), 16); /* NOTE(review): no sizeof(mat4) check in this group */
+
+        LOLUNIT_ASSERT_EQUAL(sizeof(i8vec2), 2);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u8vec2), 2);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i16vec2), 4);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u16vec2), 4);
+        LOLUNIT_ASSERT_EQUAL(sizeof(ivec2), 8);
+        LOLUNIT_ASSERT_EQUAL(sizeof(uvec2), 8);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i64vec2), 16);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u64vec2), 16);
+
+        LOLUNIT_ASSERT_EQUAL(sizeof(i8vec3), 3);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u8vec3), 3);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i16vec3), 6);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u16vec3), 6);
+        LOLUNIT_ASSERT_EQUAL(sizeof(ivec3), 12);
+        LOLUNIT_ASSERT_EQUAL(sizeof(uvec3), 12);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i64vec3), 24);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u64vec3), 24);
+
+        LOLUNIT_ASSERT_EQUAL(sizeof(i8vec4), 4);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u8vec4), 4);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i16vec4), 8);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u16vec4), 8);
+        LOLUNIT_ASSERT_EQUAL(sizeof(ivec4), 16);
+        LOLUNIT_ASSERT_EQUAL(sizeof(uvec4), 16);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i64vec4), 32);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u64vec4), 32);
+
+        LOLUNIT_ASSERT_EQUAL(sizeof(i8mat4), 16);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u8mat4), 16);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i16mat4), 32);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u16mat4), 32);
+        LOLUNIT_ASSERT_EQUAL(sizeof(imat4), 64);
+        LOLUNIT_ASSERT_EQUAL(sizeof(umat4), 64);
+        LOLUNIT_ASSERT_EQUAL(sizeof(i64mat4), 128);
+        LOLUNIT_ASSERT_EQUAL(sizeof(u64mat4), 128);
+    }
+
 #if !defined LOL_DEBUG
     LOLUNIT_TEST(FastMath)
     {