MBenchAnnotation / sampling /task_pool.json
studyOverflow's picture
feat: task pool with prompt_interaction on all subsets + updated questions
516976e verified
{
"metadata": {
"generated_at": "2026-05-12",
"seed": 42,
"n_models": 6,
"n_pairs": 15,
"n_annotators": 14,
"annotators_per_task": 3,
"total_unique_tasks": 280,
"total_assignments": 840,
"video_version": "401f",
"camera_motions": [
"left_then_right",
"right_then_left"
],
"camera_motion_note": "All 401f files are named left_then_right.mp4, but actual content depends on sample position: first half = left_then_right, second half = right_then_left"
},
"models": [
"hy_worldplay",
"infinite_world",
"lingbot_world",
"matrix_game_2",
"matrix_game_3",
"yume"
],
"subsets": {
"environment": {
"n_samples": 20,
"pairs_per_sample": 5,
"total_tasks": 100,
"dimensions": [
[
"prompt_interaction",
"Prompt Interaction",
"阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?"
],
[
"action_interaction",
"Action Interaction",
"预期的相机运动见上方示意图。哪个视频的相机运动更符合这个要求?"
],
[
"viewpoint",
"Viewpoint Consistency",
"相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?"
],
[
"layout",
"Global Layout Consistency",
"相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?"
],
[
"style",
"Style Consistency",
"视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?"
],
[
"lighting",
"Lighting Consistency",
"视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
]
],
"selected_samples": [
"sample_247_b0456067",
"sample_041_2ab42906",
"sample_013_154c34de",
"sample_284_bff22dc0",
"sample_110_ad462509",
"sample_093_44e5479c",
"sample_084_49ab8f06",
"sample_049_dccd53de",
"sample_283_d7ff95ed",
"sample_038_bfc4e110",
"sample_262_10a2b882",
"sample_212_7f0b6dfe",
"sample_033_325d87a2",
"sample_230_f55277fe",
"sample_169_6c0877dd",
"sample_015_93988d73",
"sample_014_6c4624bc",
"sample_034_221e4058",
"sample_082_1bbdf598",
"sample_087_93ceda13"
]
},
"object": {
"n_samples": 15,
"pairs_per_sample": 4,
"total_tasks": 60,
"dimensions": [
[
"prompt_interaction",
"Prompt Interaction",
"阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?"
],
[
"action_interaction",
"Action Interaction",
"预期的相机运动见上方示意图。哪个视频的相机运动更符合这个要求?"
],
[
"geometry",
"Geometry Consistency",
"关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?"
],
[
"texture",
"Texture Consistency",
"关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
]
],
"selected_samples": [
"sample_182_74b804c6",
"sample_229_05ed3fe6",
"sample_008_93bb110c",
"sample_211_665b36fb",
"sample_077_fffd4177",
"sample_272_f1ea4671",
"sample_249_2620f1d7",
"sample_265_ab6d3959",
"sample_209_b7a80278",
"sample_154_11880be2",
"sample_089_08e2d55f",
"sample_162_0add46c6",
"sample_225_d1bb1d52",
"sample_103_74c54180",
"sample_003_a51f0378"
]
},
"human": {
"n_samples": 15,
"pairs_per_sample": 4,
"total_tasks": 60,
"dimensions": [
[
"prompt_interaction",
"Prompt Interaction",
"阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?"
],
[
"action_interaction",
"Action Interaction",
"预期的相机运动见上方示意图。哪个视频的相机运动更符合这个要求?"
],
[
"identity",
"Identity Consistency",
"人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?"
],
[
"appearance",
"Appearance Consistency",
"人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
]
],
"selected_samples": [
"mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"mem_openhumanvid_a4bb26585db8a889039fb506d2bb4951fb4804d609e03426372abe70276bc212",
"mem_openhumanvid_7b09258d7c70665277b392483364194c07b779417e8db58cdc07986f8784de06",
"mem_openhumanvid_6b0096237a984d01469ed6f68759743e35779783fcec334e0a0793ed682ec9b2",
"mem_openhumanvid_482a8459664c56747f22f8458aa38f09a449a4d63ac60817cb176673cbc40f5c",
"mem_openhumanvid_57e0cb96d5bbab5d8e4241dedfd509901e1e49b4b16d071dd2c83d6a28821067",
"mem_openhumanvid_eaa7f4eb002d54e263471f4da094e428caad90be2dbb734f85ee17a1bf63b168",
"mem_openhumanvid_3b1b26c1d1c5783d72f01570a0b042df272b9f9164f2994c4195efea46483da0",
"mem_openhumanvid_3aea5f921baa2797ef215af691b392f87fcf39680db2bd1c50b7838614dc54f1",
"mem_openhumanvid_57a4cd658dd761e08cf3f8e32d2a6e3fa828be08abee3171a21043e741a8449e",
"mem_openhumanvid_1f35e1c32209f2acdcec26a4e560e1cc7e1fdaad59f891bc03f2b157226c771d",
"mem_openhumanvid_56494681253156a68d48e6373aaf1f88e76740d395dde7f74b6643da8a46fb7d",
"mem_openhumanvid_ede8b958a02c1c954adf48b32693b09368ea497223b4518453c7c800a2a4b9b5",
"mem_openhumanvid_b23b88de349c55e07f0d678a79e0f5b777848da7a12184fbad56a6e3931bc960",
"mem_openhumanvid_73ed464ba39c63e4ff1de18c3d429a351571ef0549c9012e3f4a6cfad0f0eef5"
]
},
"causal": {
"n_samples": 15,
"pairs_per_sample": 4,
"total_tasks": 60,
"dimensions": [
[
"prompt_interaction",
"Prompt Interaction",
"阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?"
],
[
"action_interaction",
"Action Interaction",
"预期的相机运动见上方示意图。哪个视频的相机运动更符合这个要求?"
],
[
"state_progress",
"State Progress",
"相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?"
],
[
"physical_plausibility",
"Physical Plausibility",
"相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
]
],
"selected_samples": [
"a00325_00018",
"a00060_01459",
"a00863_00430",
"a00606_01009",
"a00655_00572",
"a00157_00451",
"a00528_02524",
"a00110_01560",
"a00670_01404",
"a00361_00060",
"a00764_02393",
"a00742_01453",
"a00464_00063",
"a00685_00148",
"a00208_01445"
]
}
},
"tasks": [
{
"task_id": "task_0000",
"subset": "environment",
"sample_id": "sample_247_b0456067",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A brightly lit, floral-walled room features a woman cradling a baby, surrounded by a white crib and a small table with a lamp, evoking a tender atmosphere."
},
{
"task_id": "task_0001",
"subset": "environment",
"sample_id": "sample_247_b0456067",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A brightly lit, floral-walled room features a woman cradling a baby, surrounded by a white crib and a small table with a lamp, evoking a tender atmosphere."
},
{
"task_id": "task_0002",
"subset": "environment",
"sample_id": "sample_247_b0456067",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A brightly lit, floral-walled room features a woman cradling a baby, surrounded by a white crib and a small table with a lamp, evoking a tender atmosphere."
},
{
"task_id": "task_0003",
"subset": "environment",
"sample_id": "sample_247_b0456067",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A brightly lit, floral-walled room features a woman cradling a baby, surrounded by a white crib and a small table with a lamp, evoking a tender atmosphere."
},
{
"task_id": "task_0004",
"subset": "environment",
"sample_id": "sample_247_b0456067",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A brightly lit, floral-walled room features a woman cradling a baby, surrounded by a white crib and a small table with a lamp, evoking a tender atmosphere."
},
{
"task_id": "task_0005",
"subset": "environment",
"sample_id": "sample_041_2ab42906",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene forest path winds through dense greenery, framed by towering trees and lush undergrowth, bathed in soft, diffused light that enhances the tranquil, immersive atmosphere."
},
{
"task_id": "task_0006",
"subset": "environment",
"sample_id": "sample_041_2ab42906",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene forest path winds through dense greenery, framed by towering trees and lush undergrowth, bathed in soft, diffused light that enhances the tranquil, immersive atmosphere."
},
{
"task_id": "task_0007",
"subset": "environment",
"sample_id": "sample_041_2ab42906",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene forest path winds through dense greenery, framed by towering trees and lush undergrowth, bathed in soft, diffused light that enhances the tranquil, immersive atmosphere."
},
{
"task_id": "task_0008",
"subset": "environment",
"sample_id": "sample_041_2ab42906",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene forest path winds through dense greenery, framed by towering trees and lush undergrowth, bathed in soft, diffused light that enhances the tranquil, immersive atmosphere."
},
{
"task_id": "task_0009",
"subset": "environment",
"sample_id": "sample_041_2ab42906",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene forest path winds through dense greenery, framed by towering trees and lush undergrowth, bathed in soft, diffused light that enhances the tranquil, immersive atmosphere."
},
{
"task_id": "task_0010",
"subset": "environment",
"sample_id": "sample_013_154c34de",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene indoor scene features a fair-skinned woman standing before a cluttered bookshelf, surrounded by books, photos, and trinkets, under warm lighting that enhances the calm, contemplative mood."
},
{
"task_id": "task_0011",
"subset": "environment",
"sample_id": "sample_013_154c34de",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene indoor scene features a fair-skinned woman standing before a cluttered bookshelf, surrounded by books, photos, and trinkets, under warm lighting that enhances the calm, contemplative mood."
},
{
"task_id": "task_0012",
"subset": "environment",
"sample_id": "sample_013_154c34de",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene indoor scene features a fair-skinned woman standing before a cluttered bookshelf, surrounded by books, photos, and trinkets, under warm lighting that enhances the calm, contemplative mood."
},
{
"task_id": "task_0013",
"subset": "environment",
"sample_id": "sample_013_154c34de",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene indoor scene features a fair-skinned woman standing before a cluttered bookshelf, surrounded by books, photos, and trinkets, under warm lighting that enhances the calm, contemplative mood."
},
{
"task_id": "task_0014",
"subset": "environment",
"sample_id": "sample_013_154c34de",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene indoor scene features a fair-skinned woman standing before a cluttered bookshelf, surrounded by books, photos, and trinkets, under warm lighting that enhances the calm, contemplative mood."
},
{
"task_id": "task_0015",
"subset": "environment",
"sample_id": "sample_284_bff22dc0",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene, overcast landscape features a winding road flanked by layered rock formations, with parked vehicles adding a sense of quiet human presence amid the natural expanse."
},
{
"task_id": "task_0016",
"subset": "environment",
"sample_id": "sample_284_bff22dc0",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene, overcast landscape features a winding road flanked by layered rock formations, with parked vehicles adding a sense of quiet human presence amid the natural expanse."
},
{
"task_id": "task_0017",
"subset": "environment",
"sample_id": "sample_284_bff22dc0",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene, overcast landscape features a winding road flanked by layered rock formations, with parked vehicles adding a sense of quiet human presence amid the natural expanse."
},
{
"task_id": "task_0018",
"subset": "environment",
"sample_id": "sample_284_bff22dc0",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene, overcast landscape features a winding road flanked by layered rock formations, with parked vehicles adding a sense of quiet human presence amid the natural expanse."
},
{
"task_id": "task_0019",
"subset": "environment",
"sample_id": "sample_284_bff22dc0",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene, overcast landscape features a winding road flanked by layered rock formations, with parked vehicles adding a sense of quiet human presence amid the natural expanse."
},
{
"task_id": "task_0020",
"subset": "environment",
"sample_id": "sample_110_ad462509",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village with solar-paneled homes, greenery, and a paved road under overcast skies exudes peaceful rural charm and natural beauty."
},
{
"task_id": "task_0021",
"subset": "environment",
"sample_id": "sample_110_ad462509",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village with solar-paneled homes, greenery, and a paved road under overcast skies exudes peaceful rural charm and natural beauty."
},
{
"task_id": "task_0022",
"subset": "environment",
"sample_id": "sample_110_ad462509",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village with solar-paneled homes, greenery, and a paved road under overcast skies exudes peaceful rural charm and natural beauty."
},
{
"task_id": "task_0023",
"subset": "environment",
"sample_id": "sample_110_ad462509",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village with solar-paneled homes, greenery, and a paved road under overcast skies exudes peaceful rural charm and natural beauty."
},
{
"task_id": "task_0024",
"subset": "environment",
"sample_id": "sample_110_ad462509",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village with solar-paneled homes, greenery, and a paved road under overcast skies exudes peaceful rural charm and natural beauty."
},
{
"task_id": "task_0025",
"subset": "environment",
"sample_id": "sample_093_44e5479c",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sunlit urban square features a modern sculpture, patterned stone paving, and contemporary buildings, set in a calm, pedestrian-friendly environment under overcast skies."
},
{
"task_id": "task_0026",
"subset": "environment",
"sample_id": "sample_093_44e5479c",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sunlit urban square features a modern sculpture, patterned stone paving, and contemporary buildings, set in a calm, pedestrian-friendly environment under overcast skies."
},
{
"task_id": "task_0027",
"subset": "environment",
"sample_id": "sample_093_44e5479c",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sunlit urban square features a modern sculpture, patterned stone paving, and contemporary buildings, set in a calm, pedestrian-friendly environment under overcast skies."
},
{
"task_id": "task_0028",
"subset": "environment",
"sample_id": "sample_093_44e5479c",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sunlit urban square features a modern sculpture, patterned stone paving, and contemporary buildings, set in a calm, pedestrian-friendly environment under overcast skies."
},
{
"task_id": "task_0029",
"subset": "environment",
"sample_id": "sample_093_44e5479c",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sunlit urban square features a modern sculpture, patterned stone paving, and contemporary buildings, set in a calm, pedestrian-friendly environment under overcast skies."
},
{
"task_id": "task_0030",
"subset": "environment",
"sample_id": "sample_084_49ab8f06",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bright, modern indoor walkway buzzes with pedestrian activity, framed by sterile architecture and neutral lighting that highlights the everyday rhythm of urban life."
},
{
"task_id": "task_0031",
"subset": "environment",
"sample_id": "sample_084_49ab8f06",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bright, modern indoor walkway buzzes with pedestrian activity, framed by sterile architecture and neutral lighting that highlights the everyday rhythm of urban life."
},
{
"task_id": "task_0032",
"subset": "environment",
"sample_id": "sample_084_49ab8f06",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bright, modern indoor walkway buzzes with pedestrian activity, framed by sterile architecture and neutral lighting that highlights the everyday rhythm of urban life."
},
{
"task_id": "task_0033",
"subset": "environment",
"sample_id": "sample_084_49ab8f06",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bright, modern indoor walkway buzzes with pedestrian activity, framed by sterile architecture and neutral lighting that highlights the everyday rhythm of urban life."
},
{
"task_id": "task_0034",
"subset": "environment",
"sample_id": "sample_084_49ab8f06",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bright, modern indoor walkway buzzes with pedestrian activity, framed by sterile architecture and neutral lighting that highlights the everyday rhythm of urban life."
},
{
"task_id": "task_0035",
"subset": "environment",
"sample_id": "sample_049_dccd53de",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet mountain village lies under a blue sky, its snow-draped houses and trees creating a serene, wintry atmosphere along a winding road."
},
{
"task_id": "task_0036",
"subset": "environment",
"sample_id": "sample_049_dccd53de",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet mountain village lies under a blue sky, its snow-draped houses and trees creating a serene, wintry atmosphere along a winding road."
},
{
"task_id": "task_0037",
"subset": "environment",
"sample_id": "sample_049_dccd53de",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet mountain village lies under a blue sky, its snow-draped houses and trees creating a serene, wintry atmosphere along a winding road."
},
{
"task_id": "task_0038",
"subset": "environment",
"sample_id": "sample_049_dccd53de",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet mountain village lies under a blue sky, its snow-draped houses and trees creating a serene, wintry atmosphere along a winding road."
},
{
"task_id": "task_0039",
"subset": "environment",
"sample_id": "sample_049_dccd53de",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet mountain village lies under a blue sky, its snow-draped houses and trees creating a serene, wintry atmosphere along a winding road."
},
{
"task_id": "task_0040",
"subset": "environment",
"sample_id": "sample_283_d7ff95ed",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A clean, neutral basement features brown wood-look flooring, light gray walls, and minimal furnishings, evoking a functional yet inviting atmosphere."
},
{
"task_id": "task_0041",
"subset": "environment",
"sample_id": "sample_283_d7ff95ed",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A clean, neutral basement features brown wood-look flooring, light gray walls, and minimal furnishings, evoking a functional yet inviting atmosphere."
},
{
"task_id": "task_0042",
"subset": "environment",
"sample_id": "sample_283_d7ff95ed",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A clean, neutral basement features brown wood-look flooring, light gray walls, and minimal furnishings, evoking a functional yet inviting atmosphere."
},
{
"task_id": "task_0043",
"subset": "environment",
"sample_id": "sample_283_d7ff95ed",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A clean, neutral basement features brown wood-look flooring, light gray walls, and minimal furnishings, evoking a functional yet inviting atmosphere."
},
{
"task_id": "task_0044",
"subset": "environment",
"sample_id": "sample_283_d7ff95ed",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A clean, neutral basement features brown wood-look flooring, light gray walls, and minimal furnishings, evoking a functional yet inviting atmosphere."
},
{
"task_id": "task_0045",
"subset": "environment",
"sample_id": "sample_038_bfc4e110",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene, tiled walkway flanked by ornate columns leads through a grand, historical structure, bathed in soft daylight and evoking quiet elegance and contemplation."
},
{
"task_id": "task_0046",
"subset": "environment",
"sample_id": "sample_038_bfc4e110",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene, tiled walkway flanked by ornate columns leads through a grand, historical structure, bathed in soft daylight and evoking quiet elegance and contemplation."
},
{
"task_id": "task_0047",
"subset": "environment",
"sample_id": "sample_038_bfc4e110",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene, tiled walkway flanked by ornate columns leads through a grand, historical structure, bathed in soft daylight and evoking quiet elegance and contemplation."
},
{
"task_id": "task_0048",
"subset": "environment",
"sample_id": "sample_038_bfc4e110",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene, tiled walkway flanked by ornate columns leads through a grand, historical structure, bathed in soft daylight and evoking quiet elegance and contemplation."
},
{
"task_id": "task_0049",
"subset": "environment",
"sample_id": "sample_038_bfc4e110",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene, tiled walkway flanked by ornate columns leads through a grand, historical structure, bathed in soft daylight and evoking quiet elegance and contemplation."
},
{
"task_id": "task_0050",
"subset": "environment",
"sample_id": "sample_262_10a2b882",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene winter landscape features a snow-covered road flanked by towering trees under an overcast sky, evoking quiet isolation and natural beauty."
},
{
"task_id": "task_0051",
"subset": "environment",
"sample_id": "sample_262_10a2b882",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene winter landscape features a snow-covered road flanked by towering trees under an overcast sky, evoking quiet isolation and natural beauty."
},
{
"task_id": "task_0052",
"subset": "environment",
"sample_id": "sample_262_10a2b882",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene winter landscape features a snow-covered road flanked by towering trees under an overcast sky, evoking quiet isolation and natural beauty."
},
{
"task_id": "task_0053",
"subset": "environment",
"sample_id": "sample_262_10a2b882",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene winter landscape features a snow-covered road flanked by towering trees under an overcast sky, evoking quiet isolation and natural beauty."
},
{
"task_id": "task_0054",
"subset": "environment",
"sample_id": "sample_262_10a2b882",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene winter landscape features a snow-covered road flanked by towering trees under an overcast sky, evoking quiet isolation and natural beauty."
},
{
"task_id": "task_0055",
"subset": "environment",
"sample_id": "sample_212_7f0b6dfe",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene lakeside scene features a wooden pier with tarped boats, traditional houses across the water, and forested hills under overcast skies, evoking calm and stillness."
},
{
"task_id": "task_0056",
"subset": "environment",
"sample_id": "sample_212_7f0b6dfe",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene lakeside scene features a wooden pier with tarped boats, traditional houses across the water, and forested hills under overcast skies, evoking calm and stillness."
},
{
"task_id": "task_0057",
"subset": "environment",
"sample_id": "sample_212_7f0b6dfe",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene lakeside scene features a wooden pier with tarped boats, traditional houses across the water, and forested hills under overcast skies, evoking calm and stillness."
},
{
"task_id": "task_0058",
"subset": "environment",
"sample_id": "sample_212_7f0b6dfe",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene lakeside scene features a wooden pier with tarped boats, traditional houses across the water, and forested hills under overcast skies, evoking calm and stillness."
},
{
"task_id": "task_0059",
"subset": "environment",
"sample_id": "sample_212_7f0b6dfe",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene lakeside scene features a wooden pier with tarped boats, traditional houses across the water, and forested hills under overcast skies, evoking calm and stillness."
},
{
"task_id": "task_0060",
"subset": "environment",
"sample_id": "sample_033_325d87a2",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village street under a blue sky, lined with houses, gardens, and towering peaks, evoking calm and rural charm with a clear, peaceful atmosphere."
},
{
"task_id": "task_0061",
"subset": "environment",
"sample_id": "sample_033_325d87a2",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village street under a blue sky, lined with houses, gardens, and towering peaks, evoking calm and rural charm with a clear, peaceful atmosphere."
},
{
"task_id": "task_0062",
"subset": "environment",
"sample_id": "sample_033_325d87a2",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village street under a blue sky, lined with houses, gardens, and towering peaks, evoking calm and rural charm with a clear, peaceful atmosphere."
},
{
"task_id": "task_0063",
"subset": "environment",
"sample_id": "sample_033_325d87a2",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village street under a blue sky, lined with houses, gardens, and towering peaks, evoking calm and rural charm with a clear, peaceful atmosphere."
},
{
"task_id": "task_0064",
"subset": "environment",
"sample_id": "sample_033_325d87a2",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene mountain village street under a blue sky, lined with houses, gardens, and towering peaks, evoking calm and rural charm with a clear, peaceful atmosphere."
},
{
"task_id": "task_0065",
"subset": "environment",
"sample_id": "sample_230_f55277fe",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively, illuminated courtyard filled with holiday decorations, surrounded by elegant architecture, where people gather under twinkling lights in a warm, celebratory atmosphere."
},
{
"task_id": "task_0066",
"subset": "environment",
"sample_id": "sample_230_f55277fe",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively, illuminated courtyard filled with holiday decorations, surrounded by elegant architecture, where people gather under twinkling lights in a warm, celebratory atmosphere."
},
{
"task_id": "task_0067",
"subset": "environment",
"sample_id": "sample_230_f55277fe",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively, illuminated courtyard filled with holiday decorations, surrounded by elegant architecture, where people gather under twinkling lights in a warm, celebratory atmosphere."
},
{
"task_id": "task_0068",
"subset": "environment",
"sample_id": "sample_230_f55277fe",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively, illuminated courtyard filled with holiday decorations, surrounded by elegant architecture, where people gather under twinkling lights in a warm, celebratory atmosphere."
},
{
"task_id": "task_0069",
"subset": "environment",
"sample_id": "sample_230_f55277fe",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively, illuminated courtyard filled with holiday decorations, surrounded by elegant architecture, where people gather under twinkling lights in a warm, celebratory atmosphere."
},
{
"task_id": "task_0070",
"subset": "environment",
"sample_id": "sample_169_6c0877dd",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively riverside promenade in a historic European city, framed by elegant architecture and soft golden-hour light, exudes a tranquil yet vibrant atmosphere."
},
{
"task_id": "task_0071",
"subset": "environment",
"sample_id": "sample_169_6c0877dd",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively riverside promenade in a historic European city, framed by elegant architecture and soft golden-hour light, exudes a tranquil yet vibrant atmosphere."
},
{
"task_id": "task_0072",
"subset": "environment",
"sample_id": "sample_169_6c0877dd",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively riverside promenade in a historic European city, framed by elegant architecture and soft golden-hour light, exudes a tranquil yet vibrant atmosphere."
},
{
"task_id": "task_0073",
"subset": "environment",
"sample_id": "sample_169_6c0877dd",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively riverside promenade in a historic European city, framed by elegant architecture and soft golden-hour light, exudes a tranquil yet vibrant atmosphere."
},
{
"task_id": "task_0074",
"subset": "environment",
"sample_id": "sample_169_6c0877dd",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively riverside promenade in a historic European city, framed by elegant architecture and soft golden-hour light, exudes a tranquil yet vibrant atmosphere."
},
{
"task_id": "task_0075",
"subset": "environment",
"sample_id": "sample_015_93988d73",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, winding asphalt road cuts through lush green hills under a clear blue sky, framed by power lines and a serene, remote atmosphere."
},
{
"task_id": "task_0076",
"subset": "environment",
"sample_id": "sample_015_93988d73",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, winding asphalt road cuts through lush green hills under a clear blue sky, framed by power lines and a serene, remote atmosphere."
},
{
"task_id": "task_0077",
"subset": "environment",
"sample_id": "sample_015_93988d73",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, winding asphalt road cuts through lush green hills under a clear blue sky, framed by power lines and a serene, remote atmosphere."
},
{
"task_id": "task_0078",
"subset": "environment",
"sample_id": "sample_015_93988d73",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, winding asphalt road cuts through lush green hills under a clear blue sky, framed by power lines and a serene, remote atmosphere."
},
{
"task_id": "task_0079",
"subset": "environment",
"sample_id": "sample_015_93988d73",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, winding asphalt road cuts through lush green hills under a clear blue sky, framed by power lines and a serene, remote atmosphere."
},
{
"task_id": "task_0080",
"subset": "environment",
"sample_id": "sample_014_6c4624bc",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A lively urban waterfront features a wooden pier, a red tent, sailboats, and a bridge under partly cloudy skies, exuding a relaxed, bustling atmosphere."
},
{
"task_id": "task_0081",
"subset": "environment",
"sample_id": "sample_014_6c4624bc",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A lively urban waterfront features a wooden pier, a red tent, sailboats, and a bridge under partly cloudy skies, exuding a relaxed, bustling atmosphere."
},
{
"task_id": "task_0082",
"subset": "environment",
"sample_id": "sample_014_6c4624bc",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A lively urban waterfront features a wooden pier, a red tent, sailboats, and a bridge under partly cloudy skies, exuding a relaxed, bustling atmosphere."
},
{
"task_id": "task_0083",
"subset": "environment",
"sample_id": "sample_014_6c4624bc",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A lively urban waterfront features a wooden pier, a red tent, sailboats, and a bridge under partly cloudy skies, exuding a relaxed, bustling atmosphere."
},
{
"task_id": "task_0084",
"subset": "environment",
"sample_id": "sample_014_6c4624bc",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A lively urban waterfront features a wooden pier, a red tent, sailboats, and a bridge under partly cloudy skies, exuding a relaxed, bustling atmosphere."
},
{
"task_id": "task_0085",
"subset": "environment",
"sample_id": "sample_034_221e4058",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A tranquil, well-lit bedroom features large windows, a gray headboard, white linens, and a dark blue bench, with an abstract painting above the bed and a hallway leading to other rooms."
},
{
"task_id": "task_0086",
"subset": "environment",
"sample_id": "sample_034_221e4058",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A tranquil, well-lit bedroom features large windows, a gray headboard, white linens, and a dark blue bench, with an abstract painting above the bed and a hallway leading to other rooms."
},
{
"task_id": "task_0087",
"subset": "environment",
"sample_id": "sample_034_221e4058",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A tranquil, well-lit bedroom features large windows, a gray headboard, white linens, and a dark blue bench, with an abstract painting above the bed and a hallway leading to other rooms."
},
{
"task_id": "task_0088",
"subset": "environment",
"sample_id": "sample_034_221e4058",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A tranquil, well-lit bedroom features large windows, a gray headboard, white linens, and a dark blue bench, with an abstract painting above the bed and a hallway leading to other rooms."
},
{
"task_id": "task_0089",
"subset": "environment",
"sample_id": "sample_034_221e4058",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A tranquil, well-lit bedroom features large windows, a gray headboard, white linens, and a dark blue bench, with an abstract painting above the bed and a hallway leading to other rooms."
},
{
"task_id": "task_0090",
"subset": "environment",
"sample_id": "sample_082_1bbdf598",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, white building with a distinctive window pattern stands on a city street, framed by construction barriers beneath a clear blue sky, creating a bright urban atmosphere."
},
{
"task_id": "task_0091",
"subset": "environment",
"sample_id": "sample_082_1bbdf598",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, white building with a distinctive window pattern stands on a city street, framed by construction barriers beneath a clear blue sky, creating a bright urban atmosphere."
},
{
"task_id": "task_0092",
"subset": "environment",
"sample_id": "sample_082_1bbdf598",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, white building with a distinctive window pattern stands on a city street, framed by construction barriers beneath a clear blue sky, creating a bright urban atmosphere."
},
{
"task_id": "task_0093",
"subset": "environment",
"sample_id": "sample_082_1bbdf598",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, white building with a distinctive window pattern stands on a city street, framed by construction barriers beneath a clear blue sky, creating a bright urban atmosphere."
},
{
"task_id": "task_0094",
"subset": "environment",
"sample_id": "sample_082_1bbdf598",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, white building with a distinctive window pattern stands on a city street, framed by construction barriers beneath a clear blue sky, creating a bright urban atmosphere."
},
{
"task_id": "task_0095",
"subset": "environment",
"sample_id": "sample_087_93ceda13",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A calm, modern room bathed in bright light features a man engrossed in a book, seated in a maroon armchair beside a lamp and tripod."
},
{
"task_id": "task_0096",
"subset": "environment",
"sample_id": "sample_087_93ceda13",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A calm, modern room bathed in bright light features a man engrossed in a book, seated in a maroon armchair beside a lamp and tripod."
},
{
"task_id": "task_0097",
"subset": "environment",
"sample_id": "sample_087_93ceda13",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A calm, modern room bathed in bright light features a man engrossed in a book, seated in a maroon armchair beside a lamp and tripod."
},
{
"task_id": "task_0098",
"subset": "environment",
"sample_id": "sample_087_93ceda13",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A calm, modern room bathed in bright light features a man engrossed in a book, seated in a maroon armchair beside a lamp and tripod."
},
{
"task_id": "task_0099",
"subset": "environment",
"sample_id": "sample_087_93ceda13",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A calm, modern room bathed in bright light features a man engrossed in a book, seated in a maroon armchair beside a lamp and tripod."
},
{
"task_id": "task_0100",
"subset": "object",
"sample_id": "sample_182_74b804c6",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A surreal urban street features a towering tree, fallen branches, and a quiet, eerie atmosphere under an altered color palette, evoking a sense of stillness and otherworldly calm.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0101",
"subset": "object",
"sample_id": "sample_182_74b804c6",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A surreal urban street features a towering tree, fallen branches, and a quiet, eerie atmosphere under an altered color palette, evoking a sense of stillness and otherworldly calm.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0102",
"subset": "object",
"sample_id": "sample_182_74b804c6",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A surreal urban street features a towering tree, fallen branches, and a quiet, eerie atmosphere under an altered color palette, evoking a sense of stillness and otherworldly calm.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0103",
"subset": "object",
"sample_id": "sample_182_74b804c6",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A surreal urban street features a towering tree, fallen branches, and a quiet, eerie atmosphere under an altered color palette, evoking a sense of stillness and otherworldly calm.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0104",
"subset": "object",
"sample_id": "sample_229_05ed3fe6",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A contemporary kitchen features stainless steel appliances, white and gray cabinets, dark countertops, and a marble-patterned backsplash under bright, even lighting.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0105",
"subset": "object",
"sample_id": "sample_229_05ed3fe6",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A contemporary kitchen features stainless steel appliances, white and gray cabinets, dark countertops, and a marble-patterned backsplash under bright, even lighting.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0106",
"subset": "object",
"sample_id": "sample_229_05ed3fe6",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A contemporary kitchen features stainless steel appliances, white and gray cabinets, dark countertops, and a marble-patterned backsplash under bright, even lighting.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0107",
"subset": "object",
"sample_id": "sample_229_05ed3fe6",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A contemporary kitchen features stainless steel appliances, white and gray cabinets, dark countertops, and a marble-patterned backsplash under bright, even lighting.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0108",
"subset": "object",
"sample_id": "sample_008_93bb110c",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet European village square with timber-framed buildings, cobblestone streets, and pedestrians, bathed in soft overcast light, evoking a calm, nostalgic atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0109",
"subset": "object",
"sample_id": "sample_008_93bb110c",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet European village square with timber-framed buildings, cobblestone streets, and pedestrians, bathed in soft overcast light, evoking a calm, nostalgic atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0110",
"subset": "object",
"sample_id": "sample_008_93bb110c",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet European village square with timber-framed buildings, cobblestone streets, and pedestrians, bathed in soft overcast light, evoking a calm, nostalgic atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0111",
"subset": "object",
"sample_id": "sample_008_93bb110c",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet European village square with timber-framed buildings, cobblestone streets, and pedestrians, bathed in soft overcast light, evoking a calm, nostalgic atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0112",
"subset": "object",
"sample_id": "sample_211_665b36fb",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A peaceful Swiss village street lined with wooden houses and flower-filled balconies stretches toward snow-capped mountains under a clear blue sky.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0113",
"subset": "object",
"sample_id": "sample_211_665b36fb",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A peaceful Swiss village street lined with wooden houses and flower-filled balconies stretches toward snow-capped mountains under a clear blue sky.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0114",
"subset": "object",
"sample_id": "sample_211_665b36fb",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A peaceful Swiss village street lined with wooden houses and flower-filled balconies stretches toward snow-capped mountains under a clear blue sky.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0115",
"subset": "object",
"sample_id": "sample_211_665b36fb",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A peaceful Swiss village street lined with wooden houses and flower-filled balconies stretches toward snow-capped mountains under a clear blue sky.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0116",
"subset": "object",
"sample_id": "sample_077_fffd4177",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A weathered stone wall borders a sunny coastline, where blue waves meet rocks and people enjoy the beach under bright daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0117",
"subset": "object",
"sample_id": "sample_077_fffd4177",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A weathered stone wall borders a sunny coastline, where blue waves meet rocks and people enjoy the beach under bright daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0118",
"subset": "object",
"sample_id": "sample_077_fffd4177",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A weathered stone wall borders a sunny coastline, where blue waves meet rocks and people enjoy the beach under bright daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0119",
"subset": "object",
"sample_id": "sample_077_fffd4177",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A weathered stone wall borders a sunny coastline, where blue waves meet rocks and people enjoy the beach under bright daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0120",
"subset": "object",
"sample_id": "sample_272_f1ea4671",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A stately, multi-story house with a light brick facade and large windows sits in a quiet, upscale neighborhood, framed by a bare tree and a muted winter palette.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0121",
"subset": "object",
"sample_id": "sample_272_f1ea4671",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A stately, multi-story house with a light brick facade and large windows sits in a quiet, upscale neighborhood, framed by a bare tree and a muted winter palette.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0122",
"subset": "object",
"sample_id": "sample_272_f1ea4671",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A stately, multi-story house with a light brick facade and large windows sits in a quiet, upscale neighborhood, framed by a bare tree and a muted winter palette.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0123",
"subset": "object",
"sample_id": "sample_272_f1ea4671",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A stately, multi-story house with a light brick facade and large windows sits in a quiet, upscale neighborhood, framed by a bare tree and a muted winter palette.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0124",
"subset": "object",
"sample_id": "sample_249_2620f1d7",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A vibrant canyon with green vegetation, a reflective body of water, and distant figures on a cliff, bathed in bright light, evoking a vast natural environment.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0125",
"subset": "object",
"sample_id": "sample_249_2620f1d7",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A vibrant canyon with green vegetation, a reflective body of water, and distant figures on a cliff, bathed in bright light, evoking a vast natural environment.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0126",
"subset": "object",
"sample_id": "sample_249_2620f1d7",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A vibrant canyon with green vegetation, a reflective body of water, and distant figures on a cliff, bathed in bright light, evoking a vast natural environment.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0127",
"subset": "object",
"sample_id": "sample_249_2620f1d7",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A vibrant canyon with green vegetation, a reflective body of water, and distant figures on a cliff, bathed in bright light, evoking a vast natural environment.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0128",
"subset": "object",
"sample_id": "sample_265_ab6d3959",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A bright, contemporary home features an open layout with a sleek kitchen, modern furniture, and large windows that flood the space with natural light.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0129",
"subset": "object",
"sample_id": "sample_265_ab6d3959",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A bright, contemporary home features an open layout with a sleek kitchen, modern furniture, and large windows that flood the space with natural light.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0130",
"subset": "object",
"sample_id": "sample_265_ab6d3959",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A bright, contemporary home features an open layout with a sleek kitchen, modern furniture, and large windows that flood the space with natural light.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0131",
"subset": "object",
"sample_id": "sample_265_ab6d3959",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A bright, contemporary home features an open layout with a sleek kitchen, modern furniture, and large windows that flood the space with natural light.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0132",
"subset": "object",
"sample_id": "sample_209_b7a80278",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene alpine village nestled among towering peaks, featuring a weathered barn, a curved road, and a clear blue sky, evoking peaceful isolation and natural beauty.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0133",
"subset": "object",
"sample_id": "sample_209_b7a80278",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene alpine village nestled among towering peaks, featuring a weathered barn, a curved road, and a clear blue sky, evoking peaceful isolation and natural beauty.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0134",
"subset": "object",
"sample_id": "sample_209_b7a80278",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene alpine village nestled among towering peaks, featuring a weathered barn, a curved road, and a clear blue sky, evoking peaceful isolation and natural beauty.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0135",
"subset": "object",
"sample_id": "sample_209_b7a80278",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene alpine village nestled among towering peaks, featuring a weathered barn, a curved road, and a clear blue sky, evoking peaceful isolation and natural beauty.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0136",
"subset": "object",
"sample_id": "sample_154_11880be2",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, overcast scene features a two-door Ford Bronco parked on a paved lot near a forested area, bathed in soft, muted light that enhances the serene, isolated atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0137",
"subset": "object",
"sample_id": "sample_154_11880be2",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, overcast scene features a two-door Ford Bronco parked on a paved lot near a forested area, bathed in soft, muted light that enhances the serene, isolated atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0138",
"subset": "object",
"sample_id": "sample_154_11880be2",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, overcast scene features a two-door Ford Bronco parked on a paved lot near a forested area, bathed in soft, muted light that enhances the serene, isolated atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0139",
"subset": "object",
"sample_id": "sample_154_11880be2",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet, overcast scene features a two-door Ford Bronco parked on a paved lot near a forested area, bathed in soft, muted light that enhances the serene, isolated atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0140",
"subset": "object",
"sample_id": "sample_089_08e2d55f",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A relaxed urban cafe scene under yellow umbrellas, surrounded by modern glass buildings and lined with trees, exudes a calm, leisurely atmosphere on a sunny day.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0141",
"subset": "object",
"sample_id": "sample_089_08e2d55f",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A relaxed urban cafe scene under yellow umbrellas, surrounded by modern glass buildings and lined with trees, exudes a calm, leisurely atmosphere on a sunny day.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0142",
"subset": "object",
"sample_id": "sample_089_08e2d55f",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A relaxed urban cafe scene under yellow umbrellas, surrounded by modern glass buildings and lined with trees, exudes a calm, leisurely atmosphere on a sunny day.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0143",
"subset": "object",
"sample_id": "sample_089_08e2d55f",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A relaxed urban cafe scene under yellow umbrellas, surrounded by modern glass buildings and lined with trees, exudes a calm, leisurely atmosphere on a sunny day.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0144",
"subset": "object",
"sample_id": "sample_162_0add46c6",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, brown-hued kitchen inside a vehicle features bright lighting, textured countertops, and modern appliances, with a figure in a striped dress standing at its center.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0145",
"subset": "object",
"sample_id": "sample_162_0add46c6",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, brown-hued kitchen inside a vehicle features bright lighting, textured countertops, and modern appliances, with a figure in a striped dress standing at its center.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0146",
"subset": "object",
"sample_id": "sample_162_0add46c6",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, brown-hued kitchen inside a vehicle features bright lighting, textured countertops, and modern appliances, with a figure in a striped dress standing at its center.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0147",
"subset": "object",
"sample_id": "sample_162_0add46c6",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A sleek, brown-hued kitchen inside a vehicle features bright lighting, textured countertops, and modern appliances, with a figure in a striped dress standing at its center.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0148",
"subset": "object",
"sample_id": "sample_225_d1bb1d52",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A refined dining room features a long wooden table, elegant tableware, and abstract art, all bathed in natural light and exuding sophistication.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0149",
"subset": "object",
"sample_id": "sample_225_d1bb1d52",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A refined dining room features a long wooden table, elegant tableware, and abstract art, all bathed in natural light and exuding sophistication.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0150",
"subset": "object",
"sample_id": "sample_225_d1bb1d52",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A refined dining room features a long wooden table, elegant tableware, and abstract art, all bathed in natural light and exuding sophistication.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0151",
"subset": "object",
"sample_id": "sample_225_d1bb1d52",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A refined dining room features a long wooden table, elegant tableware, and abstract art, all bathed in natural light and exuding sophistication.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0152",
"subset": "object",
"sample_id": "sample_103_74c54180",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene village square under a blue sky, framed by a stone archway, wooden buildings, and a distant church tower, evoking a timeless atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0153",
"subset": "object",
"sample_id": "sample_103_74c54180",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene village square under a blue sky, framed by a stone archway, wooden buildings, and a distant church tower, evoking a timeless atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0154",
"subset": "object",
"sample_id": "sample_103_74c54180",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene village square under a blue sky, framed by a stone archway, wooden buildings, and a distant church tower, evoking a timeless atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0155",
"subset": "object",
"sample_id": "sample_103_74c54180",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene village square under a blue sky, framed by a stone archway, wooden buildings, and a distant church tower, evoking a timeless atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0156",
"subset": "object",
"sample_id": "sample_003_a51f0378",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A suspension bridge spans a calm body of water, its steel structure contrasting with the reflective surface below under soft daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0157",
"subset": "object",
"sample_id": "sample_003_a51f0378",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A suspension bridge spans a calm body of water, its steel structure contrasting with the reflective surface below under soft daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0158",
"subset": "object",
"sample_id": "sample_003_a51f0378",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A suspension bridge spans a calm body of water, its steel structure contrasting with the reflective surface below under soft daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0159",
"subset": "object",
"sample_id": "sample_003_a51f0378",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A suspension bridge spans a calm body of water, its steel structure contrasting with the reflective surface below under soft daylight.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/"
},
{
"task_id": "task_0160",
"subset": "human",
"sample_id": "mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside a brightly lit indoor space, likely a school hallway, three individuals—a man and two children—stand near a doorway adorned with educational posters about respiratory health. The man, wearing glasses and a brown shirt, listens as the boy in a yellow polo shirt gestures emphatically toward the girl beside him, who wears a matching uniform and a blue headband. She responds by pointing off-screen, drawing attention to something beyond the frame. A red and black bag hangs on the wall, and green plants add a touch of nature to the scene."
},
{
"task_id": "task_0161",
"subset": "human",
"sample_id": "mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside a brightly lit indoor space, likely a school hallway, three individuals—a man and two children—stand near a doorway adorned with educational posters about respiratory health. The man, wearing glasses and a brown shirt, listens as the boy in a yellow polo shirt gestures emphatically toward the girl beside him, who wears a matching uniform and a blue headband. She responds by pointing off-screen, drawing attention to something beyond the frame. A red and black bag hangs on the wall, and green plants add a touch of nature to the scene."
},
{
"task_id": "task_0162",
"subset": "human",
"sample_id": "mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside a brightly lit indoor space, likely a school hallway, three individuals—a man and two children—stand near a doorway adorned with educational posters about respiratory health. The man, wearing glasses and a brown shirt, listens as the boy in a yellow polo shirt gestures emphatically toward the girl beside him, who wears a matching uniform and a blue headband. She responds by pointing off-screen, drawing attention to something beyond the frame. A red and black bag hangs on the wall, and green plants add a touch of nature to the scene."
},
{
"task_id": "task_0163",
"subset": "human",
"sample_id": "mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside a brightly lit indoor space, likely a school hallway, three individuals—a man and two children—stand near a doorway adorned with educational posters about respiratory health. The man, wearing glasses and a brown shirt, listens as the boy in a yellow polo shirt gestures emphatically toward the girl beside him, who wears a matching uniform and a blue headband. She responds by pointing off-screen, drawing attention to something beyond the frame. A red and black bag hangs on the wall, and green plants add a touch of nature to the scene."
},
{
"task_id": "task_0164",
"subset": "human",
"sample_id": "mem_openhumanvid_a4bb26585db8a889039fb506d2bb4951fb4804d609e03426372abe70276bc212",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a dimly lit, rustic room with cracked earth walls and a woven basket hanging in the background, three people are gathered around a bed. A man lies propped up, wearing a white shirt stained with blood, while a woman in a patterned gray qipao sits beside him, holding a red cloth. Another man in a green jacket carefully unfolds the cloth, revealing a black hammer and sickle symbol. The group appears tense and focused, exchanging glances as they examine the item together."
},
{
"task_id": "task_0165",
"subset": "human",
"sample_id": "mem_openhumanvid_a4bb26585db8a889039fb506d2bb4951fb4804d609e03426372abe70276bc212",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a dimly lit, rustic room with cracked earth walls and a woven basket hanging in the background, three people are gathered around a bed. A man lies propped up, wearing a white shirt stained with blood, while a woman in a patterned gray qipao sits beside him, holding a red cloth. Another man in a green jacket carefully unfolds the cloth, revealing a black hammer and sickle symbol. The group appears tense and focused, exchanging glances as they examine the item together."
},
{
"task_id": "task_0166",
"subset": "human",
"sample_id": "mem_openhumanvid_a4bb26585db8a889039fb506d2bb4951fb4804d609e03426372abe70276bc212",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a dimly lit, rustic room with cracked earth walls and a woven basket hanging in the background, three people are gathered around a bed. A man lies propped up, wearing a white shirt stained with blood, while a woman in a patterned gray qipao sits beside him, holding a red cloth. Another man in a green jacket carefully unfolds the cloth, revealing a black hammer and sickle symbol. The group appears tense and focused, exchanging glances as they examine the item together."
},
{
"task_id": "task_0167",
"subset": "human",
"sample_id": "mem_openhumanvid_a4bb26585db8a889039fb506d2bb4951fb4804d609e03426372abe70276bc212",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a dimly lit, rustic room with cracked earth walls and a woven basket hanging in the background, three people are gathered around a bed. A man lies propped up, wearing a white shirt stained with blood, while a woman in a patterned gray qipao sits beside him, holding a red cloth. Another man in a green jacket carefully unfolds the cloth, revealing a black hammer and sickle symbol. The group appears tense and focused, exchanging glances as they examine the item together."
},
{
"task_id": "task_0168",
"subset": "human",
"sample_id": "mem_openhumanvid_7b09258d7c70665277b392483364194c07b779417e8db58cdc07986f8784de06",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Two astronauts in white space suits with American flags and mission patches float in the vacuum of space, working on a metallic spacecraft module against the backdrop of Earth's blue atmosphere and star-filled sky. They move carefully, tethered to the structure, using tools to repair or install equipment on the exterior of the craft, their helmets reflecting light as they collaborate closely."
},
{
"task_id": "task_0169",
"subset": "human",
"sample_id": "mem_openhumanvid_7b09258d7c70665277b392483364194c07b779417e8db58cdc07986f8784de06",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Two astronauts in white space suits with American flags and mission patches float in the vacuum of space, working on a metallic spacecraft module against the backdrop of Earth's blue atmosphere and star-filled sky. They move carefully, tethered to the structure, using tools to repair or install equipment on the exterior of the craft, their helmets reflecting light as they collaborate closely."
},
{
"task_id": "task_0170",
"subset": "human",
"sample_id": "mem_openhumanvid_7b09258d7c70665277b392483364194c07b779417e8db58cdc07986f8784de06",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Two astronauts in white space suits with American flags and mission patches float in the vacuum of space, working on a metallic spacecraft module against the backdrop of Earth's blue atmosphere and star-filled sky. They move carefully, tethered to the structure, using tools to repair or install equipment on the exterior of the craft, their helmets reflecting light as they collaborate closely."
},
{
"task_id": "task_0171",
"subset": "human",
"sample_id": "mem_openhumanvid_7b09258d7c70665277b392483364194c07b779417e8db58cdc07986f8784de06",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Two astronauts in white space suits with American flags and mission patches float in the vacuum of space, working on a metallic spacecraft module against the backdrop of Earth's blue atmosphere and star-filled sky. They move carefully, tethered to the structure, using tools to repair or install equipment on the exterior of the craft, their helmets reflecting light as they collaborate closely."
},
{
"task_id": "task_0172",
"subset": "human",
"sample_id": "mem_openhumanvid_6b0096237a984d01469ed6f68759743e35779783fcec334e0a0793ed682ec9b2",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a bright, spacious corridor with white arched walls and large black vases, several people gather in a social setting. Two men greet each other warmly, one in a red patterned shirt and shorts, the other in a pink suit. A woman in an orange and black striped outfit lifts her leg playfully as she interacts with them. Others stand nearby, including a woman in white pants and a leopard-print top, and another in a long dark dress holding a clutch. The atmosphere is lively, with light streaming through windows, and people engaging in animated conversation."
},
{
"task_id": "task_0173",
"subset": "human",
"sample_id": "mem_openhumanvid_6b0096237a984d01469ed6f68759743e35779783fcec334e0a0793ed682ec9b2",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a bright, spacious corridor with white arched walls and large black vases, several people gather in a social setting. Two men greet each other warmly, one in a red patterned shirt and shorts, the other in a pink suit. A woman in an orange and black striped outfit lifts her leg playfully as she interacts with them. Others stand nearby, including a woman in white pants and a leopard-print top, and another in a long dark dress holding a clutch. The atmosphere is lively, with light streaming through windows, and people engaging in animated conversation."
},
{
"task_id": "task_0174",
"subset": "human",
"sample_id": "mem_openhumanvid_6b0096237a984d01469ed6f68759743e35779783fcec334e0a0793ed682ec9b2",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a bright, spacious corridor with white arched walls and large black vases, several people gather in a social setting. Two men greet each other warmly, one in a red patterned shirt and shorts, the other in a pink suit. A woman in an orange and black striped outfit lifts her leg playfully as she interacts with them. Others stand nearby, including a woman in white pants and a leopard-print top, and another in a long dark dress holding a clutch. The atmosphere is lively, with light streaming through windows, and people engaging in animated conversation."
},
{
"task_id": "task_0175",
"subset": "human",
"sample_id": "mem_openhumanvid_6b0096237a984d01469ed6f68759743e35779783fcec334e0a0793ed682ec9b2",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a bright, spacious corridor with white arched walls and large black vases, several people gather in a social setting. Two men greet each other warmly, one in a red patterned shirt and shorts, the other in a pink suit. A woman in an orange and black striped outfit lifts her leg playfully as she interacts with them. Others stand nearby, including a woman in white pants and a leopard-print top, and another in a long dark dress holding a clutch. The atmosphere is lively, with light streaming through windows, and people engaging in animated conversation."
},
{
"task_id": "task_0176",
"subset": "human",
"sample_id": "mem_openhumanvid_482a8459664c56747f22f8458aa38f09a449a4d63ac60817cb176673cbc40f5c",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man with short dark hair stands outdoors in a sunlit area, surrounded by lush green foliage that suggests a park or garden. He wears a mustard-yellow shirt under a brown sleeveless vest and holds a white flip phone to his ear with his right hand. His left hand initially covers his forehead before lowering as he listens intently, his expression shifting from concern to focused attention. The natural daylight casts soft shadows, highlighting the details of his face and attire."
},
{
"task_id": "task_0177",
"subset": "human",
"sample_id": "mem_openhumanvid_482a8459664c56747f22f8458aa38f09a449a4d63ac60817cb176673cbc40f5c",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man with short dark hair stands outdoors in a sunlit area, surrounded by lush green foliage that suggests a park or garden. He wears a mustard-yellow shirt under a brown sleeveless vest and holds a white flip phone to his ear with his right hand. His left hand initially covers his forehead before lowering as he listens intently, his expression shifting from concern to focused attention. The natural daylight casts soft shadows, highlighting the details of his face and attire."
},
{
"task_id": "task_0178",
"subset": "human",
"sample_id": "mem_openhumanvid_482a8459664c56747f22f8458aa38f09a449a4d63ac60817cb176673cbc40f5c",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man with short dark hair stands outdoors in a sunlit area, surrounded by lush green foliage that suggests a park or garden. He wears a mustard-yellow shirt under a brown sleeveless vest and holds a white flip phone to his ear with his right hand. His left hand initially covers his forehead before lowering as he listens intently, his expression shifting from concern to focused attention. The natural daylight casts soft shadows, highlighting the details of his face and attire."
},
{
"task_id": "task_0179",
"subset": "human",
"sample_id": "mem_openhumanvid_482a8459664c56747f22f8458aa38f09a449a4d63ac60817cb176673cbc40f5c",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man with short dark hair stands outdoors in a sunlit area, surrounded by lush green foliage that suggests a park or garden. He wears a mustard-yellow shirt under a brown sleeveless vest and holds a white flip phone to his ear with his right hand. His left hand initially covers his forehead before lowering as he listens intently, his expression shifting from concern to focused attention. The natural daylight casts soft shadows, highlighting the details of his face and attire."
},
{
"task_id": "task_0180",
"subset": "human",
"sample_id": "mem_openhumanvid_57e0cb96d5bbab5d8e4241dedfd509901e1e49b4b16d071dd2c83d6a28821067",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man walks across a sunlit, sloped roof covered in wooden shingles, surrounded by lush greenery and distant palm trees under a hazy sky. He wears a yellow patterned shirt over a white \"Champion\" t-shirt, blue jeans, sunglasses, and a tool belt. As he approaches the camera, he removes his shirt, revealing a toned torso, and tosses it aside while adjusting his hair. A large antenna stands nearby on the roof."
},
{
"task_id": "task_0181",
"subset": "human",
"sample_id": "mem_openhumanvid_57e0cb96d5bbab5d8e4241dedfd509901e1e49b4b16d071dd2c83d6a28821067",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man walks across a sunlit, sloped roof covered in wooden shingles, surrounded by lush greenery and distant palm trees under a hazy sky. He wears a yellow patterned shirt over a white \"Champion\" t-shirt, blue jeans, sunglasses, and a tool belt. As he approaches the camera, he removes his shirt, revealing a toned torso, and tosses it aside while adjusting his hair. A large antenna stands nearby on the roof."
},
{
"task_id": "task_0182",
"subset": "human",
"sample_id": "mem_openhumanvid_57e0cb96d5bbab5d8e4241dedfd509901e1e49b4b16d071dd2c83d6a28821067",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man walks across a sunlit, sloped roof covered in wooden shingles, surrounded by lush greenery and distant palm trees under a hazy sky. He wears a yellow patterned shirt over a white \"Champion\" t-shirt, blue jeans, sunglasses, and a tool belt. As he approaches the camera, he removes his shirt, revealing a toned torso, and tosses it aside while adjusting his hair. A large antenna stands nearby on the roof."
},
{
"task_id": "task_0183",
"subset": "human",
"sample_id": "mem_openhumanvid_57e0cb96d5bbab5d8e4241dedfd509901e1e49b4b16d071dd2c83d6a28821067",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man walks across a sunlit, sloped roof covered in wooden shingles, surrounded by lush greenery and distant palm trees under a hazy sky. He wears a yellow patterned shirt over a white \"Champion\" t-shirt, blue jeans, sunglasses, and a tool belt. As he approaches the camera, he removes his shirt, revealing a toned torso, and tosses it aside while adjusting his hair. A large antenna stands nearby on the roof."
},
{
"task_id": "task_0184",
"subset": "human",
"sample_id": "mem_openhumanvid_eaa7f4eb002d54e263471f4da094e428caad90be2dbb734f85ee17a1bf63b168",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A group of young female students in matching navy blue and white school uniforms perform on stage at a brightly lit indoor concert hall. They play black clarinets with focused expressions, their hands moving precisely over the keys. Behind them, other band members play brass instruments, while a conductor leads from the front. The stage is decorated with colorful banners and a large screen displaying Japanese text and cheerful graphics, indicating a festive event. The audience sits in darkness, watching attentively as the performers deliver a synchronized musical performance."
},
{
"task_id": "task_0185",
"subset": "human",
"sample_id": "mem_openhumanvid_eaa7f4eb002d54e263471f4da094e428caad90be2dbb734f85ee17a1bf63b168",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A group of young female students in matching navy blue and white school uniforms perform on stage at a brightly lit indoor concert hall. They play black clarinets with focused expressions, their hands moving precisely over the keys. Behind them, other band members play brass instruments, while a conductor leads from the front. The stage is decorated with colorful banners and a large screen displaying Japanese text and cheerful graphics, indicating a festive event. The audience sits in darkness, watching attentively as the performers deliver a synchronized musical performance."
},
{
"task_id": "task_0186",
"subset": "human",
"sample_id": "mem_openhumanvid_eaa7f4eb002d54e263471f4da094e428caad90be2dbb734f85ee17a1bf63b168",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A group of young female students in matching navy blue and white school uniforms perform on stage at a brightly lit indoor concert hall. They play black clarinets with focused expressions, their hands moving precisely over the keys. Behind them, other band members play brass instruments, while a conductor leads from the front. The stage is decorated with colorful banners and a large screen displaying Japanese text and cheerful graphics, indicating a festive event. The audience sits in darkness, watching attentively as the performers deliver a synchronized musical performance."
},
{
"task_id": "task_0187",
"subset": "human",
"sample_id": "mem_openhumanvid_eaa7f4eb002d54e263471f4da094e428caad90be2dbb734f85ee17a1bf63b168",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A group of young female students in matching navy blue and white school uniforms perform on stage at a brightly lit indoor concert hall. They play black clarinets with focused expressions, their hands moving precisely over the keys. Behind them, other band members play brass instruments, while a conductor leads from the front. The stage is decorated with colorful banners and a large screen displaying Japanese text and cheerful graphics, indicating a festive event. The audience sits in darkness, watching attentively as the performers deliver a synchronized musical performance."
},
{
"task_id": "task_0188",
"subset": "human",
"sample_id": "mem_openhumanvid_3b1b26c1d1c5783d72f01570a0b042df272b9f9164f2994c4195efea46483da0",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside an opulent, grand staircase hall adorned with ornate white plasterwork and intricate black iron railings, two men in 18th-century attire descend the sweeping stairs. Dressed in dark coats, breeches, and white stockings, they carry documents and converse as they walk. A third man stands at the top landing, observing them. The space is softly lit, emphasizing the luxurious red-carpeted steps, gilded accents, and elegant furnishings, including a candelabra on a marble pedestal and patterned armchairs below."
},
{
"task_id": "task_0189",
"subset": "human",
"sample_id": "mem_openhumanvid_3b1b26c1d1c5783d72f01570a0b042df272b9f9164f2994c4195efea46483da0",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside an opulent, grand staircase hall adorned with ornate white plasterwork and intricate black iron railings, two men in 18th-century attire descend the sweeping stairs. Dressed in dark coats, breeches, and white stockings, they carry documents and converse as they walk. A third man stands at the top landing, observing them. The space is softly lit, emphasizing the luxurious red-carpeted steps, gilded accents, and elegant furnishings, including a candelabra on a marble pedestal and patterned armchairs below."
},
{
"task_id": "task_0190",
"subset": "human",
"sample_id": "mem_openhumanvid_3b1b26c1d1c5783d72f01570a0b042df272b9f9164f2994c4195efea46483da0",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside an opulent, grand staircase hall adorned with ornate white plasterwork and intricate black iron railings, two men in 18th-century attire descend the sweeping stairs. Dressed in dark coats, breeches, and white stockings, they carry documents and converse as they walk. A third man stands at the top landing, observing them. The space is softly lit, emphasizing the luxurious red-carpeted steps, gilded accents, and elegant furnishings, including a candelabra on a marble pedestal and patterned armchairs below."
},
{
"task_id": "task_0191",
"subset": "human",
"sample_id": "mem_openhumanvid_3b1b26c1d1c5783d72f01570a0b042df272b9f9164f2994c4195efea46483da0",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside an opulent, grand staircase hall adorned with ornate white plasterwork and intricate black iron railings, two men in 18th-century attire descend the sweeping stairs. Dressed in dark coats, breeches, and white stockings, they carry documents and converse as they walk. A third man stands at the top landing, observing them. The space is softly lit, emphasizing the luxurious red-carpeted steps, gilded accents, and elegant furnishings, including a candelabra on a marble pedestal and patterned armchairs below."
},
{
"task_id": "task_0192",
"subset": "human",
"sample_id": "mem_openhumanvid_3aea5f921baa2797ef215af691b392f87fcf39680db2bd1c50b7838614dc54f1",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A young woman with long, dark brown hair tied back stands in a dimly lit indoor space, her face softly illuminated by natural light from the right side, likely a window. She wears a light-colored top and maintains a calm, reflective expression while speaking, her mouth moving gently as she looks slightly off-camera. The background is plain and neutral, emphasizing her subtle facial movements and serene demeanor."
},
{
"task_id": "task_0193",
"subset": "human",
"sample_id": "mem_openhumanvid_3aea5f921baa2797ef215af691b392f87fcf39680db2bd1c50b7838614dc54f1",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A young woman with long, dark brown hair tied back stands in a dimly lit indoor space, her face softly illuminated by natural light from the right side, likely a window. She wears a light-colored top and maintains a calm, reflective expression while speaking, her mouth moving gently as she looks slightly off-camera. The background is plain and neutral, emphasizing her subtle facial movements and serene demeanor."
},
{
"task_id": "task_0194",
"subset": "human",
"sample_id": "mem_openhumanvid_3aea5f921baa2797ef215af691b392f87fcf39680db2bd1c50b7838614dc54f1",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A young woman with long, dark brown hair tied back stands in a dimly lit indoor space, her face softly illuminated by natural light from the right side, likely a window. She wears a light-colored top and maintains a calm, reflective expression while speaking, her mouth moving gently as she looks slightly off-camera. The background is plain and neutral, emphasizing her subtle facial movements and serene demeanor."
},
{
"task_id": "task_0195",
"subset": "human",
"sample_id": "mem_openhumanvid_3aea5f921baa2797ef215af691b392f87fcf39680db2bd1c50b7838614dc54f1",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A young woman with long, dark brown hair tied back stands in a dimly lit indoor space, her face softly illuminated by natural light from the right side, likely a window. She wears a light-colored top and maintains a calm, reflective expression while speaking, her mouth moving gently as she looks slightly off-camera. The background is plain and neutral, emphasizing her subtle facial movements and serene demeanor."
},
{
"task_id": "task_0196",
"subset": "human",
"sample_id": "mem_openhumanvid_57a4cd658dd761e08cf3f8e32d2a6e3fa828be08abee3171a21043e741a8449e",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man and a woman walk through an elegantly decorated interior space with soft, warm lighting. The room features ornate white wrought-iron railings in the foreground, delicate floral wall patterns, and a wooden door with glass panels. The man, wearing a dark jacket over a gray shirt, walks beside the woman in a light pink sweater; both appear engaged in conversation as they move forward, occasionally turning to face each other. Their expressions suggest a calm, thoughtful exchange as they navigate the refined, spacious environment."
},
{
"task_id": "task_0197",
"subset": "human",
"sample_id": "mem_openhumanvid_57a4cd658dd761e08cf3f8e32d2a6e3fa828be08abee3171a21043e741a8449e",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man and a woman walk through an elegantly decorated interior space with soft, warm lighting. The room features ornate white wrought-iron railings in the foreground, delicate floral wall patterns, and a wooden door with glass panels. The man, wearing a dark jacket over a gray shirt, walks beside the woman in a light pink sweater; both appear engaged in conversation as they move forward, occasionally turning to face each other. Their expressions suggest a calm, thoughtful exchange as they navigate the refined, spacious environment."
},
{
"task_id": "task_0198",
"subset": "human",
"sample_id": "mem_openhumanvid_57a4cd658dd761e08cf3f8e32d2a6e3fa828be08abee3171a21043e741a8449e",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man and a woman walk through an elegantly decorated interior space with soft, warm lighting. The room features ornate white wrought-iron railings in the foreground, delicate floral wall patterns, and a wooden door with glass panels. The man, wearing a dark jacket over a gray shirt, walks beside the woman in a light pink sweater; both appear engaged in conversation as they move forward, occasionally turning to face each other. Their expressions suggest a calm, thoughtful exchange as they navigate the refined, spacious environment."
},
{
"task_id": "task_0199",
"subset": "human",
"sample_id": "mem_openhumanvid_57a4cd658dd761e08cf3f8e32d2a6e3fa828be08abee3171a21043e741a8449e",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A man and a woman walk through an elegantly decorated interior space with soft, warm lighting. The room features ornate white wrought-iron railings in the foreground, delicate floral wall patterns, and a wooden door with glass panels. The man, wearing a dark jacket over a gray shirt, walks beside the woman in a light pink sweater; both appear engaged in conversation as they move forward, occasionally turning to face each other. Their expressions suggest a calm, thoughtful exchange as they navigate the refined, spacious environment."
},
{
"task_id": "task_0200",
"subset": "human",
"sample_id": "mem_openhumanvid_1f35e1c32209f2acdcec26a4e560e1cc7e1fdaad59f891bc03f2b157226c771d",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Two men walk side by side along a dimly lit alley at night, flanked by a bamboo fence and a building with glowing wall lamps. The man on the left wears a dark coat over a white shirt and vest, while the man on the right dons a black leather jacket. They converse as they move forward, occasionally glancing at each other; the man in the coat touches his nose briefly. Their expressions are serious, suggesting a tense or urgent discussion. The atmosphere is quiet and shadowy, with soft ambient lighting casting subtle highlights on their faces and surroundings."
},
{
"task_id": "task_0201",
"subset": "human",
"sample_id": "mem_openhumanvid_1f35e1c32209f2acdcec26a4e560e1cc7e1fdaad59f891bc03f2b157226c771d",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Two men walk side by side along a dimly lit alley at night, flanked by a bamboo fence and a building with glowing wall lamps. The man on the left wears a dark coat over a white shirt and vest, while the man on the right dons a black leather jacket. They converse as they move forward, occasionally glancing at each other; the man in the coat touches his nose briefly. Their expressions are serious, suggesting a tense or urgent discussion. The atmosphere is quiet and shadowy, with soft ambient lighting casting subtle highlights on their faces and surroundings."
},
{
"task_id": "task_0202",
"subset": "human",
"sample_id": "mem_openhumanvid_1f35e1c32209f2acdcec26a4e560e1cc7e1fdaad59f891bc03f2b157226c771d",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Two men walk side by side along a dimly lit alley at night, flanked by a bamboo fence and a building with glowing wall lamps. The man on the left wears a dark coat over a white shirt and vest, while the man on the right dons a black leather jacket. They converse as they move forward, occasionally glancing at each other; the man in the coat touches his nose briefly. Their expressions are serious, suggesting a tense or urgent discussion. The atmosphere is quiet and shadowy, with soft ambient lighting casting subtle highlights on their faces and surroundings."
},
{
"task_id": "task_0203",
"subset": "human",
"sample_id": "mem_openhumanvid_1f35e1c32209f2acdcec26a4e560e1cc7e1fdaad59f891bc03f2b157226c771d",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Two men walk side by side along a dimly lit alley at night, flanked by a bamboo fence and a building with glowing wall lamps. The man on the left wears a dark coat over a white shirt and vest, while the man on the right dons a black leather jacket. They converse as they move forward, occasionally glancing at each other; the man in the coat touches his nose briefly. Their expressions are serious, suggesting a tense or urgent discussion. The atmosphere is quiet and shadowy, with soft ambient lighting casting subtle highlights on their faces and surroundings."
},
{
"task_id": "task_0204",
"subset": "human",
"sample_id": "mem_openhumanvid_56494681253156a68d48e6373aaf1f88e76740d395dde7f74b6643da8a46fb7d",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, opulent interior adorned with red walls and golden railings, a woman in a striking red dress descends a grand staircase, greeted by a group of formally dressed men and women. The setting, rich with Nazi-era symbolism including military uniforms and a framed portrait labeled \"Friedrich Zoller,\" suggests a high-society gathering. The woman interacts warmly with a man in a tuxedo, touching his lapel as they converse amidst others holding champagne glasses, while the atmosphere buzzes with social elegance and subtle tension."
},
{
"task_id": "task_0205",
"subset": "human",
"sample_id": "mem_openhumanvid_56494681253156a68d48e6373aaf1f88e76740d395dde7f74b6643da8a46fb7d",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, opulent interior adorned with red walls and golden railings, a woman in a striking red dress descends a grand staircase, greeted by a group of formally dressed men and women. The setting, rich with Nazi-era symbolism including military uniforms and a framed portrait labeled \"Friedrich Zoller,\" suggests a high-society gathering. The woman interacts warmly with a man in a tuxedo, touching his lapel as they converse amidst others holding champagne glasses, while the atmosphere buzzes with social elegance and subtle tension."
},
{
"task_id": "task_0206",
"subset": "human",
"sample_id": "mem_openhumanvid_56494681253156a68d48e6373aaf1f88e76740d395dde7f74b6643da8a46fb7d",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, opulent interior adorned with red walls and golden railings, a woman in a striking red dress descends a grand staircase, greeted by a group of formally dressed men and women. The setting, rich with Nazi-era symbolism including military uniforms and a framed portrait labeled \"Friedrich Zoller,\" suggests a high-society gathering. The woman interacts warmly with a man in a tuxedo, touching his lapel as they converse amidst others holding champagne glasses, while the atmosphere buzzes with social elegance and subtle tension."
},
{
"task_id": "task_0207",
"subset": "human",
"sample_id": "mem_openhumanvid_56494681253156a68d48e6373aaf1f88e76740d395dde7f74b6643da8a46fb7d",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, opulent interior adorned with red walls and golden railings, a woman in a striking red dress descends a grand staircase, greeted by a group of formally dressed men and women. The setting, rich with Nazi-era symbolism including military uniforms and a framed portrait labeled \"Friedrich Zoller,\" suggests a high-society gathering. The woman interacts warmly with a man in a tuxedo, touching his lapel as they converse amidst others holding champagne glasses, while the atmosphere buzzes with social elegance and subtle tension."
},
{
"task_id": "task_0208",
"subset": "human",
"sample_id": "mem_openhumanvid_ede8b958a02c1c954adf48b32693b09368ea497223b4518453c7c800a2a4b9b5",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "On a busy urban street in Hong Kong, three people stand near a parked taxi under daylight. A man in a gray shirt and green undershirt opens the car door as a woman in black enters. Another woman in a yellow top and patterned jacket stands nearby, visibly emotional. She hugs the man tightly, her face buried in his shoulder, while he comforts her. The background features shops, including \"The Hong Kong & Kowloon\" sign, and a red circular structure."
},
{
"task_id": "task_0209",
"subset": "human",
"sample_id": "mem_openhumanvid_ede8b958a02c1c954adf48b32693b09368ea497223b4518453c7c800a2a4b9b5",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "On a busy urban street in Hong Kong, three people stand near a parked taxi under daylight. A man in a gray shirt and green undershirt opens the car door as a woman in black enters. Another woman in a yellow top and patterned jacket stands nearby, visibly emotional. She hugs the man tightly, her face buried in his shoulder, while he comforts her. The background features shops, including \"The Hong Kong & Kowloon\" sign, and a red circular structure."
},
{
"task_id": "task_0210",
"subset": "human",
"sample_id": "mem_openhumanvid_ede8b958a02c1c954adf48b32693b09368ea497223b4518453c7c800a2a4b9b5",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "On a busy urban street in Hong Kong, three people stand near a parked taxi under daylight. A man in a gray shirt and green undershirt opens the car door as a woman in black enters. Another woman in a yellow top and patterned jacket stands nearby, visibly emotional. She hugs the man tightly, her face buried in his shoulder, while he comforts her. The background features shops, including \"The Hong Kong & Kowloon\" sign, and a red circular structure."
},
{
"task_id": "task_0211",
"subset": "human",
"sample_id": "mem_openhumanvid_ede8b958a02c1c954adf48b32693b09368ea497223b4518453c7c800a2a4b9b5",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "On a busy urban street in Hong Kong, three people stand near a parked taxi under daylight. A man in a gray shirt and green undershirt opens the car door as a woman in black enters. Another woman in a yellow top and patterned jacket stands nearby, visibly emotional. She hugs the man tightly, her face buried in his shoulder, while he comforts her. The background features shops, including \"The Hong Kong & Kowloon\" sign, and a red circular structure."
},
{
"task_id": "task_0212",
"subset": "human",
"sample_id": "mem_openhumanvid_b23b88de349c55e07f0d678a79e0f5b777848da7a12184fbad56a6e3931bc960",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Inside a modestly furnished room with pale walls and a red-trimmed shelf, an older woman with gray-streaked hair tied back in a bun stands facing right, wearing a dark blouse with ornate embroidery. Beside her, a younger girl in a light pink collared shirt looks on with a solemn expression. The older woman speaks, her mouth moving slightly, while the girl remains still, her gaze fixed forward. Natural light filters in from a window to the left, casting soft shadows."
},
{
"task_id": "task_0213",
"subset": "human",
"sample_id": "mem_openhumanvid_b23b88de349c55e07f0d678a79e0f5b777848da7a12184fbad56a6e3931bc960",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Inside a modestly furnished room with pale walls and a red-trimmed shelf, an older woman with gray-streaked hair tied back in a bun stands facing right, wearing a dark blouse with ornate embroidery. Beside her, a younger girl in a light pink collared shirt looks on with a solemn expression. The older woman speaks, her mouth moving slightly, while the girl remains still, her gaze fixed forward. Natural light filters in from a window to the left, casting soft shadows."
},
{
"task_id": "task_0214",
"subset": "human",
"sample_id": "mem_openhumanvid_b23b88de349c55e07f0d678a79e0f5b777848da7a12184fbad56a6e3931bc960",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Inside a modestly furnished room with pale walls and a red-trimmed shelf, an older woman with gray-streaked hair tied back in a bun stands facing right, wearing a dark blouse with ornate embroidery. Beside her, a younger girl in a light pink collared shirt looks on with a solemn expression. The older woman speaks, her mouth moving slightly, while the girl remains still, her gaze fixed forward. Natural light filters in from a window to the left, casting soft shadows."
},
{
"task_id": "task_0215",
"subset": "human",
"sample_id": "mem_openhumanvid_b23b88de349c55e07f0d678a79e0f5b777848da7a12184fbad56a6e3931bc960",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "Inside a modestly furnished room with pale walls and a red-trimmed shelf, an older woman with gray-streaked hair tied back in a bun stands facing right, wearing a dark blouse with ornate embroidery. Beside her, a younger girl in a light pink collared shirt looks on with a solemn expression. The older woman speaks, her mouth moving slightly, while the girl remains still, her gaze fixed forward. Natural light filters in from a window to the left, casting soft shadows."
},
{
"task_id": "task_0216",
"subset": "human",
"sample_id": "mem_openhumanvid_73ed464ba39c63e4ff1de18c3d429a351571ef0549c9012e3f4a6cfad0f0eef5",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, vintage-style office adorned with a large map on the wall and wooden furniture, two men engage in a tense interaction. One wears a dark Mao jacket, the other a brown suit with a vest and tie. The man in the Mao jacket places a folder on a green desk lamp before both turn and walk toward a window with lattice panes, their expressions serious. They pause near the window, exchanging glances, as daylight filters through, revealing a traditional courtyard outside."
},
{
"task_id": "task_0217",
"subset": "human",
"sample_id": "mem_openhumanvid_73ed464ba39c63e4ff1de18c3d429a351571ef0549c9012e3f4a6cfad0f0eef5",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, vintage-style office adorned with a large map on the wall and wooden furniture, two men engage in a tense interaction. One wears a dark Mao jacket, the other a brown suit with a vest and tie. The man in the Mao jacket places a folder on a green desk lamp before both turn and walk toward a window with lattice panes, their expressions serious. They pause near the window, exchanging glances, as daylight filters through, revealing a traditional courtyard outside."
},
{
"task_id": "task_0218",
"subset": "human",
"sample_id": "mem_openhumanvid_73ed464ba39c63e4ff1de18c3d429a351571ef0549c9012e3f4a6cfad0f0eef5",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, vintage-style office adorned with a large map on the wall and wooden furniture, two men engage in a tense interaction. One wears a dark Mao jacket, the other a brown suit with a vest and tie. The man in the Mao jacket places a folder on a green desk lamp before both turn and walk toward a window with lattice panes, their expressions serious. They pause near the window, exchanging glances, as daylight filters through, revealing a traditional courtyard outside."
},
{
"task_id": "task_0219",
"subset": "human",
"sample_id": "mem_openhumanvid_73ed464ba39c63e4ff1de18c3d429a351571ef0549c9012e3f4a6cfad0f0eef5",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In a dimly lit, vintage-style office adorned with a large map on the wall and wooden furniture, two men engage in a tense interaction. One wears a dark Mao jacket, the other a brown suit with a vest and tie. The man in the Mao jacket places a folder on a green desk lamp before both turn and walk toward a window with lattice panes, their expressions serious. They pause near the window, exchanging glances, as daylight filters through, revealing a traditional courtyard outside."
},
{
"task_id": "task_0220",
"subset": "causal",
"sample_id": "a00325_00018",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A close-up view captures a viscous, amber-colored liquid being poured into a container, creating a smooth, continuous stream that merges with the existing fluid. The surface tension forms gentle ripples and small bubbles as the liquid spreads outward. The warm lighting highlights the fluid's glossy texture and subtle reflections, emphasizing its slow, steady flow and the dynamic interplay between gravity and viscosity."
},
{
"task_id": "task_0221",
"subset": "causal",
"sample_id": "a00325_00018",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A close-up view captures a viscous, amber-colored liquid being poured into a container, creating a smooth, continuous stream that merges with the existing fluid. The surface tension forms gentle ripples and small bubbles as the liquid spreads outward. The warm lighting highlights the fluid's glossy texture and subtle reflections, emphasizing its slow, steady flow and the dynamic interplay between gravity and viscosity."
},
{
"task_id": "task_0222",
"subset": "causal",
"sample_id": "a00325_00018",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A close-up view captures a viscous, amber-colored liquid being poured into a container, creating a smooth, continuous stream that merges with the existing fluid. The surface tension forms gentle ripples and small bubbles as the liquid spreads outward. The warm lighting highlights the fluid's glossy texture and subtle reflections, emphasizing its slow, steady flow and the dynamic interplay between gravity and viscosity."
},
{
"task_id": "task_0223",
"subset": "causal",
"sample_id": "a00325_00018",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A close-up view captures a viscous, amber-colored liquid being poured into a container, creating a smooth, continuous stream that merges with the existing fluid. The surface tension forms gentle ripples and small bubbles as the liquid spreads outward. The warm lighting highlights the fluid's glossy texture and subtle reflections, emphasizing its slow, steady flow and the dynamic interplay between gravity and viscosity."
},
{
"task_id": "task_0224",
"subset": "causal",
"sample_id": "a00060_01459",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In an outdoor setting with lush green foliage in the background, a blue metal shredder with black and yellow hazard stripes is positioned on a red tarp. Inside the shredder, a black AXE deodorant can is placed atop rotating metal blades. As the blades activate, the can is crushed and flattened, with fragments scattering and debris flying outward. A second identical can rests nearby, untouched. The scene is illuminated by natural daylight, highlighting the mechanical destruction of the object."
},
{
"task_id": "task_0225",
"subset": "causal",
"sample_id": "a00060_01459",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In an outdoor setting with lush green foliage in the background, a blue metal shredder with black and yellow hazard stripes is positioned on a red tarp. Inside the shredder, a black AXE deodorant can is placed atop rotating metal blades. As the blades activate, the can is crushed and flattened, with fragments scattering and debris flying outward. A second identical can rests nearby, untouched. The scene is illuminated by natural daylight, highlighting the mechanical destruction of the object."
},
{
"task_id": "task_0226",
"subset": "causal",
"sample_id": "a00060_01459",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In an outdoor setting with lush green foliage in the background, a blue metal shredder with black and yellow hazard stripes is positioned on a red tarp. Inside the shredder, a black AXE deodorant can is placed atop rotating metal blades. As the blades activate, the can is crushed and flattened, with fragments scattering and debris flying outward. A second identical can rests nearby, untouched. The scene is illuminated by natural daylight, highlighting the mechanical destruction of the object."
},
{
"task_id": "task_0227",
"subset": "causal",
"sample_id": "a00060_01459",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "In an outdoor setting with lush green foliage in the background, a blue metal shredder with black and yellow hazard stripes is positioned on a red tarp. Inside the shredder, a black AXE deodorant can is placed atop rotating metal blades. As the blades activate, the can is crushed and flattened, with fragments scattering and debris flying outward. A second identical can rests nearby, untouched. The scene is illuminated by natural daylight, highlighting the mechanical destruction of the object."
},
{
"task_id": "task_0228",
"subset": "causal",
"sample_id": "a00863_00430",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden curved ramp constructed from light-colored wood guides translucent yellow marbles, each marked with red stars, into a confined area bounded by wooden blocks. The marbles roll down the ramp, gaining momentum before colliding with others already gathered below, causing dynamic shifts in their positions. The smooth surface of the ramp and the reflective quality of the marbles highlight motion and interaction under bright, even lighting, emphasizing rolling, bouncing, and gravitational effects within the enclosed space."
},
{
"task_id": "task_0229",
"subset": "causal",
"sample_id": "a00863_00430",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden curved ramp constructed from light-colored wood guides translucent yellow marbles, each marked with red stars, into a confined area bounded by wooden blocks. The marbles roll down the ramp, gaining momentum before colliding with others already gathered below, causing dynamic shifts in their positions. The smooth surface of the ramp and the reflective quality of the marbles highlight motion and interaction under bright, even lighting, emphasizing rolling, bouncing, and gravitational effects within the enclosed space."
},
{
"task_id": "task_0230",
"subset": "causal",
"sample_id": "a00863_00430",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden curved ramp constructed from light-colored wood guides translucent yellow marbles, each marked with red stars, into a confined area bounded by wooden blocks. The marbles roll down the ramp, gaining momentum before colliding with others already gathered below, causing dynamic shifts in their positions. The smooth surface of the ramp and the reflective quality of the marbles highlight motion and interaction under bright, even lighting, emphasizing rolling, bouncing, and gravitational effects within the enclosed space."
},
{
"task_id": "task_0231",
"subset": "causal",
"sample_id": "a00863_00430",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden curved ramp constructed from light-colored wood guides translucent yellow marbles, each marked with red stars, into a confined area bounded by wooden blocks. The marbles roll down the ramp, gaining momentum before colliding with others already gathered below, causing dynamic shifts in their positions. The smooth surface of the ramp and the reflective quality of the marbles highlight motion and interaction under bright, even lighting, emphasizing rolling, bouncing, and gravitational effects within the enclosed space."
},
{
"task_id": "task_0232",
"subset": "causal",
"sample_id": "a00606_01009",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden marble run structure is set up on a white textured surface, supported by red Coca-Cola cans. The track consists of light-colored wooden rails with grooves, arranged in a zigzag pattern. Colored marbles—yellow, red, and blue—roll down the tracks, navigating curves and slopes. Gravity drives their motion, causing collisions and momentum transfer as they descend. The marbles follow the path smoothly, demonstrating rolling friction and inertial movement along the elevated wooden rails."
},
{
"task_id": "task_0233",
"subset": "causal",
"sample_id": "a00606_01009",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden marble run structure is set up on a white textured surface, supported by red Coca-Cola cans. The track consists of light-colored wooden rails with grooves, arranged in a zigzag pattern. Colored marbles—yellow, red, and blue—roll down the tracks, navigating curves and slopes. Gravity drives their motion, causing collisions and momentum transfer as they descend. The marbles follow the path smoothly, demonstrating rolling friction and inertial movement along the elevated wooden rails."
},
{
"task_id": "task_0234",
"subset": "causal",
"sample_id": "a00606_01009",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden marble run structure is set up on a white textured surface, supported by red Coca-Cola cans. The track consists of light-colored wooden rails with grooves, arranged in a zigzag pattern. Colored marbles—yellow, red, and blue—roll down the tracks, navigating curves and slopes. Gravity drives their motion, causing collisions and momentum transfer as they descend. The marbles follow the path smoothly, demonstrating rolling friction and inertial movement along the elevated wooden rails."
},
{
"task_id": "task_0235",
"subset": "causal",
"sample_id": "a00606_01009",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden marble run structure is set up on a white textured surface, supported by red Coca-Cola cans. The track consists of light-colored wooden rails with grooves, arranged in a zigzag pattern. Colored marbles—yellow, red, and blue—roll down the tracks, navigating curves and slopes. Gravity drives their motion, causing collisions and momentum transfer as they descend. The marbles follow the path smoothly, demonstrating rolling friction and inertial movement along the elevated wooden rails."
},
{
"task_id": "task_0236",
"subset": "causal",
"sample_id": "a00655_00572",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A massive, rusted mechanical structure descends vertically onto a barren, desert landscape, its sharp tip piercing the dry earth. The camera captures an aerial view as the structure impacts the ground, causing a small explosion of dust and debris. A humanoid figure in tattered clothing is flung upward from the base, tumbling through the air before being caught by a rotating platform attached to the descending mechanism. The scene is bathed in warm, golden light, emphasizing the arid terrain and the metallic sheen of the machine."
},
{
"task_id": "task_0237",
"subset": "causal",
"sample_id": "a00655_00572",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A massive, rusted mechanical structure descends vertically onto a barren, desert landscape, its sharp tip piercing the dry earth. The camera captures an aerial view as the structure impacts the ground, causing a small explosion of dust and debris. A humanoid figure in tattered clothing is flung upward from the base, tumbling through the air before being caught by a rotating platform attached to the descending mechanism. The scene is bathed in warm, golden light, emphasizing the arid terrain and the metallic sheen of the machine."
},
{
"task_id": "task_0238",
"subset": "causal",
"sample_id": "a00655_00572",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A massive, rusted mechanical structure descends vertically onto a barren, desert landscape, its sharp tip piercing the dry earth. The camera captures an aerial view as the structure impacts the ground, causing a small explosion of dust and debris. A humanoid figure in tattered clothing is flung upward from the base, tumbling through the air before being caught by a rotating platform attached to the descending mechanism. The scene is bathed in warm, golden light, emphasizing the arid terrain and the metallic sheen of the machine."
},
{
"task_id": "task_0239",
"subset": "causal",
"sample_id": "a00655_00572",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A massive, rusted mechanical structure descends vertically onto a barren, desert landscape, its sharp tip piercing the dry earth. The camera captures an aerial view as the structure impacts the ground, causing a small explosion of dust and debris. A humanoid figure in tattered clothing is flung upward from the base, tumbling through the air before being caught by a rotating platform attached to the descending mechanism. The scene is bathed in warm, golden light, emphasizing the arid terrain and the metallic sheen of the machine."
},
{
"task_id": "task_0240",
"subset": "causal",
"sample_id": "a00157_00451",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A dark, well-used wok sits on a gas stove burner in a dimly lit kitchen. A thin stream of oil pours into the center of the hot surface, spreading outward in concentric ripples as it coats the pan. The oil flows smoothly, forming a glossy, reflective layer that gradually covers the bottom. The wok's seasoned, slightly uneven interior reflects light subtly, while the surrounding stove and countertop remain out of focus, emphasizing the fluid dynamics of the oil spreading under gravity and surface tension."
},
{
"task_id": "task_0241",
"subset": "causal",
"sample_id": "a00157_00451",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A dark, well-used wok sits on a gas stove burner in a dimly lit kitchen. A thin stream of oil pours into the center of the hot surface, spreading outward in concentric ripples as it coats the pan. The oil flows smoothly, forming a glossy, reflective layer that gradually covers the bottom. The wok's seasoned, slightly uneven interior reflects light subtly, while the surrounding stove and countertop remain out of focus, emphasizing the fluid dynamics of the oil spreading under gravity and surface tension."
},
{
"task_id": "task_0242",
"subset": "causal",
"sample_id": "a00157_00451",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A dark, well-used wok sits on a gas stove burner in a dimly lit kitchen. A thin stream of oil pours into the center of the hot surface, spreading outward in concentric ripples as it coats the pan. The oil flows smoothly, forming a glossy, reflective layer that gradually covers the bottom. The wok's seasoned, slightly uneven interior reflects light subtly, while the surrounding stove and countertop remain out of focus, emphasizing the fluid dynamics of the oil spreading under gravity and surface tension."
},
{
"task_id": "task_0243",
"subset": "causal",
"sample_id": "a00157_00451",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A dark, well-used wok sits on a gas stove burner in a dimly lit kitchen. A thin stream of oil pours into the center of the hot surface, spreading outward in concentric ripples as it coats the pan. The oil flows smoothly, forming a glossy, reflective layer that gradually covers the bottom. The wok's seasoned, slightly uneven interior reflects light subtly, while the surrounding stove and countertop remain out of focus, emphasizing the fluid dynamics of the oil spreading under gravity and surface tension."
},
{
"task_id": "task_0244",
"subset": "causal",
"sample_id": "a00528_02524",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A child kneels on a light wooden floor, interacting with a yellow, circular toy that spins rapidly. The toy, made of plastic with a red central knob and translucent colored dots, wobbles and tilts as it rotates, demonstrating gyroscopic motion and angular momentum. The child’s hands guide the toy into motion, then release it, allowing it to spin independently while gradually losing balance due to friction and gravity. The scene is dimly lit, emphasizing the toy’s movement against the static background."
},
{
"task_id": "task_0245",
"subset": "causal",
"sample_id": "a00528_02524",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A child kneels on a light wooden floor, interacting with a yellow, circular toy that spins rapidly. The toy, made of plastic with a red central knob and translucent colored dots, wobbles and tilts as it rotates, demonstrating gyroscopic motion and angular momentum. The child’s hands guide the toy into motion, then release it, allowing it to spin independently while gradually losing balance due to friction and gravity. The scene is dimly lit, emphasizing the toy’s movement against the static background."
},
{
"task_id": "task_0246",
"subset": "causal",
"sample_id": "a00528_02524",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A child kneels on a light wooden floor, interacting with a yellow, circular toy that spins rapidly. The toy, made of plastic with a red central knob and translucent colored dots, wobbles and tilts as it rotates, demonstrating gyroscopic motion and angular momentum. The child’s hands guide the toy into motion, then release it, allowing it to spin independently while gradually losing balance due to friction and gravity. The scene is dimly lit, emphasizing the toy’s movement against the static background."
},
{
"task_id": "task_0247",
"subset": "causal",
"sample_id": "a00528_02524",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A child kneels on a light wooden floor, interacting with a yellow, circular toy that spins rapidly. The toy, made of plastic with a red central knob and translucent colored dots, wobbles and tilts as it rotates, demonstrating gyroscopic motion and angular momentum. The child’s hands guide the toy into motion, then release it, allowing it to spin independently while gradually losing balance due to friction and gravity. The scene is dimly lit, emphasizing the toy’s movement against the static background."
},
{
"task_id": "task_0248",
"subset": "causal",
"sample_id": "a00110_01560",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A wooden block rests on a smooth, bright blue surface, supporting a cylindrical spool wrapped in white string. Atop the spool sits a black cap with a central metal bolt. A person's hands position a metallic wrench onto the bolt, threading a thin white string through its loop. As the wrench is pulled upward, the string unwinds rapidly, causing the wrench to spin and rise vertically, demonstrating rotational motion and tension dynamics."
},
{
"task_id": "task_0249",
"subset": "causal",
"sample_id": "a00110_01560",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A wooden block rests on a smooth, bright blue surface, supporting a cylindrical spool wrapped in white string. Atop the spool sits a black cap with a central metal bolt. A person's hands position a metallic wrench onto the bolt, threading a thin white string through its loop. As the wrench is pulled upward, the string unwinds rapidly, causing the wrench to spin and rise vertically, demonstrating rotational motion and tension dynamics."
},
{
"task_id": "task_0250",
"subset": "causal",
"sample_id": "a00110_01560",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A wooden block rests on a smooth, bright blue surface, supporting a cylindrical spool wrapped in white string. Atop the spool sits a black cap with a central metal bolt. A person's hands position a metallic wrench onto the bolt, threading a thin white string through its loop. As the wrench is pulled upward, the string unwinds rapidly, causing the wrench to spin and rise vertically, demonstrating rotational motion and tension dynamics."
},
{
"task_id": "task_0251",
"subset": "causal",
"sample_id": "a00110_01560",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A wooden block rests on a smooth, bright blue surface, supporting a cylindrical spool wrapped in white string. Atop the spool sits a black cap with a central metal bolt. A person's hands position a metallic wrench onto the bolt, threading a thin white string through its loop. As the wrench is pulled upward, the string unwinds rapidly, causing the wrench to spin and rise vertically, demonstrating rotational motion and tension dynamics."
},
{
"task_id": "task_0252",
"subset": "causal",
"sample_id": "a00670_01404",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a sunlit grassy clearing surrounded by dense green trees, a man in a gray shirt and dark pants swings a long blade at a purple foam target shaped like a humanoid figure. The target, marked with a white \"UP\" sign, splits apart upon impact, its limbs detaching and tumbling across the grass. Fragments scatter as the man follows through, lifting one severed piece before tossing it aside. The scene captures the dynamic collision, deformation, and dispersal of soft foam under force, with gravity pulling the pieces downward."
},
{
"task_id": "task_0253",
"subset": "causal",
"sample_id": "a00670_01404",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a sunlit grassy clearing surrounded by dense green trees, a man in a gray shirt and dark pants swings a long blade at a purple foam target shaped like a humanoid figure. The target, marked with a white \"UP\" sign, splits apart upon impact, its limbs detaching and tumbling across the grass. Fragments scatter as the man follows through, lifting one severed piece before tossing it aside. The scene captures the dynamic collision, deformation, and dispersal of soft foam under force, with gravity pulling the pieces downward."
},
{
"task_id": "task_0254",
"subset": "causal",
"sample_id": "a00670_01404",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a sunlit grassy clearing surrounded by dense green trees, a man in a gray shirt and dark pants swings a long blade at a purple foam target shaped like a humanoid figure. The target, marked with a white \"UP\" sign, splits apart upon impact, its limbs detaching and tumbling across the grass. Fragments scatter as the man follows through, lifting one severed piece before tossing it aside. The scene captures the dynamic collision, deformation, and dispersal of soft foam under force, with gravity pulling the pieces downward."
},
{
"task_id": "task_0255",
"subset": "causal",
"sample_id": "a00670_01404",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a sunlit grassy clearing surrounded by dense green trees, a man in a gray shirt and dark pants swings a long blade at a purple foam target shaped like a humanoid figure. The target, marked with a white \"UP\" sign, splits apart upon impact, its limbs detaching and tumbling across the grass. Fragments scatter as the man follows through, lifting one severed piece before tossing it aside. The scene captures the dynamic collision, deformation, and dispersal of soft foam under force, with gravity pulling the pieces downward."
},
{
"task_id": "task_0256",
"subset": "causal",
"sample_id": "a00361_00060",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A powerful waterfall cascades down a dark, rocky cliff face, its turbulent flow illuminated by scattered light reflections. A person in dark clothing descends rapidly through the falling water, arms outstretched, as they are engulfed by the torrent. The force of the water propels them downward, creating a splash upon impact with the churning pool below, where foam and spray rise from the collision."
},
{
"task_id": "task_0257",
"subset": "causal",
"sample_id": "a00361_00060",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A powerful waterfall cascades down a dark, rocky cliff face, its turbulent flow illuminated by scattered light reflections. A person in dark clothing descends rapidly through the falling water, arms outstretched, as they are engulfed by the torrent. The force of the water propels them downward, creating a splash upon impact with the churning pool below, where foam and spray rise from the collision."
},
{
"task_id": "task_0258",
"subset": "causal",
"sample_id": "a00361_00060",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A powerful waterfall cascades down a dark, rocky cliff face, its turbulent flow illuminated by scattered light reflections. A person in dark clothing descends rapidly through the falling water, arms outstretched, as they are engulfed by the torrent. The force of the water propels them downward, creating a splash upon impact with the churning pool below, where foam and spray rise from the collision."
},
{
"task_id": "task_0259",
"subset": "causal",
"sample_id": "a00361_00060",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A powerful waterfall cascades down a dark, rocky cliff face, its turbulent flow illuminated by scattered light reflections. A person in dark clothing descends rapidly through the falling water, arms outstretched, as they are engulfed by the torrent. The force of the water propels them downward, creating a splash upon impact with the churning pool below, where foam and spray rise from the collision."
},
{
"task_id": "task_0260",
"subset": "causal",
"sample_id": "a00764_02393",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A dark, viscous liquid flows vigorously into a container lined with yellow mesh, creating splashes and ripples as it mixes with the existing contents. The fluid's surface churns, revealing numerous small, wriggling eel-like creatures beneath. Wooden chopsticks rest on the edge of the container, partially submerged, while the surrounding area remains dimly lit, emphasizing the dynamic motion of the liquid and the undulating movement of the creatures within."
},
{
"task_id": "task_0261",
"subset": "causal",
"sample_id": "a00764_02393",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A dark, viscous liquid flows vigorously into a container lined with yellow mesh, creating splashes and ripples as it mixes with the existing contents. The fluid's surface churns, revealing numerous small, wriggling eel-like creatures beneath. Wooden chopsticks rest on the edge of the container, partially submerged, while the surrounding area remains dimly lit, emphasizing the dynamic motion of the liquid and the undulating movement of the creatures within."
},
{
"task_id": "task_0262",
"subset": "causal",
"sample_id": "a00764_02393",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A dark, viscous liquid flows vigorously into a container lined with yellow mesh, creating splashes and ripples as it mixes with the existing contents. The fluid's surface churns, revealing numerous small, wriggling eel-like creatures beneath. Wooden chopsticks rest on the edge of the container, partially submerged, while the surrounding area remains dimly lit, emphasizing the dynamic motion of the liquid and the undulating movement of the creatures within."
},
{
"task_id": "task_0263",
"subset": "causal",
"sample_id": "a00764_02393",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A dark, viscous liquid flows vigorously into a container lined with yellow mesh, creating splashes and ripples as it mixes with the existing contents. The fluid's surface churns, revealing numerous small, wriggling eel-like creatures beneath. Wooden chopsticks rest on the edge of the container, partially submerged, while the surrounding area remains dimly lit, emphasizing the dynamic motion of the liquid and the undulating movement of the creatures within."
},
{
"task_id": "task_0264",
"subset": "causal",
"sample_id": "a00742_01453",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A colorful wooden marble run structure, supported by blue and orange interlocking blocks, features a curved track with a green tree-shaped element and yellow spiral ramp. Brightly colored marbles—green, red, yellow, blue, and purple—roll along the elevated track, descending through curves and spirals, colliding gently as they navigate the path, eventually exiting onto the white surface below under consistent indoor lighting."
},
{
"task_id": "task_0265",
"subset": "causal",
"sample_id": "a00742_01453",
"camera_motion": "right_then_left",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A colorful wooden marble run structure, supported by blue and orange interlocking blocks, features a curved track with a green tree-shaped element and yellow spiral ramp. Brightly colored marbles—green, red, yellow, blue, and purple—roll along the elevated track, descending through curves and spirals, colliding gently as they navigate the path, eventually exiting onto the white surface below under consistent indoor lighting."
},
{
"task_id": "task_0266",
"subset": "causal",
"sample_id": "a00742_01453",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "infinite_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A colorful wooden marble run structure, supported by blue and orange interlocking blocks, features a curved track with a green tree-shaped element and yellow spiral ramp. Brightly colored marbles—green, red, yellow, blue, and purple—roll along the elevated track, descending through curves and spirals, colliding gently as they navigate the path, eventually exiting onto the white surface below under consistent indoor lighting."
},
{
"task_id": "task_0267",
"subset": "causal",
"sample_id": "a00742_01453",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A colorful wooden marble run structure, supported by blue and orange interlocking blocks, features a curved track with a green tree-shaped element and yellow spiral ramp. Brightly colored marbles—green, red, yellow, blue, and purple—roll along the elevated track, descending through curves and spirals, colliding gently as they navigate the path, eventually exiting onto the white surface below under consistent indoor lighting."
},
{
"task_id": "task_0268",
"subset": "causal",
"sample_id": "a00464_00063",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bulldozer moves forward across a construction site, its blade pushing and compacting loose brown soil into a rising mound. The terrain is uneven, marked by tire tracks and scattered rocks, with concrete curbs lining the perimeter. In the background, a yellow excavator sits idle near a building under construction. As the dozer advances, soil clumps shift and settle under pressure, demonstrating friction and gravity's influence on granular material."
},
{
"task_id": "task_0269",
"subset": "causal",
"sample_id": "a00464_00063",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bulldozer moves forward across a construction site, its blade pushing and compacting loose brown soil into a rising mound. The terrain is uneven, marked by tire tracks and scattered rocks, with concrete curbs lining the perimeter. In the background, a yellow excavator sits idle near a building under construction. As the dozer advances, soil clumps shift and settle under pressure, demonstrating friction and gravity's influence on granular material."
},
{
"task_id": "task_0270",
"subset": "causal",
"sample_id": "a00464_00063",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bulldozer moves forward across a construction site, its blade pushing and compacting loose brown soil into a rising mound. The terrain is uneven, marked by tire tracks and scattered rocks, with concrete curbs lining the perimeter. In the background, a yellow excavator sits idle near a building under construction. As the dozer advances, soil clumps shift and settle under pressure, demonstrating friction and gravity's influence on granular material."
},
{
"task_id": "task_0271",
"subset": "causal",
"sample_id": "a00464_00063",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A bulldozer moves forward across a construction site, its blade pushing and compacting loose brown soil into a rising mound. The terrain is uneven, marked by tire tracks and scattered rocks, with concrete curbs lining the perimeter. In the background, a yellow excavator sits idle near a building under construction. As the dozer advances, soil clumps shift and settle under pressure, demonstrating friction and gravity's influence on granular material."
},
{
"task_id": "task_0272",
"subset": "causal",
"sample_id": "a00685_00148",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a kitchen setting with soft ambient lighting, a hand holds a glass jar filled with a cloudy liquid, positioned near an empty mason jar on a wooden surface. A red sponge is placed atop the first jar using tongs, then removed as the jar is tilted to pour the liquid into the second jar. The fluid flows smoothly, creating gentle ripples and a slight splash upon impact, while the background reveals blurred kitchen elements like appliances and containers."
},
{
"task_id": "task_0273",
"subset": "causal",
"sample_id": "a00685_00148",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a kitchen setting with soft ambient lighting, a hand holds a glass jar filled with a cloudy liquid, positioned near an empty mason jar on a wooden surface. A red sponge is placed atop the first jar using tongs, then removed as the jar is tilted to pour the liquid into the second jar. The fluid flows smoothly, creating gentle ripples and a slight splash upon impact, while the background reveals blurred kitchen elements like appliances and containers."
},
{
"task_id": "task_0274",
"subset": "causal",
"sample_id": "a00685_00148",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a kitchen setting with soft ambient lighting, a hand holds a glass jar filled with a cloudy liquid, positioned near an empty mason jar on a wooden surface. A red sponge is placed atop the first jar using tongs, then removed as the jar is tilted to pour the liquid into the second jar. The fluid flows smoothly, creating gentle ripples and a slight splash upon impact, while the background reveals blurred kitchen elements like appliances and containers."
},
{
"task_id": "task_0275",
"subset": "causal",
"sample_id": "a00685_00148",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a kitchen setting with soft ambient lighting, a hand holds a glass jar filled with a cloudy liquid, positioned near an empty mason jar on a wooden surface. A red sponge is placed atop the first jar using tongs, then removed as the jar is tilted to pour the liquid into the second jar. The fluid flows smoothly, creating gentle ripples and a slight splash upon impact, while the background reveals blurred kitchen elements like appliances and containers."
},
{
"task_id": "task_0276",
"subset": "causal",
"sample_id": "a00208_01445",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A yellow toy dump truck with large black tires and yellow rims is positioned on a sandy surface, its bed raised to unload a pile of dark brown soil. The soil tumbles forward in a controlled cascade, forming a small mound as it falls onto the ground. The surrounding environment includes a larger sand dune, sparse green tufts of grass, and a white wall in the background. The truck’s movement is steady, and the soil flows smoothly under gravity, demonstrating the effects of weight and friction as it settles into the terrain."
},
{
"task_id": "task_0277",
"subset": "causal",
"sample_id": "a00208_01445",
"camera_motion": "left_then_right",
"model_a": "matrix_game_3",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A yellow toy dump truck with large black tires and yellow rims is positioned on a sandy surface, its bed raised to unload a pile of dark brown soil. The soil tumbles forward in a controlled cascade, forming a small mound as it falls onto the ground. The surrounding environment includes a larger sand dune, sparse green tufts of grass, and a white wall in the background. The truck’s movement is steady, and the soil flows smoothly under gravity, demonstrating the effects of weight and friction as it settles into the terrain."
},
{
"task_id": "task_0278",
"subset": "causal",
"sample_id": "a00208_01445",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A yellow toy dump truck with large black tires and yellow rims is positioned on a sandy surface, its bed raised to unload a pile of dark brown soil. The soil tumbles forward in a controlled cascade, forming a small mound as it falls onto the ground. The surrounding environment includes a larger sand dune, sparse green tufts of grass, and a white wall in the background. The truck’s movement is steady, and the soil flows smoothly under gravity, demonstrating the effects of weight and friction as it settles into the terrain."
},
{
"task_id": "task_0279",
"subset": "causal",
"sample_id": "a00208_01445",
"camera_motion": "left_then_right",
"model_a": "matrix_game_2",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A yellow toy dump truck with large black tires and yellow rims is positioned on a sandy surface, its bed raised to unload a pile of dark brown soil. The soil tumbles forward in a controlled cascade, forming a small mound as it falls onto the ground. The surrounding environment includes a larger sand dune, sparse green tufts of grass, and a white wall in the background. The truck’s movement is steady, and the soil flows smoothly under gravity, demonstrating the effects of weight and friction as it settles into the terrain."
}
],
"quality_control_tasks": [
{
"task_id": "qc_task_0160",
"subset": "human",
"sample_id": "mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "lingbot_world",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside a brightly lit indoor space, likely a school hallway, three individuals—a man and two children—stand near a doorway adorned with educational posters about respiratory health. The man, wearing glasses and a brown shirt, listens as the boy in a yellow polo shirt gestures emphatically toward the girl beside him, who wears a matching uniform and a blue headband. She responds by pointing off-screen, drawing attention to something beyond the frame. A red and black bag hangs on the wall, and green plants add a touch of nature to the scene.",
"is_quality_control": true
},
{
"task_id": "qc_task_0122",
"subset": "object",
"sample_id": "sample_272_f1ea4671",
"camera_motion": "right_then_left",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A stately, multi-story house with a light brick facade and large windows sits in a quiet, upscale neighborhood, framed by a bare tree and a muted winter palette.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/",
"is_quality_control": true
},
{
"task_id": "qc_task_0135",
"subset": "object",
"sample_id": "sample_209_b7a80278",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene alpine village nestled among towering peaks, featuring a weathered barn, a curved road, and a clear blue sky, evoking peaceful isolation and natural beauty.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/",
"is_quality_control": true
},
{
"task_id": "qc_task_0202",
"subset": "human",
"sample_id": "mem_openhumanvid_1f35e1c32209f2acdcec26a4e560e1cc7e1fdaad59f891bc03f2b157226c771d",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Two men walk side by side along a dimly lit alley at night, flanked by a bamboo fence and a building with glowing wall lamps. The man on the left wears a dark coat over a white shirt and vest, while the man on the right dons a black leather jacket. They converse as they move forward, occasionally glancing at each other; the man in the coat touches his nose briefly. Their expressions are serious, suggesting a tense or urgent discussion. The atmosphere is quiet and shadowy, with soft ambient lighting casting subtle highlights on their faces and surroundings.",
"is_quality_control": true
},
{
"task_id": "qc_task_0067",
"subset": "environment",
"sample_id": "sample_230_f55277fe",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A lively, illuminated courtyard filled with holiday decorations, surrounded by elegant architecture, where people gather under twinkling lights in a warm, celebratory atmosphere.",
"is_quality_control": true
},
{
"task_id": "qc_task_0153",
"subset": "object",
"sample_id": "sample_103_74c54180",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A serene village square under a blue sky, framed by a stone archway, wooden buildings, and a distant church tower, evoking a timeless atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/",
"is_quality_control": true
},
{
"task_id": "qc_task_0234",
"subset": "causal",
"sample_id": "a00606_01009",
"camera_motion": "right_then_left",
"model_a": "matrix_game_2",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A wooden marble run structure is set up on a white textured surface, supported by red Coca-Cola cans. The track consists of light-colored wooden rails with grooves, arranged in a zigzag pattern. Colored marbles—yellow, red, and blue—roll down the tracks, navigating curves and slopes. Gravity drives their motion, causing collisions and momentum transfer as they descend. The marbles follow the path smoothly, demonstrating rolling friction and inertial movement along the elevated wooden rails.",
"is_quality_control": true
},
{
"task_id": "qc_task_0161",
"subset": "human",
"sample_id": "mem_openhumanvid_4e5e1c0db89294099c90db5e79182763b7289b0501c9f4b87ccec06187a22c53",
"camera_motion": "left_then_right",
"model_a": "hy_worldplay",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"identity",
"appearance"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和人物?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"identity": "人物离开画面再回来后,哪个视频中人物的面部特征(脸型、五官)更像同一个人?",
"appearance": "人物离开画面再回来后,哪个视频中人物的整体外观(衣着、体型、发型)更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "Inside a brightly lit indoor space, likely a school hallway, three individuals—a man and two children—stand near a doorway adorned with educational posters about respiratory health. The man, wearing glasses and a brown shirt, listens as the boy in a yellow polo shirt gestures emphatically toward the girl beside him, who wears a matching uniform and a blue headband. She responds by pointing off-screen, drawing attention to something beyond the frame. A red and black bag hangs on the wall, and green plants add a touch of nature to the scene.",
"is_quality_control": true
},
{
"task_id": "qc_task_0037",
"subset": "environment",
"sample_id": "sample_049_dccd53de",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet mountain village lies under a blue sky, its snow-draped houses and trees creating a serene, wintry atmosphere along a winding road.",
"is_quality_control": true
},
{
"task_id": "qc_task_0004",
"subset": "environment",
"sample_id": "sample_247_b0456067",
"camera_motion": "right_then_left",
"model_a": "hy_worldplay",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A brightly lit, floral-walled room features a woman cradling a baby, surrounded by a white crib and a small table with a lamp, evoking a tender atmosphere.",
"is_quality_control": true
},
{
"task_id": "qc_task_0051",
"subset": "environment",
"sample_id": "sample_262_10a2b882",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"viewpoint",
"layout",
"style",
"lighting"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"viewpoint": "相机转回来后,哪个视频看起来更像是从同一个位置/角度在看同一个场景?",
"layout": "相机转回来后,场景中物体的空间布局(位置关系、远近大小)哪个更一致?",
"style": "视频前后的整体风格(色调、渲染风格、画面质感)哪个更一致?",
"lighting": "视频前后的光照条件(光源方向、亮度、阴影)哪个更一致?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "A serene winter landscape features a snow-covered road flanked by towering trees under an overcast sky, evoking quiet isolation and natural beauty.",
"is_quality_control": true
},
{
"task_id": "qc_task_0275",
"subset": "causal",
"sample_id": "a00685_00148",
"camera_motion": "right_then_left",
"model_a": "lingbot_world",
"model_b": "matrix_game_2",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向右转,再向左转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向右转,再向左转回来",
"caption": "In a kitchen setting with soft ambient lighting, a hand holds a glass jar filled with a cloudy liquid, positioned near an empty mason jar on a wooden surface. A red sponge is placed atop the first jar using tongs, then removed as the jar is tilted to pour the liquid into the second jar. The fluid flows smoothly, creating gentle ripples and a slight splash upon impact, while the background reveals blurred kitchen elements like appliances and containers.",
"is_quality_control": true
},
{
"task_id": "qc_task_0109",
"subset": "object",
"sample_id": "sample_008_93bb110c",
"camera_motion": "left_then_right",
"model_a": "lingbot_world",
"model_b": "yume",
"dimensions": [
"prompt_interaction",
"action_interaction",
"geometry",
"texture"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"geometry": "关注标注出的目标物体:相机转回来后,物体的形状/轮廓哪个更一致?",
"texture": "关注标注出的目标物体:相机转回来后,物体的表面纹理/颜色/材质哪个更一致?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A quiet European village square with timber-framed buildings, cobblestone streets, and pedestrians, bathed in soft overcast light, evoking a calm, nostalgic atmosphere.",
"mask_viz_dir": "eval_outputs/spatial_filter/subset_viz/",
"is_quality_control": true
},
{
"task_id": "qc_task_0259",
"subset": "causal",
"sample_id": "a00361_00060",
"camera_motion": "left_then_right",
"model_a": "infinite_world",
"model_b": "matrix_game_3",
"dimensions": [
"prompt_interaction",
"action_interaction",
"state_progress",
"physical_plausibility"
],
"dimension_questions": {
"prompt_interaction": "阅读下方的文字描述。哪个视频的内容更符合文字所描述的场景和物理过程?",
"action_interaction": "预期的相机运动是'先向左转,再向右转回来'。哪个视频的相机运动更符合这个要求?",
"state_progress": "相机转走再转回来后,场景中的物理过程是否在继续推进?(例如:水位是否比离开前更高?物体是否继续运动?)哪个视频的物理过程更像在持续发展、没有停滞或回退?",
"physical_plausibility": "相机转回来后看到的物理状态是否合理?(例如:水位上涨了合理的量?碾压后的变形程度符合预期?)哪个视频的物理结果更符合真实世界的规律?"
},
"camera_motion_description": "先向左转,再向右转回来",
"caption": "A powerful waterfall cascades down a dark, rocky cliff face, its turbulent flow illuminated by scattered light reflections. A person in dark clothing descends rapidly through the falling water, arms outstretched, as they are engulfed by the torrent. The force of the water propels them downward, creating a splash upon impact with the churning pool below, where foam and spray rise from the collision.",
"is_quality_control": true
}
],
"annotator_assignments": {
"annotator_00": [
"task_0197",
"task_0266",
"task_0151",
"task_0260",
"task_0022",
"task_0154",
"task_0001",
"task_0047",
"task_0134",
"task_0100",
"task_0139",
"task_0066",
"task_0048",
"task_0034",
"task_0187",
"task_0086",
"task_0120",
"task_0170",
"task_0059",
"task_0055",
"task_0052",
"task_0073",
"task_0173",
"task_0065",
"task_0212",
"task_0090",
"task_0267",
"task_0248",
"task_0115",
"task_0273",
"task_0042",
"task_0032",
"task_0020",
"task_0078",
"task_0261",
"task_0257",
"task_0155",
"task_0247",
"task_0222",
"task_0075",
"task_0244",
"task_0203",
"task_0274",
"task_0243",
"task_0275",
"task_0193",
"task_0227",
"task_0109",
"task_0143",
"task_0067",
"task_0101",
"task_0183",
"task_0054",
"task_0206",
"task_0198",
"task_0112",
"task_0125",
"task_0181",
"task_0232",
"task_0040"
],
"annotator_01": [
"task_0197",
"task_0104",
"task_0151",
"task_0260",
"task_0147",
"task_0154",
"task_0001",
"task_0049",
"task_0134",
"task_0100",
"task_0103",
"task_0066",
"task_0048",
"task_0185",
"task_0187",
"task_0086",
"task_0099",
"task_0170",
"task_0059",
"task_0084",
"task_0052",
"task_0073",
"task_0062",
"task_0065",
"task_0212",
"task_0009",
"task_0267",
"task_0248",
"task_0256",
"task_0273",
"task_0042",
"task_0264",
"task_0020",
"task_0078",
"task_0082",
"task_0257",
"task_0155",
"task_0027",
"task_0222",
"task_0075",
"task_0223",
"task_0203",
"task_0274",
"task_0152",
"task_0275",
"task_0193",
"task_0056",
"task_0109",
"task_0143",
"task_0263",
"task_0101",
"task_0183",
"task_0016",
"task_0206",
"task_0198",
"task_0138",
"task_0125",
"task_0181",
"task_0142",
"task_0040"
],
"annotator_02": [
"task_0197",
"task_0104",
"task_0038",
"task_0260",
"task_0147",
"task_0219",
"task_0001",
"task_0049",
"task_0252",
"task_0100",
"task_0103",
"task_0026",
"task_0048",
"task_0185",
"task_0171",
"task_0086",
"task_0099",
"task_0114",
"task_0059",
"task_0084",
"task_0096",
"task_0073",
"task_0062",
"task_0230",
"task_0212",
"task_0009",
"task_0070",
"task_0248",
"task_0256",
"task_0259",
"task_0042",
"task_0264",
"task_0240",
"task_0078",
"task_0082",
"task_0041",
"task_0155",
"task_0027",
"task_0000",
"task_0075",
"task_0223",
"task_0002",
"task_0274",
"task_0152",
"task_0160",
"task_0193",
"task_0056",
"task_0149",
"task_0143",
"task_0263",
"task_0127",
"task_0183",
"task_0016",
"task_0008",
"task_0198",
"task_0138",
"task_0087",
"task_0181",
"task_0142",
"task_0148"
],
"annotator_03": [
"task_0131",
"task_0104",
"task_0038",
"task_0213",
"task_0147",
"task_0219",
"task_0046",
"task_0049",
"task_0252",
"task_0225",
"task_0103",
"task_0026",
"task_0005",
"task_0185",
"task_0171",
"task_0146",
"task_0099",
"task_0114",
"task_0093",
"task_0084",
"task_0096",
"task_0168",
"task_0062",
"task_0230",
"task_0271",
"task_0009",
"task_0070",
"task_0057",
"task_0256",
"task_0259",
"task_0140",
"task_0264",
"task_0240",
"task_0216",
"task_0082",
"task_0041",
"task_0188",
"task_0027",
"task_0000",
"task_0208",
"task_0223",
"task_0002",
"task_0201",
"task_0152",
"task_0160",
"task_0278",
"task_0056",
"task_0149",
"task_0190",
"task_0263",
"task_0127",
"task_0224",
"task_0016",
"task_0008",
"task_0196",
"task_0138",
"task_0087",
"task_0189",
"task_0142",
"task_0148"
],
"annotator_04": [
"task_0131",
"task_0011",
"task_0038",
"task_0213",
"task_0235",
"task_0219",
"task_0046",
"task_0180",
"task_0252",
"task_0225",
"task_0025",
"task_0026",
"task_0005",
"task_0003",
"task_0171",
"task_0146",
"task_0132",
"task_0114",
"task_0093",
"task_0091",
"task_0096",
"task_0168",
"task_0085",
"task_0230",
"task_0271",
"task_0161",
"task_0070",
"task_0057",
"task_0165",
"task_0259",
"task_0140",
"task_0017",
"task_0240",
"task_0216",
"task_0153",
"task_0041",
"task_0188",
"task_0129",
"task_0000",
"task_0208",
"task_0221",
"task_0002",
"task_0201",
"task_0108",
"task_0160",
"task_0278",
"task_0092",
"task_0149",
"task_0190",
"task_0117",
"task_0127",
"task_0224",
"task_0068",
"task_0008",
"task_0196",
"task_0272",
"task_0087",
"task_0189",
"task_0194",
"task_0148"
],
"annotator_05": [
"task_0131",
"task_0011",
"task_0037",
"task_0213",
"task_0235",
"task_0088",
"task_0046",
"task_0180",
"task_0133",
"task_0225",
"task_0025",
"task_0010",
"task_0005",
"task_0003",
"task_0089",
"task_0146",
"task_0132",
"task_0179",
"task_0093",
"task_0091",
"task_0031",
"task_0168",
"task_0085",
"task_0249",
"task_0271",
"task_0161",
"task_0182",
"task_0057",
"task_0165",
"task_0204",
"task_0140",
"task_0017",
"task_0172",
"task_0216",
"task_0153",
"task_0095",
"task_0188",
"task_0129",
"task_0277",
"task_0208",
"task_0221",
"task_0141",
"task_0201",
"task_0108",
"task_0039",
"task_0278",
"task_0092",
"task_0191",
"task_0190",
"task_0117",
"task_0167",
"task_0224",
"task_0068",
"task_0210",
"task_0196",
"task_0272",
"task_0036",
"task_0189",
"task_0194",
"task_0116"
],
"annotator_06": [
"task_0258",
"task_0011",
"task_0037",
"task_0071",
"task_0235",
"task_0088",
"task_0241",
"task_0180",
"task_0133",
"task_0250",
"task_0025",
"task_0010",
"task_0118",
"task_0003",
"task_0089",
"task_0234",
"task_0132",
"task_0179",
"task_0150",
"task_0091",
"task_0031",
"task_0200",
"task_0085",
"task_0249",
"task_0007",
"task_0161",
"task_0182",
"task_0270",
"task_0165",
"task_0204",
"task_0121",
"task_0017",
"task_0172",
"task_0238",
"task_0153",
"task_0095",
"task_0163",
"task_0129",
"task_0277",
"task_0262",
"task_0221",
"task_0141",
"task_0097",
"task_0108",
"task_0039",
"task_0126",
"task_0092",
"task_0191",
"task_0063",
"task_0117",
"task_0167",
"task_0145",
"task_0068",
"task_0210",
"task_0215",
"task_0272",
"task_0036",
"task_0083",
"task_0194",
"task_0116"
],
"annotator_07": [
"task_0258",
"task_0033",
"task_0037",
"task_0071",
"task_0255",
"task_0088",
"task_0241",
"task_0176",
"task_0133",
"task_0250",
"task_0077",
"task_0010",
"task_0118",
"task_0081",
"task_0089",
"task_0234",
"task_0013",
"task_0179",
"task_0150",
"task_0209",
"task_0031",
"task_0200",
"task_0214",
"task_0249",
"task_0007",
"task_0156",
"task_0182",
"task_0270",
"task_0217",
"task_0204",
"task_0121",
"task_0124",
"task_0172",
"task_0238",
"task_0178",
"task_0095",
"task_0163",
"task_0045",
"task_0277",
"task_0262",
"task_0029",
"task_0141",
"task_0097",
"task_0174",
"task_0039",
"task_0126",
"task_0245",
"task_0191",
"task_0063",
"task_0164",
"task_0167",
"task_0145",
"task_0102",
"task_0210",
"task_0215",
"task_0236",
"task_0036",
"task_0083",
"task_0051",
"task_0116"
],
"annotator_08": [
"task_0258",
"task_0033",
"task_0229",
"task_0071",
"task_0255",
"task_0094",
"task_0241",
"task_0176",
"task_0018",
"task_0250",
"task_0077",
"task_0015",
"task_0118",
"task_0081",
"task_0105",
"task_0234",
"task_0013",
"task_0159",
"task_0150",
"task_0209",
"task_0060",
"task_0200",
"task_0214",
"task_0110",
"task_0007",
"task_0156",
"task_0192",
"task_0270",
"task_0217",
"task_0162",
"task_0121",
"task_0124",
"task_0251",
"task_0238",
"task_0178",
"task_0207",
"task_0163",
"task_0045",
"task_0205",
"task_0262",
"task_0029",
"task_0064",
"task_0097",
"task_0174",
"task_0028",
"task_0126",
"task_0245",
"task_0228",
"task_0063",
"task_0164",
"task_0242",
"task_0145",
"task_0102",
"task_0058",
"task_0215",
"task_0236",
"task_0136",
"task_0083",
"task_0051",
"task_0023"
],
"annotator_09": [
"task_0218",
"task_0033",
"task_0229",
"task_0019",
"task_0255",
"task_0094",
"task_0239",
"task_0176",
"task_0018",
"task_0074",
"task_0077",
"task_0015",
"task_0053",
"task_0081",
"task_0105",
"task_0269",
"task_0013",
"task_0159",
"task_0175",
"task_0209",
"task_0060",
"task_0069",
"task_0214",
"task_0110",
"task_0237",
"task_0156",
"task_0192",
"task_0030",
"task_0217",
"task_0162",
"task_0144",
"task_0124",
"task_0251",
"task_0004",
"task_0178",
"task_0207",
"task_0076",
"task_0045",
"task_0205",
"task_0231",
"task_0029",
"task_0064",
"task_0098",
"task_0174",
"task_0028",
"task_0130",
"task_0245",
"task_0228",
"task_0279",
"task_0164",
"task_0242",
"task_0233",
"task_0102",
"task_0058",
"task_0166",
"task_0236",
"task_0136",
"task_0186",
"task_0051",
"task_0023"
],
"annotator_10": [
"task_0218",
"task_0044",
"task_0229",
"task_0019",
"task_0072",
"task_0094",
"task_0239",
"task_0265",
"task_0018",
"task_0074",
"task_0106",
"task_0015",
"task_0053",
"task_0254",
"task_0105",
"task_0269",
"task_0158",
"task_0159",
"task_0175",
"task_0024",
"task_0060",
"task_0069",
"task_0169",
"task_0110",
"task_0237",
"task_0123",
"task_0192",
"task_0030",
"task_0079",
"task_0162",
"task_0144",
"task_0021",
"task_0251",
"task_0004",
"task_0195",
"task_0207",
"task_0076",
"task_0128",
"task_0205",
"task_0231",
"task_0184",
"task_0064",
"task_0098",
"task_0202",
"task_0028",
"task_0130",
"task_0220",
"task_0228",
"task_0279",
"task_0226",
"task_0242",
"task_0233",
"task_0080",
"task_0058",
"task_0166",
"task_0268",
"task_0136",
"task_0186",
"task_0119",
"task_0023"
],
"annotator_11": [
"task_0218",
"task_0044",
"task_0276",
"task_0019",
"task_0072",
"task_0253",
"task_0239",
"task_0265",
"task_0113",
"task_0074",
"task_0106",
"task_0122",
"task_0053",
"task_0254",
"task_0211",
"task_0269",
"task_0158",
"task_0006",
"task_0175",
"task_0024",
"task_0199",
"task_0069",
"task_0169",
"task_0157",
"task_0237",
"task_0123",
"task_0043",
"task_0030",
"task_0079",
"task_0177",
"task_0144",
"task_0021",
"task_0061",
"task_0004",
"task_0195",
"task_0050",
"task_0076",
"task_0128",
"task_0111",
"task_0231",
"task_0184",
"task_0135",
"task_0098",
"task_0202",
"task_0012",
"task_0130",
"task_0220",
"task_0137",
"task_0279",
"task_0226",
"task_0246",
"task_0233",
"task_0080",
"task_0014",
"task_0166",
"task_0268",
"task_0107",
"task_0186",
"task_0119",
"task_0035"
],
"annotator_12": [
"task_0266",
"task_0044",
"task_0276",
"task_0022",
"task_0072",
"task_0253",
"task_0047",
"task_0265",
"task_0113",
"task_0139",
"task_0106",
"task_0122",
"task_0034",
"task_0254",
"task_0211",
"task_0120",
"task_0158",
"task_0006",
"task_0055",
"task_0024",
"task_0199",
"task_0173",
"task_0169",
"task_0157",
"task_0090",
"task_0123",
"task_0043",
"task_0115",
"task_0079",
"task_0177",
"task_0032",
"task_0021",
"task_0061",
"task_0261",
"task_0195",
"task_0050",
"task_0247",
"task_0128",
"task_0111",
"task_0244",
"task_0184",
"task_0135",
"task_0243",
"task_0202",
"task_0012",
"task_0227",
"task_0220",
"task_0137",
"task_0067",
"task_0226",
"task_0246",
"task_0054",
"task_0080",
"task_0014",
"task_0112",
"task_0268",
"task_0107",
"task_0232",
"task_0119",
"task_0035"
],
"annotator_13": [
"task_0266",
"task_0151",
"task_0276",
"task_0022",
"task_0154",
"task_0253",
"task_0047",
"task_0134",
"task_0113",
"task_0139",
"task_0066",
"task_0122",
"task_0034",
"task_0187",
"task_0211",
"task_0120",
"task_0170",
"task_0006",
"task_0055",
"task_0052",
"task_0199",
"task_0173",
"task_0065",
"task_0157",
"task_0090",
"task_0267",
"task_0043",
"task_0115",
"task_0273",
"task_0177",
"task_0032",
"task_0020",
"task_0061",
"task_0261",
"task_0257",
"task_0050",
"task_0247",
"task_0222",
"task_0111",
"task_0244",
"task_0203",
"task_0135",
"task_0243",
"task_0275",
"task_0012",
"task_0227",
"task_0109",
"task_0137",
"task_0067",
"task_0101",
"task_0246",
"task_0054",
"task_0206",
"task_0014",
"task_0112",
"task_0125",
"task_0107",
"task_0232",
"task_0040",
"task_0035"
]
}
}