data/examples/classification/train.json

[
    {
        "id": "a401262",
        "images": [
            "images/a401262/a401262_00.jpg",
            "images/a401262/a401262_01.jpg",
            "images/a401262/a401262_02.jpg",
            "images/a401262/a401262_03.jpg",
            "images/a401262/a401262_04.jpg",
            "images/a401262/a401262_05.jpg",
            "images/a401262/a401262_06.jpg",
            "images/a401262/a401262_07.jpg",
            "images/a401262/a401262_08.jpg",
            "images/a401262/a401262_09.jpg",
            "images/a401262/a401262_10.jpg",
            "images/a401262/a401262_11.jpg",
            "images/a401262/a401262_12.jpg",
            "images/a401262/a401262_13.jpg",
            "images/a401262/a401262_14.jpg",
            "images/a401262/a401262_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Beach with lots of people, beautiful sun, few clouds, big waves, beautiful landscape with coconut trees, ultra realistic, 8k, 1080x1920  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700867",
        "images": [
            "images/a700867/a700867_00.jpg",
            "images/a700867/a700867_01.jpg",
            "images/a700867/a700867_02.jpg",
            "images/a700867/a700867_03.jpg",
            "images/a700867/a700867_04.jpg",
            "images/a700867/a700867_05.jpg",
            "images/a700867/a700867_06.jpg",
            "images/a700867/a700867_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"dog jumping and doing a double backflip in a phsycadelic environment  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400222",
        "images": [
            "images/a400222/a400222_00.jpg",
            "images/a400222/a400222_01.jpg",
            "images/a400222/a400222_02.jpg",
            "images/a400222/a400222_03.jpg",
            "images/a400222/a400222_04.jpg",
            "images/a400222/a400222_05.jpg",
            "images/a400222/a400222_06.jpg",
            "images/a400222/a400222_07.jpg",
            "images/a400222/a400222_08.jpg",
            "images/a400222/a400222_09.jpg",
            "images/a400222/a400222_10.jpg",
            "images/a400222/a400222_11.jpg",
            "images/a400222/a400222_12.jpg",
            "images/a400222/a400222_13.jpg",
            "images/a400222/a400222_14.jpg",
            "images/a400222/a400222_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a beautiful garden with trees also  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500098",
        "images": [
            "images/a500098/a500098_00.jpg",
            "images/a500098/a500098_01.jpg",
            "images/a500098/a500098_02.jpg",
            "images/a500098/a500098_03.jpg",
            "images/a500098/a500098_04.jpg",
            "images/a500098/a500098_05.jpg",
            "images/a500098/a500098_06.jpg",
            "images/a500098/a500098_07.jpg",
            "images/a500098/a500098_08.jpg",
            "images/a500098/a500098_09.jpg",
            "images/a500098/a500098_10.jpg",
            "images/a500098/a500098_11.jpg",
            "images/a500098/a500098_12.jpg",
            "images/a500098/a500098_13.jpg",
            "images/a500098/a500098_14.jpg",
            "images/a500098/a500098_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a man holding a gun stood in the streets of London, dark streets, man lit by streetlights, realistic  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400670",
        "images": [
            "images/a400670/a400670_00.jpg",
            "images/a400670/a400670_01.jpg",
            "images/a400670/a400670_02.jpg",
            "images/a400670/a400670_03.jpg",
            "images/a400670/a400670_04.jpg",
            "images/a400670/a400670_05.jpg",
            "images/a400670/a400670_06.jpg",
            "images/a400670/a400670_07.jpg",
            "images/a400670/a400670_08.jpg",
            "images/a400670/a400670_09.jpg",
            "images/a400670/a400670_10.jpg",
            "images/a400670/a400670_11.jpg",
            "images/a400670/a400670_12.jpg",
            "images/a400670/a400670_13.jpg",
            "images/a400670/a400670_14.jpg",
            "images/a400670/a400670_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"five nights at freddys location with freddy dancing in front of it  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700236",
        "images": [
            "images/a700236/a700236_00.jpg",
            "images/a700236/a700236_01.jpg",
            "images/a700236/a700236_02.jpg",
            "images/a700236/a700236_03.jpg",
            "images/a700236/a700236_04.jpg",
            "images/a700236/a700236_05.jpg",
            "images/a700236/a700236_06.jpg",
            "images/a700236/a700236_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Shurpanakha then approached his younger brother, Lakshmana, who said that he is only second to Ram and therefore not worthy of her.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700223",
        "images": [
            "images/a700223/a700223_00.jpg",
            "images/a700223/a700223_01.jpg",
            "images/a700223/a700223_02.jpg",
            "images/a700223/a700223_03.jpg",
            "images/a700223/a700223_04.jpg",
            "images/a700223/a700223_05.jpg",
            "images/a700223/a700223_06.jpg",
            "images/a700223/a700223_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Roblox noob in the mall dancing with Steven universe  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400913",
        "images": [
            "images/a400913/a400913_00.jpg",
            "images/a400913/a400913_01.jpg",
            "images/a400913/a400913_02.jpg",
            "images/a400913/a400913_03.jpg",
            "images/a400913/a400913_04.jpg",
            "images/a400913/a400913_05.jpg",
            "images/a400913/a400913_06.jpg",
            "images/a400913/a400913_07.jpg",
            "images/a400913/a400913_08.jpg",
            "images/a400913/a400913_09.jpg",
            "images/a400913/a400913_10.jpg",
            "images/a400913/a400913_11.jpg",
            "images/a400913/a400913_12.jpg",
            "images/a400913/a400913_13.jpg",
            "images/a400913/a400913_14.jpg",
            "images/a400913/a400913_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"and architectural knowledge required for these structures spark debates about the true extent of Sumerian technological capabilities.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500147",
        "images": [
            "images/a500147/a500147_00.jpg",
            "images/a500147/a500147_01.jpg",
            "images/a500147/a500147_02.jpg",
            "images/a500147/a500147_03.jpg",
            "images/a500147/a500147_04.jpg",
            "images/a500147/a500147_05.jpg",
            "images/a500147/a500147_06.jpg",
            "images/a500147/a500147_07.jpg",
            "images/a500147/a500147_08.jpg",
            "images/a500147/a500147_09.jpg",
            "images/a500147/a500147_10.jpg",
            "images/a500147/a500147_11.jpg",
            "images/a500147/a500147_12.jpg",
            "images/a500147/a500147_13.jpg",
            "images/a500147/a500147_14.jpg",
            "images/a500147/a500147_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"nick cage driving 300th century mitsubish bose  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400181",
        "images": [
            "images/a400181/a400181_00.jpg",
            "images/a400181/a400181_01.jpg",
            "images/a400181/a400181_02.jpg",
            "images/a400181/a400181_03.jpg",
            "images/a400181/a400181_04.jpg",
            "images/a400181/a400181_05.jpg",
            "images/a400181/a400181_06.jpg",
            "images/a400181/a400181_07.jpg",
            "images/a400181/a400181_08.jpg",
            "images/a400181/a400181_09.jpg",
            "images/a400181/a400181_10.jpg",
            "images/a400181/a400181_11.jpg",
            "images/a400181/a400181_12.jpg",
            "images/a400181/a400181_13.jpg",
            "images/a400181/a400181_14.jpg",
            "images/a400181/a400181_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Display the princess forming alliances with a diverse group of characters. Each one is uniquely designed and brought to life in intricate 3D detail  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700854",
        "images": [
            "images/a700854/a700854_00.jpg",
            "images/a700854/a700854_01.jpg",
            "images/a700854/a700854_02.jpg",
            "images/a700854/a700854_03.jpg",
            "images/a700854/a700854_04.jpg",
            "images/a700854/a700854_05.jpg",
            "images/a700854/a700854_06.jpg",
            "images/a700854/a700854_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"bellagio fountain drive by in tesla plaid, holi colors spraying  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401377",
        "images": [
            "images/a401377/a401377_00.jpg",
            "images/a401377/a401377_01.jpg",
            "images/a401377/a401377_02.jpg",
            "images/a401377/a401377_03.jpg",
            "images/a401377/a401377_04.jpg",
            "images/a401377/a401377_05.jpg",
            "images/a401377/a401377_06.jpg",
            "images/a401377/a401377_07.jpg",
            "images/a401377/a401377_08.jpg",
            "images/a401377/a401377_09.jpg",
            "images/a401377/a401377_10.jpg",
            "images/a401377/a401377_11.jpg",
            "images/a401377/a401377_12.jpg",
            "images/a401377/a401377_13.jpg",
            "images/a401377/a401377_14.jpg",
            "images/a401377/a401377_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a cartoon style clip of a building going from 3 floors to 4 floors in construction  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401442",
        "images": [
            "images/a401442/a401442_00.jpg",
            "images/a401442/a401442_01.jpg",
            "images/a401442/a401442_02.jpg",
            "images/a401442/a401442_03.jpg",
            "images/a401442/a401442_04.jpg",
            "images/a401442/a401442_05.jpg",
            "images/a401442/a401442_06.jpg",
            "images/a401442/a401442_07.jpg",
            "images/a401442/a401442_08.jpg",
            "images/a401442/a401442_09.jpg",
            "images/a401442/a401442_10.jpg",
            "images/a401442/a401442_11.jpg",
            "images/a401442/a401442_12.jpg",
            "images/a401442/a401442_13.jpg",
            "images/a401442/a401442_14.jpg",
            "images/a401442/a401442_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"The background in the fire is driving a car  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700782",
        "images": [
            "images/a700782/a700782_00.jpg",
            "images/a700782/a700782_01.jpg",
            "images/a700782/a700782_02.jpg",
            "images/a700782/a700782_03.jpg",
            "images/a700782/a700782_04.jpg",
            "images/a700782/a700782_05.jpg",
            "images/a700782/a700782_06.jpg",
            "images/a700782/a700782_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Many dogs sitting in subway tunnels  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400096",
        "images": [
            "images/a400096/a400096_00.jpg",
            "images/a400096/a400096_01.jpg",
            "images/a400096/a400096_02.jpg",
            "images/a400096/a400096_03.jpg",
            "images/a400096/a400096_04.jpg",
            "images/a400096/a400096_05.jpg",
            "images/a400096/a400096_06.jpg",
            "images/a400096/a400096_07.jpg",
            "images/a400096/a400096_08.jpg",
            "images/a400096/a400096_09.jpg",
            "images/a400096/a400096_10.jpg",
            "images/a400096/a400096_11.jpg",
            "images/a400096/a400096_12.jpg",
            "images/a400096/a400096_13.jpg",
            "images/a400096/a400096_14.jpg",
            "images/a400096/a400096_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"bartender making a cocktail in a las vegas casino hotel bar  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700683",
        "images": [
            "images/a700683/a700683_00.jpg",
            "images/a700683/a700683_01.jpg",
            "images/a700683/a700683_02.jpg",
            "images/a700683/a700683_03.jpg",
            "images/a700683/a700683_04.jpg",
            "images/a700683/a700683_05.jpg",
            "images/a700683/a700683_06.jpg",
            "images/a700683/a700683_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"darth vader in a mosh pit slam dancing  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400146",
        "images": [
            "images/a400146/a400146_00.jpg",
            "images/a400146/a400146_01.jpg",
            "images/a400146/a400146_02.jpg",
            "images/a400146/a400146_03.jpg",
            "images/a400146/a400146_04.jpg",
            "images/a400146/a400146_05.jpg",
            "images/a400146/a400146_06.jpg",
            "images/a400146/a400146_07.jpg",
            "images/a400146/a400146_08.jpg",
            "images/a400146/a400146_09.jpg",
            "images/a400146/a400146_10.jpg",
            "images/a400146/a400146_11.jpg",
            "images/a400146/a400146_12.jpg",
            "images/a400146/a400146_13.jpg",
            "images/a400146/a400146_14.jpg",
            "images/a400146/a400146_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A wolf raising its paw to a tree trunk while laughing  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700465",
        "images": [
            "images/a700465/a700465_00.jpg",
            "images/a700465/a700465_01.jpg",
            "images/a700465/a700465_02.jpg",
            "images/a700465/a700465_03.jpg",
            "images/a700465/a700465_04.jpg",
            "images/a700465/a700465_05.jpg",
            "images/a700465/a700465_06.jpg",
            "images/a700465/a700465_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A fashion model wearing a black and white fur coat walks on the catwalk and moves  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700774",
        "images": [
            "images/a700774/a700774_00.jpg",
            "images/a700774/a700774_01.jpg",
            "images/a700774/a700774_02.jpg",
            "images/a700774/a700774_03.jpg",
            "images/a700774/a700774_04.jpg",
            "images/a700774/a700774_05.jpg",
            "images/a700774/a700774_06.jpg",
            "images/a700774/a700774_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a woman sitting on a lotus with different hindu elements in motion in her hand 10 seconds video  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400883",
        "images": [
            "images/a400883/a400883_00.jpg",
            "images/a400883/a400883_01.jpg",
            "images/a400883/a400883_02.jpg",
            "images/a400883/a400883_03.jpg",
            "images/a400883/a400883_04.jpg",
            "images/a400883/a400883_05.jpg",
            "images/a400883/a400883_06.jpg",
            "images/a400883/a400883_07.jpg",
            "images/a400883/a400883_08.jpg",
            "images/a400883/a400883_09.jpg",
            "images/a400883/a400883_10.jpg",
            "images/a400883/a400883_11.jpg",
            "images/a400883/a400883_12.jpg",
            "images/a400883/a400883_13.jpg",
            "images/a400883/a400883_14.jpg",
            "images/a400883/a400883_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"the boy enjoying song using headset  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700373",
        "images": [
            "images/a700373/a700373_00.jpg",
            "images/a700373/a700373_01.jpg",
            "images/a700373/a700373_02.jpg",
            "images/a700373/a700373_03.jpg",
            "images/a700373/a700373_04.jpg",
            "images/a700373/a700373_05.jpg",
            "images/a700373/a700373_06.jpg",
            "images/a700373/a700373_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a spirit being made of technology, rainbow electricity, and flames. 4k, hyperrealism  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400137",
        "images": [
            "images/a400137/a400137_00.jpg",
            "images/a400137/a400137_01.jpg",
            "images/a400137/a400137_02.jpg",
            "images/a400137/a400137_03.jpg",
            "images/a400137/a400137_04.jpg",
            "images/a400137/a400137_05.jpg",
            "images/a400137/a400137_06.jpg",
            "images/a400137/a400137_07.jpg",
            "images/a400137/a400137_08.jpg",
            "images/a400137/a400137_09.jpg",
            "images/a400137/a400137_10.jpg",
            "images/a400137/a400137_11.jpg",
            "images/a400137/a400137_12.jpg",
            "images/a400137/a400137_13.jpg",
            "images/a400137/a400137_14.jpg",
            "images/a400137/a400137_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"ancient egypt construction of the pyramid, sunset lighting, lens flares, Cinematic camera motion, clouds running  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400579",
        "images": [
            "images/a400579/a400579_00.jpg",
            "images/a400579/a400579_01.jpg",
            "images/a400579/a400579_02.jpg",
            "images/a400579/a400579_03.jpg",
            "images/a400579/a400579_04.jpg",
            "images/a400579/a400579_05.jpg",
            "images/a400579/a400579_06.jpg",
            "images/a400579/a400579_07.jpg",
            "images/a400579/a400579_08.jpg",
            "images/a400579/a400579_09.jpg",
            "images/a400579/a400579_10.jpg",
            "images/a400579/a400579_11.jpg",
            "images/a400579/a400579_12.jpg",
            "images/a400579/a400579_13.jpg",
            "images/a400579/a400579_14.jpg",
            "images/a400579/a400579_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A group of men visibly joyful and enthusiastic, celebrating amidst a display of money, conveying a sense of enjoyment and success.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400553",
        "images": [
            "images/a400553/a400553_00.jpg",
            "images/a400553/a400553_01.jpg",
            "images/a400553/a400553_02.jpg",
            "images/a400553/a400553_03.jpg",
            "images/a400553/a400553_04.jpg",
            "images/a400553/a400553_05.jpg",
            "images/a400553/a400553_06.jpg",
            "images/a400553/a400553_07.jpg",
            "images/a400553/a400553_08.jpg",
            "images/a400553/a400553_09.jpg",
            "images/a400553/a400553_10.jpg",
            "images/a400553/a400553_11.jpg",
            "images/a400553/a400553_12.jpg",
            "images/a400553/a400553_13.jpg",
            "images/a400553/a400553_14.jpg",
            "images/a400553/a400553_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a snow day in the jungle vector style  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500448",
        "images": [
            "images/a500448/a500448_00.jpg",
            "images/a500448/a500448_01.jpg",
            "images/a500448/a500448_02.jpg",
            "images/a500448/a500448_03.jpg",
            "images/a500448/a500448_04.jpg",
            "images/a500448/a500448_05.jpg",
            "images/a500448/a500448_06.jpg",
            "images/a500448/a500448_07.jpg",
            "images/a500448/a500448_08.jpg",
            "images/a500448/a500448_09.jpg",
            "images/a500448/a500448_10.jpg",
            "images/a500448/a500448_11.jpg",
            "images/a500448/a500448_12.jpg",
            "images/a500448/a500448_13.jpg",
            "images/a500448/a500448_14.jpg",
            "images/a500448/a500448_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"batman and robyn fighting joker on roof top at night  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500441",
        "images": [
            "images/a500441/a500441_00.jpg",
            "images/a500441/a500441_01.jpg",
            "images/a500441/a500441_02.jpg",
            "images/a500441/a500441_03.jpg",
            "images/a500441/a500441_04.jpg",
            "images/a500441/a500441_05.jpg",
            "images/a500441/a500441_06.jpg",
            "images/a500441/a500441_07.jpg",
            "images/a500441/a500441_08.jpg",
            "images/a500441/a500441_09.jpg",
            "images/a500441/a500441_10.jpg",
            "images/a500441/a500441_11.jpg",
            "images/a500441/a500441_12.jpg",
            "images/a500441/a500441_13.jpg",
            "images/a500441/a500441_14.jpg",
            "images/a500441/a500441_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A desolate, moonlit landscape with ruins scattered around, remnants of a once vibrant city. The air is charged with tension as two powerful anime characters face off in an epic battle.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700840",
        "images": [
            "images/a700840/a700840_00.jpg",
            "images/a700840/a700840_01.jpg",
            "images/a700840/a700840_02.jpg",
            "images/a700840/a700840_03.jpg",
            "images/a700840/a700840_04.jpg",
            "images/a700840/a700840_05.jpg",
            "images/a700840/a700840_06.jpg",
            "images/a700840/a700840_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Luna and Leo standing in awe before a field of floating bubbles, each bubble encapsulating a different scene of wonder and delight.Animated cartoon  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500078",
        "images": [
            "images/a500078/a500078_00.jpg",
            "images/a500078/a500078_01.jpg",
            "images/a500078/a500078_02.jpg",
            "images/a500078/a500078_03.jpg",
            "images/a500078/a500078_04.jpg",
            "images/a500078/a500078_05.jpg",
            "images/a500078/a500078_06.jpg",
            "images/a500078/a500078_07.jpg",
            "images/a500078/a500078_08.jpg",
            "images/a500078/a500078_09.jpg",
            "images/a500078/a500078_10.jpg",
            "images/a500078/a500078_11.jpg",
            "images/a500078/a500078_12.jpg",
            "images/a500078/a500078_13.jpg",
            "images/a500078/a500078_14.jpg",
            "images/a500078/a500078_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"macro editorial photo of the front of buildings, many glass towers, minimalist, muted tones  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500367",
        "images": [
            "images/a500367/a500367_00.jpg",
            "images/a500367/a500367_01.jpg",
            "images/a500367/a500367_02.jpg",
            "images/a500367/a500367_03.jpg",
            "images/a500367/a500367_04.jpg",
            "images/a500367/a500367_05.jpg",
            "images/a500367/a500367_06.jpg",
            "images/a500367/a500367_07.jpg",
            "images/a500367/a500367_08.jpg",
            "images/a500367/a500367_09.jpg",
            "images/a500367/a500367_10.jpg",
            "images/a500367/a500367_11.jpg",
            "images/a500367/a500367_12.jpg",
            "images/a500367/a500367_13.jpg",
            "images/a500367/a500367_14.jpg",
            "images/a500367/a500367_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"india, news, broadcasting, the man is telling the news.4k, high detaild,  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401319",
        "images": [
            "images/a401319/a401319_00.jpg",
            "images/a401319/a401319_01.jpg",
            "images/a401319/a401319_02.jpg",
            "images/a401319/a401319_03.jpg",
            "images/a401319/a401319_04.jpg",
            "images/a401319/a401319_05.jpg",
            "images/a401319/a401319_06.jpg",
            "images/a401319/a401319_07.jpg",
            "images/a401319/a401319_08.jpg",
            "images/a401319/a401319_09.jpg",
            "images/a401319/a401319_10.jpg",
            "images/a401319/a401319_11.jpg",
            "images/a401319/a401319_12.jpg",
            "images/a401319/a401319_13.jpg",
            "images/a401319/a401319_14.jpg",
            "images/a401319/a401319_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"The first picture shows the moment when a person meets a robot. A person expresses surprise and joy at seeing an intelligent robot standing in front of him, motion 2  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401355",
        "images": [
            "images/a401355/a401355_00.jpg",
            "images/a401355/a401355_01.jpg",
            "images/a401355/a401355_02.jpg",
            "images/a401355/a401355_03.jpg",
            "images/a401355/a401355_04.jpg",
            "images/a401355/a401355_05.jpg",
            "images/a401355/a401355_06.jpg",
            "images/a401355/a401355_07.jpg",
            "images/a401355/a401355_08.jpg",
            "images/a401355/a401355_09.jpg",
            "images/a401355/a401355_10.jpg",
            "images/a401355/a401355_11.jpg",
            "images/a401355/a401355_12.jpg",
            "images/a401355/a401355_13.jpg",
            "images/a401355/a401355_14.jpg",
            "images/a401355/a401355_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Cartoon factory background suitable for children  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401258",
        "images": [
            "images/a401258/a401258_00.jpg",
            "images/a401258/a401258_01.jpg",
            "images/a401258/a401258_02.jpg",
            "images/a401258/a401258_03.jpg",
            "images/a401258/a401258_04.jpg",
            "images/a401258/a401258_05.jpg",
            "images/a401258/a401258_06.jpg",
            "images/a401258/a401258_07.jpg",
            "images/a401258/a401258_08.jpg",
            "images/a401258/a401258_09.jpg",
            "images/a401258/a401258_10.jpg",
            "images/a401258/a401258_11.jpg",
            "images/a401258/a401258_12.jpg",
            "images/a401258/a401258_13.jpg",
            "images/a401258/a401258_14.jpg",
            "images/a401258/a401258_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Phoebe Cates Sitting at Boss Desk as Yakuza Boss  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401335",
        "images": [
            "images/a401335/a401335_00.jpg",
            "images/a401335/a401335_01.jpg",
            "images/a401335/a401335_02.jpg",
            "images/a401335/a401335_03.jpg",
            "images/a401335/a401335_04.jpg",
            "images/a401335/a401335_05.jpg",
            "images/a401335/a401335_06.jpg",
            "images/a401335/a401335_07.jpg",
            "images/a401335/a401335_08.jpg",
            "images/a401335/a401335_09.jpg",
            "images/a401335/a401335_10.jpg",
            "images/a401335/a401335_11.jpg",
            "images/a401335/a401335_12.jpg",
            "images/a401335/a401335_13.jpg",
            "images/a401335/a401335_14.jpg",
            "images/a401335/a401335_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"(Valera the worm crawls through the ground and gets to the surface. He sees Margot the Bug, who jumps with light jumps.)  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401394",
        "images": [
            "images/a401394/a401394_00.jpg",
            "images/a401394/a401394_01.jpg",
            "images/a401394/a401394_02.jpg",
            "images/a401394/a401394_03.jpg",
            "images/a401394/a401394_04.jpg",
            "images/a401394/a401394_05.jpg",
            "images/a401394/a401394_06.jpg",
            "images/a401394/a401394_07.jpg",
            "images/a401394/a401394_08.jpg",
            "images/a401394/a401394_09.jpg",
            "images/a401394/a401394_10.jpg",
            "images/a401394/a401394_11.jpg",
            "images/a401394/a401394_12.jpg",
            "images/a401394/a401394_13.jpg",
            "images/a401394/a401394_14.jpg",
            "images/a401394/a401394_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"white electricity crackling, the air fizzing in excitement.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500424",
        "images": [
            "images/a500424/a500424_00.jpg",
            "images/a500424/a500424_01.jpg",
            "images/a500424/a500424_02.jpg",
            "images/a500424/a500424_03.jpg",
            "images/a500424/a500424_04.jpg",
            "images/a500424/a500424_05.jpg",
            "images/a500424/a500424_06.jpg",
            "images/a500424/a500424_07.jpg",
            "images/a500424/a500424_08.jpg",
            "images/a500424/a500424_09.jpg",
            "images/a500424/a500424_10.jpg",
            "images/a500424/a500424_11.jpg",
            "images/a500424/a500424_12.jpg",
            "images/a500424/a500424_13.jpg",
            "images/a500424/a500424_14.jpg",
            "images/a500424/a500424_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"cartoon video of a fiery explosion  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700396",
        "images": [
            "images/a700396/a700396_00.jpg",
            "images/a700396/a700396_01.jpg",
            "images/a700396/a700396_02.jpg",
            "images/a700396/a700396_03.jpg",
            "images/a700396/a700396_04.jpg",
            "images/a700396/a700396_05.jpg",
            "images/a700396/a700396_06.jpg",
            "images/a700396/a700396_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Little Blue carefully carries the bird with its mouth to a warm place.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400613",
        "images": [
            "images/a400613/a400613_00.jpg",
            "images/a400613/a400613_01.jpg",
            "images/a400613/a400613_02.jpg",
            "images/a400613/a400613_03.jpg",
            "images/a400613/a400613_04.jpg",
            "images/a400613/a400613_05.jpg",
            "images/a400613/a400613_06.jpg",
            "images/a400613/a400613_07.jpg",
            "images/a400613/a400613_08.jpg",
            "images/a400613/a400613_09.jpg",
            "images/a400613/a400613_10.jpg",
            "images/a400613/a400613_11.jpg",
            "images/a400613/a400613_12.jpg",
            "images/a400613/a400613_13.jpg",
            "images/a400613/a400613_14.jpg",
            "images/a400613/a400613_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a cyberpunk woman staring at you from the top of a building while it is raining 4K ultra realistic  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400435",
        "images": [
            "images/a400435/a400435_00.jpg",
            "images/a400435/a400435_01.jpg",
            "images/a400435/a400435_02.jpg",
            "images/a400435/a400435_03.jpg",
            "images/a400435/a400435_04.jpg",
            "images/a400435/a400435_05.jpg",
            "images/a400435/a400435_06.jpg",
            "images/a400435/a400435_07.jpg",
            "images/a400435/a400435_08.jpg",
            "images/a400435/a400435_09.jpg",
            "images/a400435/a400435_10.jpg",
            "images/a400435/a400435_11.jpg",
            "images/a400435/a400435_12.jpg",
            "images/a400435/a400435_13.jpg",
            "images/a400435/a400435_14.jpg",
            "images/a400435/a400435_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"aan hugging his motorcycle on a roadside near an ocean  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700216",
        "images": [
            "images/a700216/a700216_00.jpg",
            "images/a700216/a700216_01.jpg",
            "images/a700216/a700216_02.jpg",
            "images/a700216/a700216_03.jpg",
            "images/a700216/a700216_04.jpg",
            "images/a700216/a700216_05.jpg",
            "images/a700216/a700216_06.jpg",
            "images/a700216/a700216_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Many centuries ago, in the splendor of ancient Jerusalem  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401172",
        "images": [
            "images/a401172/a401172_00.jpg",
            "images/a401172/a401172_01.jpg",
            "images/a401172/a401172_02.jpg",
            "images/a401172/a401172_03.jpg",
            "images/a401172/a401172_04.jpg",
            "images/a401172/a401172_05.jpg",
            "images/a401172/a401172_06.jpg",
            "images/a401172/a401172_07.jpg",
            "images/a401172/a401172_08.jpg",
            "images/a401172/a401172_09.jpg",
            "images/a401172/a401172_10.jpg",
            "images/a401172/a401172_11.jpg",
            "images/a401172/a401172_12.jpg",
            "images/a401172/a401172_13.jpg",
            "images/a401172/a401172_14.jpg",
            "images/a401172/a401172_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"writer who write amazing song lyrics  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500468",
        "images": [
            "images/a500468/a500468_00.jpg",
            "images/a500468/a500468_01.jpg",
            "images/a500468/a500468_02.jpg",
            "images/a500468/a500468_03.jpg",
            "images/a500468/a500468_04.jpg",
            "images/a500468/a500468_05.jpg",
            "images/a500468/a500468_06.jpg",
            "images/a500468/a500468_07.jpg",
            "images/a500468/a500468_08.jpg",
            "images/a500468/a500468_09.jpg",
            "images/a500468/a500468_10.jpg",
            "images/a500468/a500468_11.jpg",
            "images/a500468/a500468_12.jpg",
            "images/a500468/a500468_13.jpg",
            "images/a500468/a500468_14.jpg",
            "images/a500468/a500468_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"cinematic scene bird eating worm blur  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500492",
        "images": [
            "images/a500492/a500492_00.jpg",
            "images/a500492/a500492_01.jpg",
            "images/a500492/a500492_02.jpg",
            "images/a500492/a500492_03.jpg",
            "images/a500492/a500492_04.jpg",
            "images/a500492/a500492_05.jpg",
            "images/a500492/a500492_06.jpg",
            "images/a500492/a500492_07.jpg",
            "images/a500492/a500492_08.jpg",
            "images/a500492/a500492_09.jpg",
            "images/a500492/a500492_10.jpg",
            "images/a500492/a500492_11.jpg",
            "images/a500492/a500492_12.jpg",
            "images/a500492/a500492_13.jpg",
            "images/a500492/a500492_14.jpg",
            "images/a500492/a500492_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"rabbit eating a carrot in the style of Disney Pixar  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400290",
        "images": [
            "images/a400290/a400290_00.jpg",
            "images/a400290/a400290_01.jpg",
            "images/a400290/a400290_02.jpg",
            "images/a400290/a400290_03.jpg",
            "images/a400290/a400290_04.jpg",
            "images/a400290/a400290_05.jpg",
            "images/a400290/a400290_06.jpg",
            "images/a400290/a400290_07.jpg",
            "images/a400290/a400290_08.jpg",
            "images/a400290/a400290_09.jpg",
            "images/a400290/a400290_10.jpg",
            "images/a400290/a400290_11.jpg",
            "images/a400290/a400290_12.jpg",
            "images/a400290/a400290_13.jpg",
            "images/a400290/a400290_14.jpg",
            "images/a400290/a400290_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"red room with multiple doors on left and right side and narrow in the end  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401186",
        "images": [
            "images/a401186/a401186_00.jpg",
            "images/a401186/a401186_01.jpg",
            "images/a401186/a401186_02.jpg",
            "images/a401186/a401186_03.jpg",
            "images/a401186/a401186_04.jpg",
            "images/a401186/a401186_05.jpg",
            "images/a401186/a401186_06.jpg",
            "images/a401186/a401186_07.jpg",
            "images/a401186/a401186_08.jpg",
            "images/a401186/a401186_09.jpg",
            "images/a401186/a401186_10.jpg",
            "images/a401186/a401186_11.jpg",
            "images/a401186/a401186_12.jpg",
            "images/a401186/a401186_13.jpg",
            "images/a401186/a401186_14.jpg",
            "images/a401186/a401186_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a lifelike octopus swimming in a dank underwater cave  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700251",
        "images": [
            "images/a700251/a700251_00.jpg",
            "images/a700251/a700251_01.jpg",
            "images/a700251/a700251_02.jpg",
            "images/a700251/a700251_03.jpg",
            "images/a700251/a700251_04.jpg",
            "images/a700251/a700251_05.jpg",
            "images/a700251/a700251_06.jpg",
            "images/a700251/a700251_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A girl, surrounded by a snowy landscape, with the wind tousling her hair, as the camera slowly zooms in.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400797",
        "images": [
            "images/a400797/a400797_00.jpg",
            "images/a400797/a400797_01.jpg",
            "images/a400797/a400797_02.jpg",
            "images/a400797/a400797_03.jpg",
            "images/a400797/a400797_04.jpg",
            "images/a400797/a400797_05.jpg",
            "images/a400797/a400797_06.jpg",
            "images/a400797/a400797_07.jpg",
            "images/a400797/a400797_08.jpg",
            "images/a400797/a400797_09.jpg",
            "images/a400797/a400797_10.jpg",
            "images/a400797/a400797_11.jpg",
            "images/a400797/a400797_12.jpg",
            "images/a400797/a400797_13.jpg",
            "images/a400797/a400797_14.jpg",
            "images/a400797/a400797_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"action scene from tom cruize movies  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400097",
        "images": [
            "images/a400097/a400097_00.jpg",
            "images/a400097/a400097_01.jpg",
            "images/a400097/a400097_02.jpg",
            "images/a400097/a400097_03.jpg",
            "images/a400097/a400097_04.jpg",
            "images/a400097/a400097_05.jpg",
            "images/a400097/a400097_06.jpg",
            "images/a400097/a400097_07.jpg",
            "images/a400097/a400097_08.jpg",
            "images/a400097/a400097_09.jpg",
            "images/a400097/a400097_10.jpg",
            "images/a400097/a400097_11.jpg",
            "images/a400097/a400097_12.jpg",
            "images/a400097/a400097_13.jpg",
            "images/a400097/a400097_14.jpg",
            "images/a400097/a400097_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a cyclone moving in the night  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400490",
        "images": [
            "images/a400490/a400490_00.jpg",
            "images/a400490/a400490_01.jpg",
            "images/a400490/a400490_02.jpg",
            "images/a400490/a400490_03.jpg",
            "images/a400490/a400490_04.jpg",
            "images/a400490/a400490_05.jpg",
            "images/a400490/a400490_06.jpg",
            "images/a400490/a400490_07.jpg",
            "images/a400490/a400490_08.jpg",
            "images/a400490/a400490_09.jpg",
            "images/a400490/a400490_10.jpg",
            "images/a400490/a400490_11.jpg",
            "images/a400490/a400490_12.jpg",
            "images/a400490/a400490_13.jpg",
            "images/a400490/a400490_14.jpg",
            "images/a400490/a400490_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"ceiling fan, one ceiling fan, zido, maxar, fans, fans hals, digital art h 9 6 0, productphoto, scenic full shot, rendering, tall ceiling, bottom angle, h 576, isometric view, wide portrait, h 8 0 0 c 1 0. 0, overhead angle, low ceiling  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401065",
        "images": [
            "images/a401065/a401065_00.jpg",
            "images/a401065/a401065_01.jpg",
            "images/a401065/a401065_02.jpg",
            "images/a401065/a401065_03.jpg",
            "images/a401065/a401065_04.jpg",
            "images/a401065/a401065_05.jpg",
            "images/a401065/a401065_06.jpg",
            "images/a401065/a401065_07.jpg",
            "images/a401065/a401065_08.jpg",
            "images/a401065/a401065_09.jpg",
            "images/a401065/a401065_10.jpg",
            "images/a401065/a401065_11.jpg",
            "images/a401065/a401065_12.jpg",
            "images/a401065/a401065_13.jpg",
            "images/a401065/a401065_14.jpg",
            "images/a401065/a401065_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"create a video of a boy playing basketball  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500135",
        "images": [
            "images/a500135/a500135_00.jpg",
            "images/a500135/a500135_01.jpg",
            "images/a500135/a500135_02.jpg",
            "images/a500135/a500135_03.jpg",
            "images/a500135/a500135_04.jpg",
            "images/a500135/a500135_05.jpg",
            "images/a500135/a500135_06.jpg",
            "images/a500135/a500135_07.jpg",
            "images/a500135/a500135_08.jpg",
            "images/a500135/a500135_09.jpg",
            "images/a500135/a500135_10.jpg",
            "images/a500135/a500135_11.jpg",
            "images/a500135/a500135_12.jpg",
            "images/a500135/a500135_13.jpg",
            "images/a500135/a500135_14.jpg",
            "images/a500135/a500135_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"welcome to my channel crypto addict 8k stunning visual ar: 9:16  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400487",
        "images": [
            "images/a400487/a400487_00.jpg",
            "images/a400487/a400487_01.jpg",
            "images/a400487/a400487_02.jpg",
            "images/a400487/a400487_03.jpg",
            "images/a400487/a400487_04.jpg",
            "images/a400487/a400487_05.jpg",
            "images/a400487/a400487_06.jpg",
            "images/a400487/a400487_07.jpg",
            "images/a400487/a400487_08.jpg",
            "images/a400487/a400487_09.jpg",
            "images/a400487/a400487_10.jpg",
            "images/a400487/a400487_11.jpg",
            "images/a400487/a400487_12.jpg",
            "images/a400487/a400487_13.jpg",
            "images/a400487/a400487_14.jpg",
            "images/a400487/a400487_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a benevolent artificial intelligence designed to save the world, a female digital faerie, realistic, tinker bell, traces of light, eyes opening, motion 3, dark, electronic  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500010",
        "images": [
            "images/a500010/a500010_00.jpg",
            "images/a500010/a500010_01.jpg",
            "images/a500010/a500010_02.jpg",
            "images/a500010/a500010_03.jpg",
            "images/a500010/a500010_04.jpg",
            "images/a500010/a500010_05.jpg",
            "images/a500010/a500010_06.jpg",
            "images/a500010/a500010_07.jpg",
            "images/a500010/a500010_08.jpg",
            "images/a500010/a500010_09.jpg",
            "images/a500010/a500010_10.jpg",
            "images/a500010/a500010_11.jpg",
            "images/a500010/a500010_12.jpg",
            "images/a500010/a500010_13.jpg",
            "images/a500010/a500010_14.jpg",
            "images/a500010/a500010_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a druid in the forest cinematic  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400730",
        "images": [
            "images/a400730/a400730_00.jpg",
            "images/a400730/a400730_01.jpg",
            "images/a400730/a400730_02.jpg",
            "images/a400730/a400730_03.jpg",
            "images/a400730/a400730_04.jpg",
            "images/a400730/a400730_05.jpg",
            "images/a400730/a400730_06.jpg",
            "images/a400730/a400730_07.jpg",
            "images/a400730/a400730_08.jpg",
            "images/a400730/a400730_09.jpg",
            "images/a400730/a400730_10.jpg",
            "images/a400730/a400730_11.jpg",
            "images/a400730/a400730_12.jpg",
            "images/a400730/a400730_13.jpg",
            "images/a400730/a400730_14.jpg",
            "images/a400730/a400730_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"the darkness is coming. It is inevitable.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400436",
        "images": [
            "images/a400436/a400436_00.jpg",
            "images/a400436/a400436_01.jpg",
            "images/a400436/a400436_02.jpg",
            "images/a400436/a400436_03.jpg",
            "images/a400436/a400436_04.jpg",
            "images/a400436/a400436_05.jpg",
            "images/a400436/a400436_06.jpg",
            "images/a400436/a400436_07.jpg",
            "images/a400436/a400436_08.jpg",
            "images/a400436/a400436_09.jpg",
            "images/a400436/a400436_10.jpg",
            "images/a400436/a400436_11.jpg",
            "images/a400436/a400436_12.jpg",
            "images/a400436/a400436_13.jpg",
            "images/a400436/a400436_14.jpg",
            "images/a400436/a400436_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A painting made by Zues throwing a bolt of light  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700508",
        "images": [
            "images/a700508/a700508_00.jpg",
            "images/a700508/a700508_01.jpg",
            "images/a700508/a700508_02.jpg",
            "images/a700508/a700508_03.jpg",
            "images/a700508/a700508_04.jpg",
            "images/a700508/a700508_05.jpg",
            "images/a700508/a700508_06.jpg",
            "images/a700508/a700508_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"haunted Abandoned hospital washroom and a girl lock herself in one washroom  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400284",
        "images": [
            "images/a400284/a400284_00.jpg",
            "images/a400284/a400284_01.jpg",
            "images/a400284/a400284_02.jpg",
            "images/a400284/a400284_03.jpg",
            "images/a400284/a400284_04.jpg",
            "images/a400284/a400284_05.jpg",
            "images/a400284/a400284_06.jpg",
            "images/a400284/a400284_07.jpg",
            "images/a400284/a400284_08.jpg",
            "images/a400284/a400284_09.jpg",
            "images/a400284/a400284_10.jpg",
            "images/a400284/a400284_11.jpg",
            "images/a400284/a400284_12.jpg",
            "images/a400284/a400284_13.jpg",
            "images/a400284/a400284_14.jpg",
            "images/a400284/a400284_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"abandoned shed styled like traditional japanese architecture, falling apart, broken, overgrown, in a magical forest, golden lighting, ghibli anime style  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700495",
        "images": [
            "images/a700495/a700495_00.jpg",
            "images/a700495/a700495_01.jpg",
            "images/a700495/a700495_02.jpg",
            "images/a700495/a700495_03.jpg",
            "images/a700495/a700495_04.jpg",
            "images/a700495/a700495_05.jpg",
            "images/a700495/a700495_06.jpg",
            "images/a700495/a700495_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"future world becomes more intelligent and very advanced building, modern city, machinary and cars  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400376",
        "images": [
            "images/a400376/a400376_00.jpg",
            "images/a400376/a400376_01.jpg",
            "images/a400376/a400376_02.jpg",
            "images/a400376/a400376_03.jpg",
            "images/a400376/a400376_04.jpg",
            "images/a400376/a400376_05.jpg",
            "images/a400376/a400376_06.jpg",
            "images/a400376/a400376_07.jpg",
            "images/a400376/a400376_08.jpg",
            "images/a400376/a400376_09.jpg",
            "images/a400376/a400376_10.jpg",
            "images/a400376/a400376_11.jpg",
            "images/a400376/a400376_12.jpg",
            "images/a400376/a400376_13.jpg",
            "images/a400376/a400376_14.jpg",
            "images/a400376/a400376_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"whirling sufi dance ,rotating , vibrant color, vortex, realistic, vivid colors, highly detailed  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400155",
        "images": [
            "images/a400155/a400155_00.jpg",
            "images/a400155/a400155_01.jpg",
            "images/a400155/a400155_02.jpg",
            "images/a400155/a400155_03.jpg",
            "images/a400155/a400155_04.jpg",
            "images/a400155/a400155_05.jpg",
            "images/a400155/a400155_06.jpg",
            "images/a400155/a400155_07.jpg",
            "images/a400155/a400155_08.jpg",
            "images/a400155/a400155_09.jpg",
            "images/a400155/a400155_10.jpg",
            "images/a400155/a400155_11.jpg",
            "images/a400155/a400155_12.jpg",
            "images/a400155/a400155_13.jpg",
            "images/a400155/a400155_14.jpg",
            "images/a400155/a400155_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"woman filmaker, films a beatiful sunset in mountain  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500136",
        "images": [
            "images/a500136/a500136_00.jpg",
            "images/a500136/a500136_01.jpg",
            "images/a500136/a500136_02.jpg",
            "images/a500136/a500136_03.jpg",
            "images/a500136/a500136_04.jpg",
            "images/a500136/a500136_05.jpg",
            "images/a500136/a500136_06.jpg",
            "images/a500136/a500136_07.jpg",
            "images/a500136/a500136_08.jpg",
            "images/a500136/a500136_09.jpg",
            "images/a500136/a500136_10.jpg",
            "images/a500136/a500136_11.jpg",
            "images/a500136/a500136_12.jpg",
            "images/a500136/a500136_13.jpg",
            "images/a500136/a500136_14.jpg",
            "images/a500136/a500136_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"In a small, picturesque town nestled amidst rolling hills and lush forests, a mysterious event unfolds as the clock strikes midnight. As the townspeople slumber, an ordinary dog named Max discovers an enchanted collar that grants him the ability to communicate with humans  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401166",
        "images": [
            "images/a401166/a401166_00.jpg",
            "images/a401166/a401166_01.jpg",
            "images/a401166/a401166_02.jpg",
            "images/a401166/a401166_03.jpg",
            "images/a401166/a401166_04.jpg",
            "images/a401166/a401166_05.jpg",
            "images/a401166/a401166_06.jpg",
            "images/a401166/a401166_07.jpg",
            "images/a401166/a401166_08.jpg",
            "images/a401166/a401166_09.jpg",
            "images/a401166/a401166_10.jpg",
            "images/a401166/a401166_11.jpg",
            "images/a401166/a401166_12.jpg",
            "images/a401166/a401166_13.jpg",
            "images/a401166/a401166_14.jpg",
            "images/a401166/a401166_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"As the sun rose, the villagers found magical gifts outside their door  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700531",
        "images": [
            "images/a700531/a700531_00.jpg",
            "images/a700531/a700531_01.jpg",
            "images/a700531/a700531_02.jpg",
            "images/a700531/a700531_03.jpg",
            "images/a700531/a700531_04.jpg",
            "images/a700531/a700531_05.jpg",
            "images/a700531/a700531_06.jpg",
            "images/a700531/a700531_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"TV showing a woman reading the news on TV  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400439",
        "images": [
            "images/a400439/a400439_00.jpg",
            "images/a400439/a400439_01.jpg",
            "images/a400439/a400439_02.jpg",
            "images/a400439/a400439_03.jpg",
            "images/a400439/a400439_04.jpg",
            "images/a400439/a400439_05.jpg",
            "images/a400439/a400439_06.jpg",
            "images/a400439/a400439_07.jpg",
            "images/a400439/a400439_08.jpg",
            "images/a400439/a400439_09.jpg",
            "images/a400439/a400439_10.jpg",
            "images/a400439/a400439_11.jpg",
            "images/a400439/a400439_12.jpg",
            "images/a400439/a400439_13.jpg",
            "images/a400439/a400439_14.jpg",
            "images/a400439/a400439_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A archer style drawing of a man in world war 2  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400320",
        "images": [
            "images/a400320/a400320_00.jpg",
            "images/a400320/a400320_01.jpg",
            "images/a400320/a400320_02.jpg",
            "images/a400320/a400320_03.jpg",
            "images/a400320/a400320_04.jpg",
            "images/a400320/a400320_05.jpg",
            "images/a400320/a400320_06.jpg",
            "images/a400320/a400320_07.jpg",
            "images/a400320/a400320_08.jpg",
            "images/a400320/a400320_09.jpg",
            "images/a400320/a400320_10.jpg",
            "images/a400320/a400320_11.jpg",
            "images/a400320/a400320_12.jpg",
            "images/a400320/a400320_13.jpg",
            "images/a400320/a400320_14.jpg",
            "images/a400320/a400320_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"an ant  found a grain of sugar.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400179",
        "images": [
            "images/a400179/a400179_00.jpg",
            "images/a400179/a400179_01.jpg",
            "images/a400179/a400179_02.jpg",
            "images/a400179/a400179_03.jpg",
            "images/a400179/a400179_04.jpg",
            "images/a400179/a400179_05.jpg",
            "images/a400179/a400179_06.jpg",
            "images/a400179/a400179_07.jpg",
            "images/a400179/a400179_08.jpg",
            "images/a400179/a400179_09.jpg",
            "images/a400179/a400179_10.jpg",
            "images/a400179/a400179_11.jpg",
            "images/a400179/a400179_12.jpg",
            "images/a400179/a400179_13.jpg",
            "images/a400179/a400179_14.jpg",
            "images/a400179/a400179_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"hong kong in very heavy rain  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700984",
        "images": [
            "images/a700984/a700984_00.jpg",
            "images/a700984/a700984_01.jpg",
            "images/a700984/a700984_02.jpg",
            "images/a700984/a700984_03.jpg",
            "images/a700984/a700984_04.jpg",
            "images/a700984/a700984_05.jpg",
            "images/a700984/a700984_06.jpg",
            "images/a700984/a700984_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A cute panda happily eating bamboo in front of a building that looks like a round packaging box  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700533",
        "images": [
            "images/a700533/a700533_00.jpg",
            "images/a700533/a700533_01.jpg",
            "images/a700533/a700533_02.jpg",
            "images/a700533/a700533_03.jpg",
            "images/a700533/a700533_04.jpg",
            "images/a700533/a700533_05.jpg",
            "images/a700533/a700533_06.jpg",
            "images/a700533/a700533_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Castle fortress in a night with green fog Art  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400503",
        "images": [
            "images/a400503/a400503_00.jpg",
            "images/a400503/a400503_01.jpg",
            "images/a400503/a400503_02.jpg",
            "images/a400503/a400503_03.jpg",
            "images/a400503/a400503_04.jpg",
            "images/a400503/a400503_05.jpg",
            "images/a400503/a400503_06.jpg",
            "images/a400503/a400503_07.jpg",
            "images/a400503/a400503_08.jpg",
            "images/a400503/a400503_09.jpg",
            "images/a400503/a400503_10.jpg",
            "images/a400503/a400503_11.jpg",
            "images/a400503/a400503_12.jpg",
            "images/a400503/a400503_13.jpg",
            "images/a400503/a400503_14.jpg",
            "images/a400503/a400503_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a dancing cat in ohio Street ward no 67  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400976",
        "images": [
            "images/a400976/a400976_00.jpg",
            "images/a400976/a400976_01.jpg",
            "images/a400976/a400976_02.jpg",
            "images/a400976/a400976_03.jpg",
            "images/a400976/a400976_04.jpg",
            "images/a400976/a400976_05.jpg",
            "images/a400976/a400976_06.jpg",
            "images/a400976/a400976_07.jpg",
            "images/a400976/a400976_08.jpg",
            "images/a400976/a400976_09.jpg",
            "images/a400976/a400976_10.jpg",
            "images/a400976/a400976_11.jpg",
            "images/a400976/a400976_12.jpg",
            "images/a400976/a400976_13.jpg",
            "images/a400976/a400976_14.jpg",
            "images/a400976/a400976_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a women with pink hair and white skin with bikini running in Iceland  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400423",
        "images": [
            "images/a400423/a400423_00.jpg",
            "images/a400423/a400423_01.jpg",
            "images/a400423/a400423_02.jpg",
            "images/a400423/a400423_03.jpg",
            "images/a400423/a400423_04.jpg",
            "images/a400423/a400423_05.jpg",
            "images/a400423/a400423_06.jpg",
            "images/a400423/a400423_07.jpg",
            "images/a400423/a400423_08.jpg",
            "images/a400423/a400423_09.jpg",
            "images/a400423/a400423_10.jpg",
            "images/a400423/a400423_11.jpg",
            "images/a400423/a400423_12.jpg",
            "images/a400423/a400423_13.jpg",
            "images/a400423/a400423_14.jpg",
            "images/a400423/a400423_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A girl dances in the moonlight to Bach music  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700213",
        "images": [
            "images/a700213/a700213_00.jpg",
            "images/a700213/a700213_01.jpg",
            "images/a700213/a700213_02.jpg",
            "images/a700213/a700213_03.jpg",
            "images/a700213/a700213_04.jpg",
            "images/a700213/a700213_05.jpg",
            "images/a700213/a700213_06.jpg",
            "images/a700213/a700213_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a little gril in the wind  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400927",
        "images": [
            "images/a400927/a400927_00.jpg",
            "images/a400927/a400927_01.jpg",
            "images/a400927/a400927_02.jpg",
            "images/a400927/a400927_03.jpg",
            "images/a400927/a400927_04.jpg",
            "images/a400927/a400927_05.jpg",
            "images/a400927/a400927_06.jpg",
            "images/a400927/a400927_07.jpg",
            "images/a400927/a400927_08.jpg",
            "images/a400927/a400927_09.jpg",
            "images/a400927/a400927_10.jpg",
            "images/a400927/a400927_11.jpg",
            "images/a400927/a400927_12.jpg",
            "images/a400927/a400927_13.jpg",
            "images/a400927/a400927_14.jpg",
            "images/a400927/a400927_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A tv screen, The weather channel, a weatheman points at a map of the country, storms  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400983",
        "images": [
            "images/a400983/a400983_00.jpg",
            "images/a400983/a400983_01.jpg",
            "images/a400983/a400983_02.jpg",
            "images/a400983/a400983_03.jpg",
            "images/a400983/a400983_04.jpg",
            "images/a400983/a400983_05.jpg",
            "images/a400983/a400983_06.jpg",
            "images/a400983/a400983_07.jpg",
            "images/a400983/a400983_08.jpg",
            "images/a400983/a400983_09.jpg",
            "images/a400983/a400983_10.jpg",
            "images/a400983/a400983_11.jpg",
            "images/a400983/a400983_12.jpg",
            "images/a400983/a400983_13.jpg",
            "images/a400983/a400983_14.jpg",
            "images/a400983/a400983_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"waiting for the rain to stop cartoon image  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700300",
        "images": [
            "images/a700300/a700300_00.jpg",
            "images/a700300/a700300_01.jpg",
            "images/a700300/a700300_02.jpg",
            "images/a700300/a700300_03.jpg",
            "images/a700300/a700300_04.jpg",
            "images/a700300/a700300_05.jpg",
            "images/a700300/a700300_06.jpg",
            "images/a700300/a700300_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Create enchanting images of elegant and alluring women, each framed in exquisite poses and adorned with captivating backgrounds that amplify their beauty. Explore a diverse range of settings, from picturesque landscapes to sophisticated urban scenes, to bring forth the essence of glamour and allure in the generated images.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700038",
        "images": [
            "images/a700038/a700038_00.jpg",
            "images/a700038/a700038_01.jpg",
            "images/a700038/a700038_02.jpg",
            "images/a700038/a700038_03.jpg",
            "images/a700038/a700038_04.jpg",
            "images/a700038/a700038_05.jpg",
            "images/a700038/a700038_06.jpg",
            "images/a700038/a700038_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Why Satan Appeared Before God in the Bible, 8k, realistic, fulhd  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700125",
        "images": [
            "images/a700125/a700125_00.jpg",
            "images/a700125/a700125_01.jpg",
            "images/a700125/a700125_02.jpg",
            "images/a700125/a700125_03.jpg",
            "images/a700125/a700125_04.jpg",
            "images/a700125/a700125_05.jpg",
            "images/a700125/a700125_06.jpg",
            "images/a700125/a700125_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Experience the consequences of relying too heavily on AI, as it leads to a society on the brink of collapse. With a cinematic style, this visual will leave a lasting impression on the dangers of unchecked technological advancement.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700329",
        "images": [
            "images/a700329/a700329_00.jpg",
            "images/a700329/a700329_01.jpg",
            "images/a700329/a700329_02.jpg",
            "images/a700329/a700329_03.jpg",
            "images/a700329/a700329_04.jpg",
            "images/a700329/a700329_05.jpg",
            "images/a700329/a700329_06.jpg",
            "images/a700329/a700329_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Handsome Asian boy illustration image talking  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400305",
        "images": [
            "images/a400305/a400305_00.jpg",
            "images/a400305/a400305_01.jpg",
            "images/a400305/a400305_02.jpg",
            "images/a400305/a400305_03.jpg",
            "images/a400305/a400305_04.jpg",
            "images/a400305/a400305_05.jpg",
            "images/a400305/a400305_06.jpg",
            "images/a400305/a400305_07.jpg",
            "images/a400305/a400305_08.jpg",
            "images/a400305/a400305_09.jpg",
            "images/a400305/a400305_10.jpg",
            "images/a400305/a400305_11.jpg",
            "images/a400305/a400305_12.jpg",
            "images/a400305/a400305_13.jpg",
            "images/a400305/a400305_14.jpg",
            "images/a400305/a400305_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Roofs that protect from the sun  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400879",
        "images": [
            "images/a400879/a400879_00.jpg",
            "images/a400879/a400879_01.jpg",
            "images/a400879/a400879_02.jpg",
            "images/a400879/a400879_03.jpg",
            "images/a400879/a400879_04.jpg",
            "images/a400879/a400879_05.jpg",
            "images/a400879/a400879_06.jpg",
            "images/a400879/a400879_07.jpg",
            "images/a400879/a400879_08.jpg",
            "images/a400879/a400879_09.jpg",
            "images/a400879/a400879_10.jpg",
            "images/a400879/a400879_11.jpg",
            "images/a400879/a400879_12.jpg",
            "images/a400879/a400879_13.jpg",
            "images/a400879/a400879_14.jpg",
            "images/a400879/a400879_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"man in the airport walking to the gate  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400329",
        "images": [
            "images/a400329/a400329_00.jpg",
            "images/a400329/a400329_01.jpg",
            "images/a400329/a400329_02.jpg",
            "images/a400329/a400329_03.jpg",
            "images/a400329/a400329_04.jpg",
            "images/a400329/a400329_05.jpg",
            "images/a400329/a400329_06.jpg",
            "images/a400329/a400329_07.jpg",
            "images/a400329/a400329_08.jpg",
            "images/a400329/a400329_09.jpg",
            "images/a400329/a400329_10.jpg",
            "images/a400329/a400329_11.jpg",
            "images/a400329/a400329_12.jpg",
            "images/a400329/a400329_13.jpg",
            "images/a400329/a400329_14.jpg",
            "images/a400329/a400329_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a girl in a blue dress stands near a water tree  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400718",
        "images": [
            "images/a400718/a400718_00.jpg",
            "images/a400718/a400718_01.jpg",
            "images/a400718/a400718_02.jpg",
            "images/a400718/a400718_03.jpg",
            "images/a400718/a400718_04.jpg",
            "images/a400718/a400718_05.jpg",
            "images/a400718/a400718_06.jpg",
            "images/a400718/a400718_07.jpg",
            "images/a400718/a400718_08.jpg",
            "images/a400718/a400718_09.jpg",
            "images/a400718/a400718_10.jpg",
            "images/a400718/a400718_11.jpg",
            "images/a400718/a400718_12.jpg",
            "images/a400718/a400718_13.jpg",
            "images/a400718/a400718_14.jpg",
            "images/a400718/a400718_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"spaceship launching into space with cheering crowds below.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700621",
        "images": [
            "images/a700621/a700621_00.jpg",
            "images/a700621/a700621_01.jpg",
            "images/a700621/a700621_02.jpg",
            "images/a700621/a700621_03.jpg",
            "images/a700621/a700621_04.jpg",
            "images/a700621/a700621_05.jpg",
            "images/a700621/a700621_06.jpg",
            "images/a700621/a700621_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Ancient Chinese general with a big green lake in the background  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700769",
        "images": [
            "images/a700769/a700769_00.jpg",
            "images/a700769/a700769_01.jpg",
            "images/a700769/a700769_02.jpg",
            "images/a700769/a700769_03.jpg",
            "images/a700769/a700769_04.jpg",
            "images/a700769/a700769_05.jpg",
            "images/a700769/a700769_06.jpg",
            "images/a700769/a700769_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"CIBER PUNK field of tombstones rain background  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401199",
        "images": [
            "images/a401199/a401199_00.jpg",
            "images/a401199/a401199_01.jpg",
            "images/a401199/a401199_02.jpg",
            "images/a401199/a401199_03.jpg",
            "images/a401199/a401199_04.jpg",
            "images/a401199/a401199_05.jpg",
            "images/a401199/a401199_06.jpg",
            "images/a401199/a401199_07.jpg",
            "images/a401199/a401199_08.jpg",
            "images/a401199/a401199_09.jpg",
            "images/a401199/a401199_10.jpg",
            "images/a401199/a401199_11.jpg",
            "images/a401199/a401199_12.jpg",
            "images/a401199/a401199_13.jpg",
            "images/a401199/a401199_14.jpg",
            "images/a401199/a401199_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Once upon a time there was a woman named Mary, mother of Jesus of Nazareth.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400203",
        "images": [
            "images/a400203/a400203_00.jpg",
            "images/a400203/a400203_01.jpg",
            "images/a400203/a400203_02.jpg",
            "images/a400203/a400203_03.jpg",
            "images/a400203/a400203_04.jpg",
            "images/a400203/a400203_05.jpg",
            "images/a400203/a400203_06.jpg",
            "images/a400203/a400203_07.jpg",
            "images/a400203/a400203_08.jpg",
            "images/a400203/a400203_09.jpg",
            "images/a400203/a400203_10.jpg",
            "images/a400203/a400203_11.jpg",
            "images/a400203/a400203_12.jpg",
            "images/a400203/a400203_13.jpg",
            "images/a400203/a400203_14.jpg",
            "images/a400203/a400203_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"animals decided to plant a beautiful field of wildflowers near Squarrel tree, creating a colorful and vibrant display of gratitude.  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400889",
        "images": [
            "images/a400889/a400889_00.jpg",
            "images/a400889/a400889_01.jpg",
            "images/a400889/a400889_02.jpg",
            "images/a400889/a400889_03.jpg",
            "images/a400889/a400889_04.jpg",
            "images/a400889/a400889_05.jpg",
            "images/a400889/a400889_06.jpg",
            "images/a400889/a400889_07.jpg",
            "images/a400889/a400889_08.jpg",
            "images/a400889/a400889_09.jpg",
            "images/a400889/a400889_10.jpg",
            "images/a400889/a400889_11.jpg",
            "images/a400889/a400889_12.jpg",
            "images/a400889/a400889_13.jpg",
            "images/a400889/a400889_14.jpg",
            "images/a400889/a400889_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A guy with his hands behind his back is skateboarding down a winding road  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400307",
        "images": [
            "images/a400307/a400307_00.jpg",
            "images/a400307/a400307_01.jpg",
            "images/a400307/a400307_02.jpg",
            "images/a400307/a400307_03.jpg",
            "images/a400307/a400307_04.jpg",
            "images/a400307/a400307_05.jpg",
            "images/a400307/a400307_06.jpg",
            "images/a400307/a400307_07.jpg",
            "images/a400307/a400307_08.jpg",
            "images/a400307/a400307_09.jpg",
            "images/a400307/a400307_10.jpg",
            "images/a400307/a400307_11.jpg",
            "images/a400307/a400307_12.jpg",
            "images/a400307/a400307_13.jpg",
            "images/a400307/a400307_14.jpg",
            "images/a400307/a400307_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"chameleon dj playing music at a festival  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400700",
        "images": [
            "images/a400700/a400700_00.jpg",
            "images/a400700/a400700_01.jpg",
            "images/a400700/a400700_02.jpg",
            "images/a400700/a400700_03.jpg",
            "images/a400700/a400700_04.jpg",
            "images/a400700/a400700_05.jpg",
            "images/a400700/a400700_06.jpg",
            "images/a400700/a400700_07.jpg",
            "images/a400700/a400700_08.jpg",
            "images/a400700/a400700_09.jpg",
            "images/a400700/a400700_10.jpg",
            "images/a400700/a400700_11.jpg",
            "images/a400700/a400700_12.jpg",
            "images/a400700/a400700_13.jpg",
            "images/a400700/a400700_14.jpg",
            "images/a400700/a400700_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"kids playing with dog in jungle  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500241",
        "images": [
            "images/a500241/a500241_00.jpg",
            "images/a500241/a500241_01.jpg",
            "images/a500241/a500241_02.jpg",
            "images/a500241/a500241_03.jpg",
            "images/a500241/a500241_04.jpg",
            "images/a500241/a500241_05.jpg",
            "images/a500241/a500241_06.jpg",
            "images/a500241/a500241_07.jpg",
            "images/a500241/a500241_08.jpg",
            "images/a500241/a500241_09.jpg",
            "images/a500241/a500241_10.jpg",
            "images/a500241/a500241_11.jpg",
            "images/a500241/a500241_12.jpg",
            "images/a500241/a500241_13.jpg",
            "images/a500241/a500241_14.jpg",
            "images/a500241/a500241_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"hyperlapse wide shot of a galaxy formation, ar 16:9  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700627",
        "images": [
            "images/a700627/a700627_00.jpg",
            "images/a700627/a700627_01.jpg",
            "images/a700627/a700627_02.jpg",
            "images/a700627/a700627_03.jpg",
            "images/a700627/a700627_04.jpg",
            "images/a700627/a700627_05.jpg",
            "images/a700627/a700627_06.jpg",
            "images/a700627/a700627_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"elf girl shooting from a bow  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500373",
        "images": [
            "images/a500373/a500373_00.jpg",
            "images/a500373/a500373_01.jpg",
            "images/a500373/a500373_02.jpg",
            "images/a500373/a500373_03.jpg",
            "images/a500373/a500373_04.jpg",
            "images/a500373/a500373_05.jpg",
            "images/a500373/a500373_06.jpg",
            "images/a500373/a500373_07.jpg",
            "images/a500373/a500373_08.jpg",
            "images/a500373/a500373_09.jpg",
            "images/a500373/a500373_10.jpg",
            "images/a500373/a500373_11.jpg",
            "images/a500373/a500373_12.jpg",
            "images/a500373/a500373_13.jpg",
            "images/a500373/a500373_14.jpg",
            "images/a500373/a500373_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"an old news archives video of jeanne of arc, 4:3, black and white  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401450",
        "images": [
            "images/a401450/a401450_00.jpg",
            "images/a401450/a401450_01.jpg",
            "images/a401450/a401450_02.jpg",
            "images/a401450/a401450_03.jpg",
            "images/a401450/a401450_04.jpg",
            "images/a401450/a401450_05.jpg",
            "images/a401450/a401450_06.jpg",
            "images/a401450/a401450_07.jpg",
            "images/a401450/a401450_08.jpg",
            "images/a401450/a401450_09.jpg",
            "images/a401450/a401450_10.jpg",
            "images/a401450/a401450_11.jpg",
            "images/a401450/a401450_12.jpg",
            "images/a401450/a401450_13.jpg",
            "images/a401450/a401450_14.jpg",
            "images/a401450/a401450_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"A bird flying over the Niagara Falls  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401171",
        "images": [
            "images/a401171/a401171_00.jpg",
            "images/a401171/a401171_01.jpg",
            "images/a401171/a401171_02.jpg",
            "images/a401171/a401171_03.jpg",
            "images/a401171/a401171_04.jpg",
            "images/a401171/a401171_05.jpg",
            "images/a401171/a401171_06.jpg",
            "images/a401171/a401171_07.jpg",
            "images/a401171/a401171_08.jpg",
            "images/a401171/a401171_09.jpg",
            "images/a401171/a401171_10.jpg",
            "images/a401171/a401171_11.jpg",
            "images/a401171/a401171_12.jpg",
            "images/a401171/a401171_13.jpg",
            "images/a401171/a401171_14.jpg",
            "images/a401171/a401171_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a portrait of the stoic epictetus in black and white 16:9 ratio  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700649",
        "images": [
            "images/a700649/a700649_00.jpg",
            "images/a700649/a700649_01.jpg",
            "images/a700649/a700649_02.jpg",
            "images/a700649/a700649_03.jpg",
            "images/a700649/a700649_04.jpg",
            "images/a700649/a700649_05.jpg",
            "images/a700649/a700649_06.jpg",
            "images/a700649/a700649_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a healthy and colorful meal with avocados ar16:9  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700540",
        "images": [
            "images/a700540/a700540_00.jpg",
            "images/a700540/a700540_01.jpg",
            "images/a700540/a700540_02.jpg",
            "images/a700540/a700540_03.jpg",
            "images/a700540/a700540_04.jpg",
            "images/a700540/a700540_05.jpg",
            "images/a700540/a700540_06.jpg",
            "images/a700540/a700540_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Go Pro Film, Billie Eilish exploring Wonderland and meeting various creatures, such as the Caterpillar, the Duchess, the Queen of Hearts,  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a700479",
        "images": [
            "images/a700479/a700479_00.jpg",
            "images/a700479/a700479_01.jpg",
            "images/a700479/a700479_02.jpg",
            "images/a700479/a700479_03.jpg",
            "images/a700479/a700479_04.jpg",
            "images/a700479/a700479_05.jpg",
            "images/a700479/a700479_06.jpg",
            "images/a700479/a700479_07.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Alpha Wolf in the dark forest stars shining and smoky environment ar9:16 portrait mode  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500106",
        "images": [
            "images/a500106/a500106_00.jpg",
            "images/a500106/a500106_01.jpg",
            "images/a500106/a500106_02.jpg",
            "images/a500106/a500106_03.jpg",
            "images/a500106/a500106_04.jpg",
            "images/a500106/a500106_05.jpg",
            "images/a500106/a500106_06.jpg",
            "images/a500106/a500106_07.jpg",
            "images/a500106/a500106_08.jpg",
            "images/a500106/a500106_09.jpg",
            "images/a500106/a500106_10.jpg",
            "images/a500106/a500106_11.jpg",
            "images/a500106/a500106_12.jpg",
            "images/a500106/a500106_13.jpg",
            "images/a500106/a500106_14.jpg",
            "images/a500106/a500106_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"Introduce a team of human explorers who land on the alien planet. Describe their motivations and expectations as they explore this new world in 2D animation  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a400017",
        "images": [
            "images/a400017/a400017_00.jpg",
            "images/a400017/a400017_01.jpg",
            "images/a400017/a400017_02.jpg",
            "images/a400017/a400017_03.jpg",
            "images/a400017/a400017_04.jpg",
            "images/a400017/a400017_05.jpg",
            "images/a400017/a400017_06.jpg",
            "images/a400017/a400017_07.jpg",
            "images/a400017/a400017_08.jpg",
            "images/a400017/a400017_09.jpg",
            "images/a400017/a400017_10.jpg",
            "images/a400017/a400017_11.jpg",
            "images/a400017/a400017_12.jpg",
            "images/a400017/a400017_13.jpg",
            "images/a400017/a400017_14.jpg",
            "images/a400017/a400017_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a dino whos is eating meat  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a500440",
        "images": [
            "images/a500440/a500440_00.jpg",
            "images/a500440/a500440_01.jpg",
            "images/a500440/a500440_02.jpg",
            "images/a500440/a500440_03.jpg",
            "images/a500440/a500440_04.jpg",
            "images/a500440/a500440_05.jpg",
            "images/a500440/a500440_06.jpg",
            "images/a500440/a500440_07.jpg",
            "images/a500440/a500440_08.jpg",
            "images/a500440/a500440_09.jpg",
            "images/a500440/a500440_10.jpg",
            "images/a500440/a500440_11.jpg",
            "images/a500440/a500440_12.jpg",
            "images/a500440/a500440_13.jpg",
            "images/a500440/a500440_14.jpg",
            "images/a500440/a500440_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"chocolate stream falling into chocolate cake,4k,beautiful,ultrarealistic  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    },
    {
        "id": "a401017",
        "images": [
            "images/a401017/a401017_00.jpg",
            "images/a401017/a401017_01.jpg",
            "images/a401017/a401017_02.jpg",
            "images/a401017/a401017_03.jpg",
            "images/a401017/a401017_04.jpg",
            "images/a401017/a401017_05.jpg",
            "images/a401017/a401017_06.jpg",
            "images/a401017/a401017_07.jpg",
            "images/a401017/a401017_08.jpg",
            "images/a401017/a401017_09.jpg",
            "images/a401017/a401017_10.jpg",
            "images/a401017/a401017_11.jpg",
            "images/a401017/a401017_12.jpg",
            "images/a401017/a401017_13.jpg",
            "images/a401017/a401017_14.jpg",
            "images/a401017/a401017_15.jpg"
        ],
        "prompt": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"portrait of a young and beautiful Girl resident of Pakistan with bathing suits in the Beach, Iphone Pro 14, Sunlight  \",\nall the frames of video are as follows: \n\n",
        "labels": {
            "visual quality": 1.0,
            "dynamic degree": 3.0,
            "text-to-video alignment": 1.0,
            "factual consistency": 1.0,
            "temporal consistency": 1.0
        }
    }
]