Sedona(GeoSpark)读取 CSV
阅读原文 | 时间:2023年07月10日 | 阅读:2
package com.grady.sedona

import org.apache.sedona.sql.utils.SedonaSQLRegistrator
import org.apache.sedona.viz.core.Serde.SedonaVizKryoRegistrator
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.SparkSession

/**
 * Spark job that loads a headerless, comma-separated point file and turns every
 * row into a Sedona point geometry via the `ST_Point` SQL function.
 */
object SedonaReadCsv {

  // HDFS location of the input CSV file
  val csvPointInputLocation: String = "/tmp/jiang/" + "testpoint.csv"

  /**
   * Entry point: builds a Kryo-serialized Spark session, registers the Sedona
   * SQL extensions on it, runs [[readCsv]] and finally stops the session.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val ss: SparkSession = SparkSession.builder()
      .config("spark.serializer", classOf[KryoSerializer].getName)
      .config("spark.kryo.registrator", classOf[SedonaVizKryoRegistrator].getName)
      .appName("SedonaAnalysisScv").getOrCreate()

    // Stop the session even when registration or the read fails, so the
    // application does not keep its cluster resources after an error.
    try {
      SedonaSQLRegistrator.registerAll(ss)
      readCsv(ss)
    } finally {
      ss.stop()
    }
  }

  /**
   * Reads the CSV at [[csvPointInputLocation]], prints its first 10 rows, then
   * builds and prints a DataFrame with a single `pointshape` geometry column.
   *
   * Side effects: registers the temp views `test_point_csv` (raw CSV columns
   * `_c0`, `_c1`) and `test_point` (the `pointshape` column) on the session.
   *
   * @param ss active Spark session on which the Sedona SQL functions are registered
   */
  def readCsv(ss: SparkSession): Unit = {
    // No header row, so Spark names the columns _c0, _c1, ...
    val pointCsvDF = ss.read
      .format("csv")
      .option("delimiter", ",")
      .option("header", "false")
      .load(csvPointInputLocation)

    pointCsvDF.createOrReplaceTempView("test_point_csv")
    pointCsvDF.show(10)

    // CSV columns are read as strings; cast them to decimals before handing them to ST_Point.
    val pointDF = ss.sql("select ST_Point(cast(test_point_csv._c0 as Decimal(24,20)),cast(test_point_csv._c1 as Decimal(24,20))) as pointshape from test_point_csv")
    // Expose the geometry DataFrame (not the raw CSV one) under "test_point".
    pointDF.createOrReplaceTempView("test_point")
    pointDF.show()
  }

}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!--
      Maven module "sedona": builds a shaded jar containing the Scala job plus the
      Sedona and JTS libraries; the Spark artifacts are declared as "provided".
      NOTE(review): ${spark.version}, ${scala.binary.version} and ${scala.version}
      are not defined in this file; presumably they come from the spark-practise
      parent POM. Confirm.
    -->
    <parent>
        <artifactId>spark-practise</artifactId>
        <groupId>org.example</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>sedona</artifactId>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Spark: "provided" scope, so these are compile-time only and are not packed into the shaded jar -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <!--
          Sedona (default compile scope, so bundled into the shaded jar).
          NOTE(review): the artifactIds hard-code the "3.0_2.12" suffix while the Spark
          dependencies above use ${scala.binary.version}; verify that the two agree.
        -->
        <dependency>
            <groupId>org.apache.sedona</groupId>
            <artifactId>sedona-core-3.0_2.12</artifactId>
            <version>1.1.1-incubating</version>
        </dependency>
        <dependency>
            <groupId>org.apache.sedona</groupId>
            <artifactId>sedona-sql-3.0_2.12</artifactId>
            <version>1.1.1-incubating</version>
        </dependency>
        <dependency>
            <groupId>org.apache.sedona</groupId>
            <artifactId>sedona-viz-3.0_2.12</artifactId>
            <version>1.1.1-incubating</version>
        </dependency>
        <!-- JTS geometry library, declared explicitly with a pinned version -->
        <dependency>
            <groupId>org.locationtech.jts</groupId>
            <artifactId>jts-core</artifactId>
            <version>1.18.0</version>
        </dependency>
    </dependencies>

    <build>
        <!-- Resource filtering is on: ${...} placeholders in src/main/resources are substituted at build time -->
        <resources>
            <resource>
                <directory>src/main/resources</directory>
                <filtering>true</filtering>
            </resource>
        </resources>

        <plugins>
            <!--
              Scala compilation. The executions are bound to process-resources and
              process-test-resources, i.e. ahead of Maven's compile and test-compile phases.
            -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <scalaVersion>${scala.version}</scalaVersion>
                </configuration>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

            <!-- Shaded jar built during the package phase; slf4j artifacts are excluded from it -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.1</version>
                <configuration>
                    <artifactSet>
                        <excludes>
                            <exclude>org.slf4j:*</exclude>
                        </excludes>
                    </artifactSet>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <!-- Drop jar signature files from every bundled artifact; they no longer match the repackaged jar -->
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <!-- Merge META-INF/services files from the bundled jars instead of letting one overwrite another -->
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

执行:spark-submit --master yarn --driver-memory=2G --class com.grady.sedona.SedonaReadCsv /app/data/appdeploy/sedona-1.0-SNAPSHOT.jar

日志:

+----+-----+
| _c0|  _c1|
+----+-----+
| 1.1|101.1|
| 2.1|102.1|
| 3.1|103.1|
| 4.1|104.1|
| 5.1|105.1|
| 6.1|106.1|
| 7.1|107.1|
| 8.1|108.1|
| 9.1|109.1|
|10.1|110.1|
+----+-----+

+------------------+
|        pointshape|
+------------------+
| POINT (1.1 101.1)|
| POINT (2.1 102.1)|
| POINT (3.1 103.1)|
| POINT (4.1 104.1)|
| POINT (5.1 105.1)|
| POINT (6.1 106.1)|
| POINT (7.1 107.1)|
| POINT (8.1 108.1)|
| POINT (9.1 109.1)|
|POINT (10.1 110.1)|
|POINT (11.1 111.1)|
|POINT (12.1 112.1)|
|POINT (13.1 113.1)|
|POINT (14.1 114.1)|
|POINT (15.1 115.1)|
|POINT (16.1 116.1)|
|POINT (17.1 117.1)|
|POINT (18.1 118.1)|
|POINT (19.1 119.1)|
|POINT (20.1 120.1)|
+------------------+
only showing top 20 rows

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器